void redux_cpu_func(void *descr[], void *cl_arg)
{
	DOT_TYPE *dota = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
	DOT_TYPE *dotb = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]);

	*dota = *dota + *dotb;
}
Exemplo n.º 2
0
void func_cpu_noargs(void *descr[], void *_args)
{
	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]);

        *x0 = *x0 * _ifactor;
        *x1 = *x1 * _ffactor;
}
void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);

	*x0 += 1;
	*x1 *= *x1;
}
void redux_cpu_kernel(void *descr[], void *arg)
{
	STARPU_SKIP_IF_VALGRIND;

	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
	*dst = *dst + *src;
}
/*
 *	Codelet to perform the reduction of two elements
 */
void redux_cpu_func(void *descr[], void *cl_arg)
{
	long int *dota = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	long int *dotb = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]);

	*dota = *dota + *dotb;
	FPRINTF_MPI(stderr, "Calling redux %ld=%ld+%ld\n", *dota, *dota-*dotb, *dotb);
}
Exemplo n.º 6
0
void stencil5_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	float *xy = (float *)STARPU_VARIABLE_GET_PTR(descr[0]);
	float *xm1y = (float *)STARPU_VARIABLE_GET_PTR(descr[1]);
	float *xp1y = (float *)STARPU_VARIABLE_GET_PTR(descr[2]);
	float *xym1 = (float *)STARPU_VARIABLE_GET_PTR(descr[3]);
	float *xyp1 = (float *)STARPU_VARIABLE_GET_PTR(descr[4]);

//	fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
	*xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5;
//	fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1);
}
Exemplo n.º 7
0
void func_cpu_args(void *descr[], void *_args)
{
	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]);
	int ifactor;
	float ffactor;

	starpu_codelet_unpack_args(_args, &ifactor, &ffactor);

        *x0 = *x0 * ifactor;
        *x1 = *x1 * ffactor;
}
Exemplo n.º 8
0
void minmax_redux_cpu_func(void *descr[], void *cl_arg)
{
	TYPE *array_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
	TYPE *array_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]);

	/* Compute the min value */
	TYPE min_dst = array_dst[0];
	TYPE min_src = array_src[0];
	array_dst[0] = STARPU_MIN(min_dst, min_src);

	/* Compute the max value */
	TYPE max_dst = array_dst[1];
	TYPE max_src = array_src[1];
	array_dst[1] = STARPU_MAX(max_dst, max_src);
}
void neutral_cpu_kernel(void *descr[], void *arg)
{
	STARPU_SKIP_IF_VALGRIND;

	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
	*dst = 0;
}
Exemplo n.º 10
0
int convert_variable_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
{
	*ptr = STARPU_VARIABLE_GET_PTR(interface);
	(*ss).size = STARPU_VARIABLE_GET_ELEMSIZE(interface);

	return 0;
}
Exemplo n.º 11
0
void cpu_codelet(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);

	*val += 1;
	printf("task executing \n");
}
void func_cpu(void *descr[], void *args)
{
	float *x = (float *) STARPU_VARIABLE_GET_PTR(descr[0]);
	float factor;

	factor = *(float *) args;
        *x *= factor;
}
Exemplo n.º 13
0
static void redux_cuda_kernel(void *descr[], void *arg)
{
	STARPU_SKIP_IF_VALGRIND;

	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);

	unsigned host_dst, host_src;

	/* This is a dummy technique of course */
	cudaMemcpyAsync(&host_src, src, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
	cudaMemcpyAsync(&host_dst, dst, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
	cudaStreamSynchronize(starpu_cuda_get_local_stream());

	host_dst += host_src;

	cudaMemcpyAsync(dst, &host_dst, sizeof(unsigned), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
}
Exemplo n.º 14
0
void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	int rank;

	starpu_codelet_unpack_args(_args, &rank);
	FPRINTF_MPI(stderr, "Executing codelet with value %d and rank %d\n", *value, rank);
	STARPU_ASSERT_MSG(*value == rank, "Received value %d is not the expected value %d\n", *value, rank);
}
Exemplo n.º 15
0
uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle)
{
    unsigned node;
    node = _starpu_memory_node_get_local_key();

    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

    return STARPU_VARIABLE_GET_PTR(starpu_data_get_interface_on_node(handle, node));
}
Exemplo n.º 16
0
static void neutral_cuda_kernel(void *descr[], void *arg)
{
	STARPU_SKIP_IF_VALGRIND;

	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);

	/* This is a dummy technique of course */
	unsigned host_dst = 0;
	cudaMemcpyAsync(dst, &host_dst, sizeof(unsigned), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
}
Exemplo n.º 17
0
void minmax_neutral_cpu_func(void *descr[], void *cl_arg)
{
	TYPE *array = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);

	/* Initialize current min to the greatest possible value. */
	array[0] = TYPE_MAX;

	/* Initialize current max to the smallest possible value. */
	array[1] = TYPE_MIN;
}
Exemplo n.º 18
0
void redux_opencl_func(void *buffers[], void *args)
{
	int id, devid;
        cl_int err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	cl_mem dota = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]);
	cl_mem dotb = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[1]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "_redux_opencl", devid);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(dota), &dota);
	err|= clSetKernelArg(kernel, 1, sizeof(dotb), &dotb);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=1;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
                if (local > global)
			local=global;

		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}
	starpu_opencl_release_kernel(kernel);
}
Exemplo n.º 19
0
static void redux_opencl_kernel(void *descr[], void *arg)
{
	STARPU_SKIP_IF_VALGRIND;

	unsigned h_dst, h_src;

	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
	cl_mem d_src = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[1]);

	cl_command_queue queue;
	starpu_opencl_get_current_queue(&queue);

	/* This is a dummy technique of course */
	clEnqueueReadBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
	clEnqueueReadBuffer(queue, d_src, CL_TRUE, 0, sizeof(unsigned), (void *)&h_src, 0, NULL, NULL);

	h_dst += h_src;

	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
}
Exemplo n.º 20
0
static void neutral_opencl_kernel(void *descr[], void *arg)
{
	STARPU_SKIP_IF_VALGRIND;

	unsigned h_dst = 0;
	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);

	cl_command_queue queue;
	starpu_opencl_get_current_queue(&queue);

	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
}
/*
 *	Dot product codelet
 */
void dot_cpu_func(void *descr[], void *cl_arg)
{
	long int *local_x = (long int *)STARPU_VECTOR_GET_PTR(descr[0]);
	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);

	long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]);

	//FPRINTF_MPI(stderr, "Before dot=%ld (adding %d elements...)\n", *dot, n);
	unsigned i;
	for (i = 0; i < n; i++)
	{
		//FPRINTF_MPI(stderr, "Adding %ld\n", local_x[i]);
		*dot += local_x[i];
	}
	//FPRINTF_MPI(stderr, "After dot=%ld\n", *dot);
}
Exemplo n.º 22
0
static void register_variable_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
{
	unsigned node;
	for (node = 0; node < STARPU_MAXNODES; node++)
	{
		starpu_variable_interface_t *local_interface = 
			starpu_data_get_interface_on_node(handle, node);

		if (node == home_node) {
			local_interface->ptr = STARPU_VARIABLE_GET_PTR(interface);
		}
		else {
			local_interface->ptr = 0;
		}

		local_interface->elemsize = STARPU_VARIABLE_GET_ELEMSIZE(interface);
	}
}
Exemplo n.º 23
0
void dot_cpu_func(void *descr[], void *cl_arg)
{
	float *local_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
	float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
	DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]);

	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);

	DOT_TYPE local_dot = 0.0;

	unsigned i;
	for (i = 0; i < n; i++)
	{
		local_dot += (DOT_TYPE)local_x[i]*(DOT_TYPE)local_y[i];
	}

	*dot = *dot + local_dot;
}
Exemplo n.º 24
0
void init_opencl_func(void *buffers[], void *args)
{
        cl_int err;
	cl_command_queue queue;

	cl_mem dot = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]);
	starpu_opencl_get_current_queue(&queue);
	DOT_TYPE zero = (DOT_TYPE) 0.0;

	err = clEnqueueWriteBuffer(queue,
			dot,
			CL_TRUE,
			0,
			sizeof(DOT_TYPE),
			&zero,
			0,
			NULL,
			NULL);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

}
Exemplo n.º 25
0
void dot_cuda_func(void *descr[], void *cl_arg)
{
	DOT_TYPE current_dot;
	DOT_TYPE local_dot;

	float *local_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
	float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]);
	DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]);

	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);

	cudaMemcpyAsync(&current_dot, dot, sizeof(DOT_TYPE), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
	cudaStreamSynchronize(starpu_cuda_get_local_stream());

	local_dot = (DOT_TYPE)cublasSdot(n, local_x, 1, local_y, 1);

	/* FPRINTF(stderr, "current_dot %f local dot %f -> %f\n", current_dot, local_dot, current_dot + local_dot); */
	current_dot += local_dot;

	cudaMemcpyAsync(dot, &current_dot, sizeof(DOT_TYPE), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
	cudaStreamSynchronize(starpu_cuda_get_local_stream());
}
Exemplo n.º 26
0
void minmax_cpu_func(void *descr[], void *cl_arg)
{
	/* The array containing the values */
	TYPE *local_array = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]);
	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);

	TYPE *minmax = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]);

	TYPE local_min = minmax[0];
	TYPE local_max = minmax[1];

	/* Compute the min and the max elements in the array */
	unsigned i;
	for (i = 0; i < n; i++)
	{
		TYPE val = local_array[i];
		local_min = STARPU_MIN(local_min, val);
		local_max = STARPU_MAX(local_max, val);
	}

	minmax[0] = local_min;
	minmax[1] = local_max;
}
Exemplo n.º 27
0
static void *variable_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
{
    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

    return (void*) STARPU_VARIABLE_GET_PTR(starpu_data_get_interface_on_node(handle, node));
}
/*
 *	Codelet to create a neutral element
 */
void init_cpu_func(void *descr[], void *cl_arg)
{
	long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	*dot = 0;
	FPRINTF_MPI(stderr, "Init dot\n");
}
Exemplo n.º 29
0
void init_cuda_func(void *descr[], void *cl_arg)
{
	DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
	cudaMemsetAsync(dot, 0, sizeof(DOT_TYPE), starpu_cuda_get_local_stream());
}
Exemplo n.º 30
0
void init_cpu_func(void *descr[], void *cl_arg)
{
	DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
	*dot = 0.0f;
}