void opencl_codelet_incC(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	STARPU_SKIP_IF_VALGRIND;

	cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]);
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;
	int id, devid, err;

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "incC", devid);
	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
	if (err) STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=100;
		size_t local=100;
		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
	}
	starpu_opencl_release_kernel(kernel);
}
void opencl_codelet(void *descr[], void *_args)
{
	cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]);
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;
	int id, devid, err;

        id = starpu_worker_get_id();
        devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "incrementer", devid);
	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
	if (err) STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=4;
		size_t local, s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);
                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
                if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
                if (local > global) local=global;

		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);

		starpu_opencl_release_kernel(kernel);
	}
}
void
opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
{
#if 0
	size_t dim[] = {nx, ny, nz};
#else
	size_t dim[] = {nx, ny, 1};
#endif

        int devid,id;
        id = starpu_worker_get_id();
        devid = starpu_worker_get_devid(id);

        cl_kernel kernel;
        cl_command_queue cq;
        starpu_opencl_load_kernel(&kernel, &cq, &program, "shadow", devid);

        clSetKernelArg(kernel, 0, sizeof(bz), &bz);
        clSetKernelArg(kernel, 1, sizeof(ptr), &ptr);
        clSetKernelArg(kernel, 2, sizeof(nx), &nx);
        clSetKernelArg(kernel, 3, sizeof(ny), &ny);
        clSetKernelArg(kernel, 4, sizeof(nz), &nz);
        clSetKernelArg(kernel, 5, sizeof(ldy), &ldy);
        clSetKernelArg(kernel, 6, sizeof(ldz), &ldz);
        clSetKernelArg(kernel, 7, sizeof(i), &i);

        cl_event ev;
        cl_int err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
                STARPU_OPENCL_REPORT_ERROR(err);
}
Exemplo n.º 4
0
void axpy_opencl(void *buffers[], void *_args)
{
	TYPE *alpha = _args;
	int id, devid;
        cl_int err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
	cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
	cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_axpy_opencl", devid);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(x), &x);
	err|= clSetKernelArg(kernel, 1, sizeof(y), &y);
	err|= clSetKernelArg(kernel, 2, sizeof(n), &n);
	err|= clSetKernelArg(kernel, 3, sizeof(*alpha), alpha);
	if (err)
		STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=n;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
                if (local > global)
			local=global;

		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}
	starpu_opencl_release_kernel(kernel);
}
void redux_opencl_func(void *buffers[], void *args)
{
	int id, devid;
        cl_int err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	cl_mem dota = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]);
	cl_mem dotb = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[1]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "_redux_opencl", devid);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(dota), &dota);
	err|= clSetKernelArg(kernel, 1, sizeof(dotb), &dotb);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=1;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
                if (local > global)
			local=global;

		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}
	starpu_opencl_release_kernel(kernel);
}
void opencl_codelet(void *descr[], void *_args)
{
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;
	int id, devid, err;
	cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(descr[0]);
	int nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
	int ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
	int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
        int ldy = (int)STARPU_BLOCK_GET_LDY(descr[0]);
        int ldz = (int) STARPU_BLOCK_GET_LDZ(descr[0]);
        float *multiplier = (float *)_args;

        id = starpu_worker_get_id();
        devid = starpu_worker_get_devid(id);

        err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "block", devid);
        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);

	CHECK_CL_SET_KERNEL_ARG(kernel, 0, sizeof(block), &block);
	CHECK_CL_SET_KERNEL_ARG(kernel, 1, sizeof(nx), &nx);
	CHECK_CL_SET_KERNEL_ARG(kernel, 2, sizeof(ny), &ny);
	CHECK_CL_SET_KERNEL_ARG(kernel, 3, sizeof(nz), &nz);
	CHECK_CL_SET_KERNEL_ARG(kernel, 4, sizeof(ldy), &ldy);
	CHECK_CL_SET_KERNEL_ARG(kernel, 5, sizeof(ldz), &ldz);
	CHECK_CL_SET_KERNEL_ARG(kernel, 6, sizeof(*multiplier), multiplier);

	{
                size_t global=nx*ny*nz;
		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
	}

	clFinish(queue);
	starpu_opencl_collect_stats(event);
	clReleaseEvent(event);

        starpu_opencl_release_kernel(kernel);
}
Exemplo n.º 7
0
void opencl_codelet(void *descr[], void *_args)
{
	cl_kernel kernel;
	cl_command_queue queue;
	int id, devid, err, n;
	float *block = (float *)STARPU_BLOCK_GET_PTR(descr[0]);
	int nx = (int)STARPU_BLOCK_GET_NX(descr[0]);
	int ny = (int)STARPU_BLOCK_GET_NY(descr[0]);
	int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]);
        unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]);
        unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]);
        float *multiplier = (float *)_args;

        id = starpu_worker_get_id();
        devid = starpu_worker_get_devid(id);

        err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "block", devid);
        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);

	err = 0;
        n=0;
	err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &block);
	err = clSetKernelArg(kernel, 1, sizeof(int), &nx);
	err = clSetKernelArg(kernel, 2, sizeof(int), &ny);
	err = clSetKernelArg(kernel, 3, sizeof(int), &nz);
	err = clSetKernelArg(kernel, 4, sizeof(ldy), &ldy);
	err = clSetKernelArg(kernel, 5, sizeof(ldz), &ldz);
	err = clSetKernelArg(kernel, 6, sizeof(float), multiplier);
        if (err) STARPU_OPENCL_REPORT_ERROR(err);

	{
                size_t global=nx*ny*nz;
		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
	}

	clFinish(queue);

        starpu_opencl_release_kernel(kernel);
}
void vector_scal_opencl(void *buffers[], void *_args)
{
	float *factor = _args;
	int id, devid, err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	/* length of the vector */
	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
	/* OpenCL copy of the vector pointer */
	cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel, &queue, &programs,
					"vector_mult_opencl", devid);   /* Name of the codelet defined above */
	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
	err |= clSetKernelArg(kernel, 1, sizeof(n), &n);
	err |= clSetKernelArg(kernel, 2, sizeof(*factor), factor);
	if (err) STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=1;
		size_t local=1;
		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
	}

	clFinish(queue);
	starpu_opencl_collect_stats(event);
	clReleaseEvent(event);

	starpu_opencl_release_kernel(kernel);
}
void cpu_to_opencl_opencl_func(void *buffers[], void *args)
{
	(void) args;
	int id, devid;
        cl_int err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	unsigned n = CUSTOM_GET_NX(buffers[0]);
	n*=2;
	struct point *aop;
	aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel,
					&queue,
					&_opencl_conversion_program,
					"custom_opencl_conversion",
					devid);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);


	void *x = CUSTOM_GET_OPENCL_X_PTR(buffers[0]);
	if (starpu_opencl_set_kernel_args(&err, &kernel,
					  sizeof(aop), &aop,
					  sizeof(x), &x,
					  sizeof(n), &n,
					  0) != 3)
	{
		STARPU_OPENCL_REPORT_ERROR(err);
		assert(0);
	}
	

	{
		size_t global=n;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel,
						device,
						CL_KERNEL_WORK_GROUP_SIZE,
						sizeof(local),
						&local,
						&s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);

                if (local > global)
			local = global;

		err = clEnqueueNDRangeKernel(
				queue,
				kernel,
				1,       /* work_dim */
				NULL,    /* global_work_offset */
				&global, /* global_work_size */
				&local,  /* local_work_size */
				0,       /* num_events_in_wait_list */
				NULL,    /* event_wait_list */
				NULL);

		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}
	starpu_opencl_release_kernel(kernel);
}
Exemplo n.º 10
0
#include <starpu_opencl.h>
#include <CL/cl.h>

extern struct starpu_opencl_program opencl_code;

void opencl_codelet_incA(void *descr[], __attribute__ ((unused)) void *_args)
{
        unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
	cl_kernel kernel;
	cl_command_queue queue;
	int id, devid, err;

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "incA", devid);
	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);

	err = 0;
	err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);
	if (err) STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=100;
		size_t local=100;
		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
	}

	clFinish(queue);
void cpu_to_opencl_opencl_func(void *buffers[], void *args)
{
	STARPU_SKIP_IF_VALGRIND;

	(void) args;
	int id, devid, ret;
        cl_int err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
	cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);
	cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
						  &opencl_conversion_program,
						  NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");

	err = starpu_opencl_load_kernel(&kernel,
					&queue,
					&opencl_conversion_program,
					"cpu_to_opencl_opencl",
					devid);

	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(src), &src);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 1, sizeof(dst), &dst);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 2, sizeof(n), &n);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);


	{
		size_t global=n;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel,
						device,
						CL_KERNEL_WORK_GROUP_SIZE,
						sizeof(local),
						&local,
						&s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);

                if (local > global)
			local = global;

		err = clEnqueueNDRangeKernel(queue,
					kernel,
					1,
					NULL,
					&global,
					&local,
					0,
					NULL,
					&event);

		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}

	clFinish(queue);
	starpu_opencl_collect_stats(event);
	clReleaseEvent(event);

	starpu_opencl_release_kernel(kernel);
        ret = starpu_opencl_unload_opencl(&opencl_conversion_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
}
void multiformat_scal_opencl_func(void *buffers[], void *args)
{
	(void) args;
	int id, devid;
        cl_int err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
	cl_mem val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	err = starpu_opencl_load_kernel(&kernel,
					&queue,
					&opencl_program,
					"multiformat_opencl",
					devid);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err  = clSetKernelArg(kernel, 0, sizeof(val), &val);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 1, sizeof(n), &n);
	if (err)
		STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global=n;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel,
						device,
						CL_KERNEL_WORK_GROUP_SIZE,
						sizeof(local),
						&local,
						&s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);

                if (local > global)
			local = global;

		err = clEnqueueNDRangeKernel(queue,
					kernel,
					1,
					NULL,
					&global,
					&local,
					0,
					NULL,
					&event);

		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}

	clFinish(queue);
	starpu_opencl_collect_stats(event);
	clReleaseEvent(event);

	starpu_opencl_release_kernel(kernel);
}
void test_variable_opencl_func(void *buffers[], void *args)
{
	STARPU_SKIP_IF_VALGRIND;

	int id, devid, ret;
	int factor = *(int *) args;

        cl_int             err;
	cl_kernel          kernel;
	cl_command_queue   queue;
	cl_event           event;

	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");

	cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]);

	cl_context context;
	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);
	starpu_opencl_get_context(devid, &context);

	cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
		sizeof(int), &variable_config.copy_failed, &err);

	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);


	err = starpu_opencl_load_kernel(&kernel,
					&queue,
					&opencl_program,
					"variable_opencl",
					devid);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err  = clSetKernelArg(kernel, 0, sizeof(val), &val);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 1, sizeof(fail), &fail);
	if (err)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 2, sizeof(factor), &factor);
	if (err)
		STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global = 1;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel,
						device,
						CL_KERNEL_WORK_GROUP_SIZE,
						sizeof(local),
						&local,
						&s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);

                if (local > global)
			local = global;

		err = clEnqueueNDRangeKernel(queue,
					kernel,
					1,
					NULL,
					&global,
					&local,
					0,
					NULL,
					&event);

		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}

	err = clEnqueueReadBuffer(queue,
				  fail,
				  CL_TRUE,
				  0, 
				  sizeof(int),
				  &variable_config.copy_failed,
				  0,
				  NULL,
				  NULL);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	clFinish(queue);
	starpu_opencl_collect_stats(event);
	clReleaseEvent(event);

	starpu_opencl_release_kernel(kernel);
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
	return;
}