int
main(void)
{
#ifdef STARPU_USE_CPU
	int ret;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	ncpu = starpu_cpu_worker_get_count();
#ifdef STARPU_USE_CUDA
	ncuda = starpu_cuda_worker_get_count();
#endif
#ifdef STARPU_USE_OPENCL
	nopencl = starpu_opencl_worker_get_count();
#endif

	if (ncpu == 0 || !gpus_available())
	{
		starpu_shutdown();
		return 77;
	}

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
	ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl", 
						  &opencl_conversion_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif
	init_problem_data();

	print_it();

	register_data();

	create_and_submit_tasks();

	unregister_data();

	print_it();

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
        starpu_opencl_unload_opencl(&opencl_conversion_program);
#endif
	starpu_shutdown();


	return check_it();
#else
	/* Without the CPU, there is no point in using the multiformat
	 * interface, so this test is pointless. */
	return 77;
#endif
}
int main(int argc, char **argv)
{
	int ret;

#ifdef STARPU_QUICK_CHECK
	n /= 10;
	k /= 16;
#endif

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_load_opencl_from_file("tests/datawizard/sync_and_notify_data_opencl_codelet.cl", &opencl_code, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

        starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));

	unsigned iter;
	for (iter = 0; iter < k; iter++)
	{
		unsigned ind;
		for (ind = 0; ind < n; ind++)
		{
			/* increment a = v[0] */
			struct starpu_codelet cl_inc_a =
			{
				.cpu_funcs = {cpu_codelet_incA},
#ifdef STARPU_USE_CUDA
				.cuda_funcs = {cuda_codelet_incA},
				.cuda_flags = {STARPU_CUDA_ASYNC},
#endif
#ifdef STARPU_USE_OPENCL
				.opencl_funcs = {opencl_codelet_incA},
				.opencl_flags = {STARPU_OPENCL_ASYNC},
int main(int argc, char **argv)
{
	int ret;

	struct starpu_conf conf;
	starpu_conf_init(&conf);

	conf.sched_policy_name = "eager";
	conf.calibrate = 2;

	ret = starpu_initialize(&conf, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	int slog;
	for (slog = START_LOG; slog < END_LOG; slog++)
	{
		int size = 1 << slog;
		test_memset(size);
	}

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif

	starpu_shutdown();

	return EXIT_SUCCESS;
}
int main(int argc, char **argv)
{
	int ret;

	/* Not supported yet */
	if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0)
		return 77;

	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/reductions/dot_product_opencl_kernels.cl",
						  &_opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

#ifdef STARPU_USE_CUDA
	/* cublasSdot has synchronization issues when using a non-blocking stream */
	cublasGetVersion(&cublas_version);
	if (cublas_version >= 7050)
		starpu_cublas_init();
#endif

	unsigned long nelems = _nblocks*_entries_per_block;
	size_t size = nelems*sizeof(float);

	_x = (float *) malloc(size);
	_y = (float *) malloc(size);

	_x_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t));
	_y_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t));

	assert(_x && _y);

        starpu_srand48(0);

	DOT_TYPE reference_dot = 0.0;

	unsigned long i;
	for (i = 0; i < nelems; i++)
	{
		_x[i] = (float)starpu_drand48();
		_y[i] = (float)starpu_drand48();

		reference_dot += (DOT_TYPE)_x[i]*(DOT_TYPE)_y[i];
	}

	unsigned block;
	for (block = 0; block < _nblocks; block++)
	{
		starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM,
			(uintptr_t)&_x[_entries_per_block*block], _entries_per_block, sizeof(float));
		starpu_vector_data_register(&_y_handles[block], STARPU_MAIN_RAM,
			(uintptr_t)&_y[_entries_per_block*block], _entries_per_block, sizeof(float));
	}

	starpu_variable_data_register(&_dot_handle, STARPU_MAIN_RAM, (uintptr_t)&_dot, sizeof(DOT_TYPE));

	/*
	 *	Compute dot product with StarPU
	 */
	starpu_data_set_reduction_methods(_dot_handle, &redux_codelet, &init_codelet);

	for (block = 0; block < _nblocks; block++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &dot_codelet;
		task->destroy = 1;

		task->handles[0] = _x_handles[block];
		task->handles[1] = _y_handles[block];
		task->handles[2] = _dot_handle;

		ret = starpu_task_submit(task);
		if (ret == -ENODEV) goto enodev;
		STARPU_ASSERT(!ret);
	}

	for (block = 0; block < _nblocks; block++)
	{
		starpu_data_unregister(_x_handles[block]);
		starpu_data_unregister(_y_handles[block]);
	}
	starpu_data_unregister(_dot_handle);

	FPRINTF(stderr, "Reference : %e vs. %e (Delta %e)\n", reference_dot, _dot, reference_dot - _dot);

#ifdef STARPU_USE_CUDA
	if (cublas_version >= 7050)
		starpu_cublas_shutdown();
#endif

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&_opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();

	free(_x);
	free(_y);
	free(_x_handles);
	free(_y_handles);

	if (fabs(reference_dot - _dot) < reference_dot * 1e-6)
		return EXIT_SUCCESS;
	else
		return EXIT_FAILURE;

enodev:
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* yes, we do not perform the computation but we did detect that no one
 	 * could perform the kernel, so this is not an error from StarPU */
	return 77;
}
Example #5
0
int main(int argc, char **argv)
{
	unsigned i;
        float foo;
	starpu_data_handle float_array_handle;
	starpu_codelet cl;

	starpu_init(NULL);
        if (argc == 2) niter = atoi(argv[1]);
        foo = 0.0f;

	starpu_variable_data_register(&float_array_handle, 0 /* home node */,
                                      (uintptr_t)&foo, sizeof(float));

#ifdef STARPU_USE_OPENCL
        starpu_opencl_load_opencl_from_file("examples/basic_examples/variable_kernels_opencl_codelet.cl", &opencl_code);
#endif

	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL;
        cl.cpu_func = cpu_codelet;
#ifdef STARPU_USE_CUDA
        cl.cuda_func = cuda_codelet;
#endif
#ifdef STARPU_USE_OPENCL
        cl.opencl_func = opencl_codelet;
#endif
        cl.nbuffers = 1;
        cl.model = NULL;

	for (i = 0; i < niter; i++)
	{
		struct starpu_task *task = starpu_task_create();
                int ret;

		task->cl = &cl;

		task->callback_func = NULL;

		task->buffers[0].handle = float_array_handle;
		task->buffers[0].mode = STARPU_RW;

		ret = starpu_task_submit(task, NULL);
		if (STARPU_UNLIKELY(ret == -ENODEV))
		{
			fprintf(stderr, "No worker may execute this task\n");
			exit(0);
		}
	}

	starpu_task_wait_for_all();

	/* update the array in RAM */
	starpu_data_acquire(float_array_handle, STARPU_R);

	fprintf(stderr, "variable -> %f\n", foo);

	starpu_data_release(float_array_handle);

	starpu_shutdown();

	return 0;
}
int main(int argc, char **argv)
{
	int ret;

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scratch_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif
	A = (unsigned *) calloc(VECTORSIZE, sizeof(unsigned));

	starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, VECTORSIZE, sizeof(unsigned));
	starpu_vector_data_register(&B_handle, -1, (uintptr_t)NULL, VECTORSIZE, sizeof(unsigned));

	unsigned loop;
	for (loop = 0; loop < NLOOPS; loop++)
	{
		struct starpu_task *task_f = starpu_task_create();
		task_f->cl = &cl_f;
		task_f->handles[0] = A_handle;
		task_f->handles[1] = B_handle;

		ret = starpu_task_submit(task_f);
		if (ret == -ENODEV) goto enodev;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	ret = starpu_task_wait_for_all();
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

	starpu_data_unregister(A_handle);
	starpu_data_unregister(B_handle);
#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();

	/* Check result */
	unsigned i;
	ret = EXIT_SUCCESS;
	for (i = 0; i < VECTORSIZE; i++)
	{
		if (A[i] != NLOOPS)
		{
			FPRINTF(stderr, "Error: Incorrect value A[%u] = %u != %d\n", i, A[i], NLOOPS);
			ret = EXIT_FAILURE;
			break;
		}
	}

	free(A);
	STARPU_RETURN(ret);

enodev:
	starpu_data_unregister(A_handle);
	starpu_data_unregister(B_handle);
#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();
	/* yes, we do not perform the computation but we did detect that no one
 	 * could perform the kernel, so this is not an error from StarPU */
	fprintf(stderr, "WARNING: No one can execute this task\n");
	return STARPU_TEST_SKIPPED;
}
int main(int argc, char **argv)
{
    unsigned *foo;
    starpu_data_handle_t handle;
    int ret;
    unsigned n, i, size;

    ret = starpu_initialize(NULL, &argc, &argv);
    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
    ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

    n = starpu_worker_get_count();
    if (n == 1)
    {
        starpu_shutdown();
        return STARPU_TEST_SKIPPED;
    }

    size = 10 * n;

    foo = (unsigned *) calloc(size, sizeof(*foo));
    for (i = 0; i < size; i++)
        foo[i] = i;

    starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo));

    /* Broadcast the data to force in-place partitioning */
    for (i = 0; i < n; i++)
        starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);

    struct starpu_data_filter f =
    {
        .filter_func = starpu_vector_filter_block,
        .nchildren = n,
    };

    starpu_data_partition(handle, &f);

    for (i = 0; i < f.nchildren; i++)
    {
        struct starpu_task *task = starpu_task_create();

        task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
        task->cl = &scal_codelet;
        task->execute_on_a_specific_worker = 1;
        task->workerid = i;

        ret = starpu_task_submit(task);
        if (ret == -ENODEV) goto enodev;
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
    }

    ret = starpu_task_wait_for_all();
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
    starpu_data_unregister(handle);
    starpu_shutdown();

    ret = EXIT_SUCCESS;
    for (i = 0; i < size; i++)
    {
        if (foo[i] != i*2)
        {
            FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i);
            ret = EXIT_FAILURE;
        }
    }

    return ret;

enodev:
    starpu_data_unregister(handle);
    fprintf(stderr, "WARNING: No one can execute this task\n");
    /* yes, we do not perform the computation but we did detect that no one
     * could perform the kernel, so this is not an error from StarPU */
    starpu_shutdown();
    return STARPU_TEST_SKIPPED;
}
Example #8
0
int main(int argc, char **argv)
{
	int ret, exit_value = 0;

	/* Initialize StarPU */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	starpu_cublas_init();

	/* This is equivalent to
		vec_a = malloc(N*sizeof(TYPE));
		vec_b = malloc(N*sizeof(TYPE));
	*/
	starpu_malloc((void **)&_vec_x, N*sizeof(TYPE));
	assert(_vec_x);

	starpu_malloc((void **)&_vec_y, N*sizeof(TYPE));
	assert(_vec_y);

	unsigned i;
	for (i = 0; i < N; i++)
	{
		_vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */
		_vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */
	}

	FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]);
	FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]);

	/* Declare the data to StarPU */
	starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE));
	starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE));

	/* Divide the vector into blocks */
	struct starpu_data_filter block_filter =
	{
		.filter_func = starpu_vector_filter_block,
		.nchildren = NBLOCKS
	};

	starpu_data_partition(_handle_x, &block_filter);
	starpu_data_partition(_handle_y, &block_filter);

	double start;
	double end;

	start = starpu_timing_now();

	unsigned b;
	for (b = 0; b < NBLOCKS; b++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &axpy_cl;

		task->cl_arg = &_alpha;
		task->cl_arg_size = sizeof(_alpha);

		task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b);
		task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b);

		task->tag_id = b;

		ret = starpu_task_submit(task);
		if (ret == -ENODEV)
		{
			exit_value = 77;
			goto enodev;
		}
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	starpu_task_wait_for_all();

enodev:
	starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM);
	starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM);
	starpu_data_unregister(_handle_x);
	starpu_data_unregister(_handle_y);

	end = starpu_timing_now();
        double timing = end - start;

	FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing);

	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha);

	if (exit_value != 77)
		exit_value = check();

	starpu_free((void *)_vec_x);
	starpu_free((void *)_vec_y);

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	/* Stop StarPU */
	starpu_shutdown();

	return exit_value;
}
int main(int argc, char **argv)
{
	/* We consider a vector of float that is initialized just as any of C
	 * data */
	float vector[NX];
	unsigned i;
	for (i = 0; i < NX; i++)
		vector[i] = 1.0f;

	fprintf(stderr, "BEFORE : First element was %f\n", vector[0]);

	/* Initialize StarPU with default configuration */
	int ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	starpu_opencl_load_opencl_from_file("vector_scal_opencl_kernel.cl", &programs, NULL);
#endif

	/* Tell StaPU to associate the "vector" vector with the "vector_handle"
	 * identifier. When a task needs to access a piece of data, it should
	 * refer to the handle that is associated to it.
	 * In the case of the "vector" data interface:
	 *  - the first argument of the registration method is a pointer to the
	 *    handle that should describe the data
	 *  - the second argument is the memory node where the data (ie. "vector")
	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
	 *    opposed to an adress on a GPU for instance.
	 *  - the third argument is the adress of the vector in RAM
	 *  - the fourth argument is the number of elements in the vector
	 *  - the fifth argument is the size of each element.
	 */
	starpu_data_handle_t vector_handle;
	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
				    NX, sizeof(vector[0]));

	float factor = 3.14;

	/* create a synchronous task: any call to starpu_task_submit will block
	 * until it is terminated */
	struct starpu_task *task = starpu_task_create();
	task->synchronous = 1;

	task->cl = &cl;

	/* the codelet manipulates one buffer in RW mode */
	task->handles[0] = vector_handle;

	/* an argument is passed to the codelet, beware that this is a
	 * READ-ONLY buffer and that the codelet may be given a pointer to a
	 * COPY of the argument */
	task->cl_arg = &factor;
	task->cl_arg_size = sizeof(factor);

	/* execute the task on any eligible computational ressource */
	ret = starpu_task_submit(task);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

	/* StarPU does not need to manipulate the array anymore so we can stop
	 * monitoring it */
	starpu_data_unregister(vector_handle);

#ifdef STARPU_USE_OPENCL
	starpu_opencl_unload_opencl(&programs);
#endif

	/* terminate StarPU, no task can be submitted after */
	starpu_shutdown();

	fprintf(stderr, "AFTER First element is %f\n", vector[0]);

	return 0;
}
void cpu_to_opencl_opencl_func(void *buffers[], void *args)
{
	STARPU_SKIP_IF_VALGRIND;

	(void) args;
	int id, devid, ret;
        cl_int err;
	cl_kernel kernel;
	cl_command_queue queue;
	cl_event event;

	unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
	cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]);
	cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);

	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);

	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
						  &opencl_conversion_program,
						  NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");

	err = starpu_opencl_load_kernel(&kernel,
					&queue,
					&opencl_conversion_program,
					"cpu_to_opencl_opencl",
					devid);

	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 0, sizeof(src), &src);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 1, sizeof(dst), &dst);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 2, sizeof(n), &n);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);


	{
		size_t global=n;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel,
						device,
						CL_KERNEL_WORK_GROUP_SIZE,
						sizeof(local),
						&local,
						&s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);

                if (local > global)
			local = global;

		err = clEnqueueNDRangeKernel(queue,
					kernel,
					1,
					NULL,
					&global,
					&local,
					0,
					NULL,
					&event);

		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}

	clFinish(queue);
	starpu_opencl_collect_stats(event);
	clReleaseEvent(event);

	starpu_opencl_release_kernel(kernel);
        ret = starpu_opencl_unload_opencl(&opencl_conversion_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
}
int main(int argc, char **argv)
{
	int ret = 0;
	starpu_data_handle_t handle1;
	starpu_data_handle_t handle2;

	double real = 45.0;
	double imaginary = 12.0;
	double copy_real = 78.0;
	double copy_imaginary = 78.0;

	int compare;
	int *compare_ptr = &compare;

	ret = starpu_init(NULL);
	if (ret == -ENODEV) return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_kernels.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif
	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_compare,
				 STARPU_R, handle1,
				 STARPU_R, handle2,
				 STARPU_VALUE, &compare_ptr, sizeof(compare_ptr),
				 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
	starpu_task_wait_for_all();
	if (compare != 0)
	{
	     FPRINTF(stderr, "Complex numbers should NOT be similar\n");
	     goto end;
	}

	ret = starpu_task_insert(&cl_copy,
				 STARPU_R, handle1,
				 STARPU_W, handle2,
				 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_compare,
				 STARPU_R, handle1,
				 STARPU_R, handle2,
				 STARPU_VALUE, &compare_ptr, sizeof(compare_ptr),
				 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	starpu_task_wait_for_all();

	if (compare != 1)
	{
	     FPRINTF(stderr, "Complex numbers should be similar\n");
	}

end:
#ifdef STARPU_USE_OPENCL
	{
	     int ret2 = starpu_opencl_unload_opencl(&opencl_program);
	     STARPU_CHECK_RETURN_VALUE(ret2, "starpu_opencl_unload_opencl");
	}
#endif
	starpu_data_unregister(handle1);
	starpu_data_unregister(handle2);
	starpu_shutdown();
	if (ret == -ENODEV) return 77; else return !compare;
}
void test_variable_opencl_func(void *buffers[], void *args)
{
	STARPU_SKIP_IF_VALGRIND;

	int id, devid, ret;
	int factor = *(int *) args;

        cl_int             err;
	cl_kernel          kernel;
	cl_command_queue   queue;
	cl_event           event;

	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");

	cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]);

	cl_context context;
	id = starpu_worker_get_id();
	devid = starpu_worker_get_devid(id);
	starpu_opencl_get_context(devid, &context);

	cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
		sizeof(int), &variable_config.copy_failed, &err);

	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);


	err = starpu_opencl_load_kernel(&kernel,
					&queue,
					&opencl_program,
					"variable_opencl",
					devid);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err  = clSetKernelArg(kernel, 0, sizeof(val), &val);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 1, sizeof(fail), &fail);
	if (err)
		STARPU_OPENCL_REPORT_ERROR(err);

	err = clSetKernelArg(kernel, 2, sizeof(factor), &factor);
	if (err)
		STARPU_OPENCL_REPORT_ERROR(err);

	{
		size_t global = 1;
		size_t local;
                size_t s;
                cl_device_id device;

                starpu_opencl_get_device(devid, &device);

                err = clGetKernelWorkGroupInfo (kernel,
						device,
						CL_KERNEL_WORK_GROUP_SIZE,
						sizeof(local),
						&local,
						&s);
                if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);

                if (local > global)
			local = global;

		err = clEnqueueNDRangeKernel(queue,
					kernel,
					1,
					NULL,
					&global,
					&local,
					0,
					NULL,
					&event);

		if (err != CL_SUCCESS)
			STARPU_OPENCL_REPORT_ERROR(err);
	}

	err = clEnqueueReadBuffer(queue,
				  fail,
				  CL_TRUE,
				  0, 
				  sizeof(int),
				  &variable_config.copy_failed,
				  0,
				  NULL,
				  NULL);
	if (err != CL_SUCCESS)
		STARPU_OPENCL_REPORT_ERROR(err);

	clFinish(queue);
	starpu_opencl_collect_stats(event);
	clReleaseEvent(event);

	starpu_opencl_release_kernel(kernel);
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
	return;
}