static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned reclevel)
{
	int ret;

	struct starpu_task *task = create_task(TAG21_AUX(k, j, reclevel));

	task->cl = &cl21;

	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j);

	if (j == k+1)
	{
		task->priority = STARPU_MAX_PRIO;
	}

	/* enforce dependencies ... */
	if (k > 0)
	{
		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 2, TAG11_AUX(k, reclevel), TAG22_AUX(k-1, k, j, reclevel));
	}
	else
	{
		starpu_tag_declare_deps(TAG21_AUX(k, j, reclevel), 1, TAG11_AUX(k, reclevel));
	}

	int n = starpu_matrix_get_nx(task->handles[0]);
	task->flops = FLOPS_STRSM(n, n);

	ret = starpu_task_submit(task);
	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	return ret;
}
Exemplo n.º 2
0
static void create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned j)
{
	int ret;
	struct starpu_task *task = create_task(TAG21(k, j));

	task->cl = &cl21;
	
	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j);

	if (!no_prio && (j == k+1))
	{
		task->priority = STARPU_MAX_PRIO;
	}

	/* enforce dependencies ... */
	if (k > 0)
	{
		starpu_tag_declare_deps(TAG21(k, j), 2, TAG11(k), TAG22(k-1, k, j));
	}
	else
	{
		starpu_tag_declare_deps(TAG21(k, j), 1, TAG11(k));
	}

	ret = starpu_task_submit(task);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
}
Exemplo n.º 3
0
static void create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned i)
{
	int ret;

/*	printf("task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG12(k,i)); */

	struct starpu_task *task = create_task(TAG12(k, i));
	
	task->cl = &cl12;

	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
	task->handles[1] = starpu_data_get_sub_data(dataA, 2, i, k);

	if (!no_prio && (i == k+1))
	{
		task->priority = STARPU_MAX_PRIO;
	}

	/* enforce dependencies ... */
	if (k > 0)
	{
		starpu_tag_declare_deps(TAG12(k, i), 2, TAG11(k), TAG22(k-1, i, k));
	}
	else
	{
		starpu_tag_declare_deps(TAG12(k, i), 1, TAG11(k));
	}

	ret = starpu_task_submit(task);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
}
Exemplo n.º 4
0
int main(int argc, char **argv)
{
	int ret;

	starpu_init(NULL);

	starpu_data_malloc_pinned_if_possible((void **)&buffer, VECTORSIZE);

	starpu_vector_data_register(&v_handle, 0, (uintptr_t)buffer, VECTORSIZE, sizeof(char));

	struct starpu_data_filter f = {
		.filter_func = starpu_vector_divide_in_2_filter_func,
		/* there are only 2 children */
		.nchildren = 2,
		/* the length of the first part */
		.filter_arg = VECTORSIZE/2,
		.get_nchildren = NULL,
		.get_child_ops = NULL
	};

	unsigned iter;
	for (iter = 0; iter < NITER; iter++)
	{
		starpu_data_map_filters(v_handle, 1, &f);
	
		ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 0));
		if (ret == -ENODEV)
			goto enodev;
	
		ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 1));
		if (ret == -ENODEV)
			goto enodev;
	
		starpu_task_wait_for_all();
	
		starpu_data_unpartition(v_handle, 0);
	
		ret = use_handle(v_handle);
		if (ret == -ENODEV)
			goto enodev;
	
		starpu_task_wait_for_all();
	}

	starpu_data_unregister(v_handle);

	starpu_shutdown();

	return 0;

enodev:
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* yes, we do not perform the computation but we did detect that no one
 	 * could perform the kernel, so this is not an error from StarPU */
	return 0;
}
Exemplo n.º 5
0
static void launch_codelets(void)
{
#ifdef STARPU_USE_FXT
	_starpu_fxt_register_thread(0);
#endif
	/* partition the work into slices */
	unsigned taskx, tasky;

	srand(time(NULL));

	/* should we use a single performance model for all archs and use an
 	 * acceleration factor ? */
	if (use_common_model) {
		cl.model = &sgemm_model_common;
	}
	else {
		cl.model = &sgemm_model;
	}

	for (taskx = 0; taskx < nslicesx; taskx++) 
	{
		for (tasky = 0; tasky < nslicesy; tasky++)
		{
			/* A B[task] = C[task] */
			struct starpu_task *task = starpu_task_create();

			task->cl = &cl;
			task->cl_arg = &conf;
			task->cl_arg_size = sizeof(struct block_conf);

			task->callback_func = callback_func;
			task->callback_arg = NULL;

			starpu_tag_t tag = TAG(taskx, tasky); 

			task->use_tag = 1;
			task->tag_id = tag;

			task->buffers[0].handle = starpu_data_get_sub_data(A_handle, 1, tasky);
			task->buffers[0].mode = STARPU_R;
			task->buffers[1].handle = starpu_data_get_sub_data(B_handle, 1, taskx);
			task->buffers[1].mode = STARPU_R;
			task->buffers[2].handle = 
				starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
			task->buffers[2].mode = STARPU_RW;

			starpu_task_submit(task, NULL);
		}
	}
}
static struct starpu_task * create_task_11(starpu_data_handle_t dataA, unsigned k)
{
/*	FPRINTF(stdout, "task 11 k = %d TAG = %llx\n", k, (TAG11(k))); */

	struct starpu_task *task = create_task(TAG11(k));

	task->cl = &cl11;

	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);

	/* this is an important task */
	if (!noprio)
		task->priority = STARPU_MAX_PRIO;

	/* enforce dependencies ... */
	if (k > 0)
	{
		starpu_tag_declare_deps(TAG11(k), 1, TAG22(k-1, k, k));
	}

	int n = starpu_matrix_get_nx(task->handles[0]);
	task->flops = FLOPS_SPOTRF(n);

	return task;
}
Exemplo n.º 7
0
static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
{
	int ret;
	struct starpu_task *task = starpu_task_create();
	task->cl = &cl12;

	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k);

	task->tag_id = TAG12(k,j);

	if (!no_prio && (j == k+1))
		task->priority = STARPU_MAX_PRIO;

	ret = starpu_task_submit(task);
	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	return ret;
}
static void create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j)
{
/*	FPRINTF(stdout, "task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG22(k,i,j)); */

	struct starpu_task *task = create_task(TAG22(k, i, j));

	task->cl = &cl22;

	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i);
	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j);
	task->handles[2] = starpu_data_get_sub_data(dataA, 2, i, j);

	if (!noprio && (i == k + 1) && (j == k +1) )
	{
		task->priority = STARPU_MAX_PRIO;
	}

	/* enforce dependencies ... */
	if (k > 0)
	{
		starpu_tag_declare_deps(TAG22(k, i, j), 3, TAG22(k-1, i, j), TAG21(k, i), TAG21(k, j));
	}
	else
	{
		starpu_tag_declare_deps(TAG22(k, i, j), 2, TAG21(k, i), TAG21(k, j));
	}

	int n = starpu_matrix_get_nx(task->handles[0]);
	task->flops = FLOPS_SGEMM(n, n, n);

	int ret = starpu_task_submit(task);
        if (STARPU_UNLIKELY(ret == -ENODEV))
	{
                FPRINTF(stderr, "No worker may execute this task\n");
                exit(0);
        }
}
static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned reclevel)
{
	int ret;

/*	FPRINTF(stdout, "task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG22_AUX(k,i,j)); */

	struct starpu_task *task = create_task(TAG22_AUX(k, i, j, reclevel));

	task->cl = &cl22;

	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i);
	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j);
	task->handles[2] = starpu_data_get_sub_data(dataA, 2, i, j);

	if ( (i == k + 1) && (j == k +1) )
	{
		task->priority = STARPU_MAX_PRIO;
	}

	/* enforce dependencies ... */
	if (k > 0)
	{
		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 3, TAG22_AUX(k-1, i, j, reclevel), TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
	}
	else
	{
		starpu_tag_declare_deps(TAG22_AUX(k, i, j, reclevel), 2, TAG21_AUX(k, i, reclevel), TAG21_AUX(k, j, reclevel));
	}

	int n = starpu_matrix_get_nx(task->handles[0]);
	task->flops = FLOPS_SGEMM(n, n, n);

	ret = starpu_task_submit(task);
	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	return ret;
}
Exemplo n.º 10
0
static int create_task_11(starpu_data_handle_t dataA, unsigned k)
{
	int ret;
	struct starpu_task *task = starpu_task_create();
	task->cl = &cl11;

	/* which sub-data is manipulated ? */
	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);

	task->tag_id = TAG11(k);

	/* this is an important task */
	if (!no_prio)
		task->priority = STARPU_MAX_PRIO;

	ret = starpu_task_submit(task);
	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	return ret;
}
Exemplo n.º 11
0
int main(int argc, char **argv)
{
	int ret;

	assert(HEIGHT % (2*BLOCK_HEIGHT) == 0);
	assert(HEIGHT % FACTOR == 0);
	
	parse_args(argc, argv);

/*	fprintf(stderr, "Reading input file ...\n"); */

	/* how many frames ? */
	struct stat stbuf;
	stat(filename_in, &stbuf);
	size_t filesize = stbuf.st_size;

	unsigned nframes = filesize/FRAMESIZE; 

/*	fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes); */
	assert((filesize % sizeof(struct yuv_frame)) == 0);

	struct yuv_frame *yuv_in_buffer = (struct yuv_frame *) malloc(nframes*FRAMESIZE);
	assert(yuv_in_buffer);

/*	fprintf(stderr, "Alloc output file ...\n"); */
	struct yuv_new_frame *yuv_out_buffer = (struct yuv_new_frame *) calloc(nframes, NEW_FRAMESIZE);
	assert(yuv_out_buffer);

	/* fetch input data */
	FILE *f_in = fopen(filename_in, "r");
	assert(f_in);

	/* allocate room for an output buffer */
	FILE *f_out = fopen(filename_out, "w+");
	assert(f_out);

	fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);

	starpu_data_handle_t *frame_y_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *frame_u_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *frame_v_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));

	starpu_data_handle_t *new_frame_y_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *new_frame_u_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *new_frame_v_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* register and partition all layers */
	unsigned frame;
	for (frame = 0; frame < nframes; frame++)
	{
		/* register Y layer */
		starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].y,
			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));

		starpu_data_partition(frame_y_handle[frame], &filter_y);

		starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].y,
			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));

		starpu_data_partition(new_frame_y_handle[frame], &filter_y);

		/* register U layer */
		starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].u,
			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(frame_u_handle[frame], &filter_uv);

		starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].u,
			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(new_frame_u_handle[frame], &filter_uv);

		/* register V layer */
		starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].v,
			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(frame_v_handle[frame], &filter_uv);

		starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].v,
			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(new_frame_v_handle[frame], &filter_uv);

	}

	/* how many tasks are there ? */
	unsigned nblocks_y = filter_y.nchildren;
	unsigned nblocks_uv = filter_uv.nchildren;

	unsigned ntasks = (nblocks_y + 2*nblocks_uv)*nframes;

	fprintf(stderr, "Start computation: there will be %u tasks for %u frames\n", ntasks, nframes);
	start = starpu_timing_now();

	/* do the computation */
	for (frame = 0; frame < nframes; frame++)
	{
		unsigned blocky;
		for (blocky = 0; blocky < nblocks_y; blocky++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		unsigned blocku;
		for (blocku = 0; blocku < nblocks_uv; blocku++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		unsigned blockv;
		for (blockv = 0; blockv < nblocks_uv; blockv++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}
	}

	/* make sure all output buffers are sync'ed */
	for (frame = 0; frame < nframes; frame++)
	{
		starpu_data_unregister(frame_y_handle[frame]);
		starpu_data_unregister(frame_u_handle[frame]);
		starpu_data_unregister(frame_v_handle[frame]);

		starpu_data_unregister(new_frame_y_handle[frame]);
		starpu_data_unregister(new_frame_u_handle[frame]);
		starpu_data_unregister(new_frame_v_handle[frame]);
	}

	/* There is an implicit barrier: the unregister methods will block
	 * until the computation is done and that the result was put back into
	 * memory. */
	end = starpu_timing_now();

	double timing = end - start;
	printf("# s\tFPS\n");
	printf("%f\t%f\n", timing/1000000, (1000000*nframes)/timing);

	fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out);

	/* partition the layers into smaller parts */
	starpu_shutdown();

	if (fclose(f_in) != 0)
		fprintf(stderr, "Could not close %s properly\n", filename_in);

	if (fclose(f_out) != 0)
		fprintf(stderr, "Could not close %s properly\n", filename_out);

	return 0;
}
Exemplo n.º 12
0
int main(int argc, char **argv)
{
    unsigned *foo;
    starpu_data_handle_t handle;
    int ret;
    unsigned n, i, size;

    ret = starpu_initialize(NULL, &argc, &argv);
    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
    ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

    n = starpu_worker_get_count();
    if (n == 1)
    {
        starpu_shutdown();
        return STARPU_TEST_SKIPPED;
    }

    size = 10 * n;

    foo = (unsigned *) calloc(size, sizeof(*foo));
    for (i = 0; i < size; i++)
        foo[i] = i;

    starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo));

    /* Broadcast the data to force in-place partitioning */
    for (i = 0; i < n; i++)
        starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);

    struct starpu_data_filter f =
    {
        .filter_func = starpu_vector_filter_block,
        .nchildren = n,
    };

    starpu_data_partition(handle, &f);

    for (i = 0; i < f.nchildren; i++)
    {
        struct starpu_task *task = starpu_task_create();

        task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
        task->cl = &scal_codelet;
        task->execute_on_a_specific_worker = 1;
        task->workerid = i;

        ret = starpu_task_submit(task);
        if (ret == -ENODEV) goto enodev;
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
    }

    ret = starpu_task_wait_for_all();
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
    starpu_data_unregister(handle);
    starpu_shutdown();

    ret = EXIT_SUCCESS;
    for (i = 0; i < size; i++)
    {
        if (foo[i] != i*2)
        {
            FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i);
            ret = EXIT_FAILURE;
        }
    }

    return ret;

enodev:
    starpu_data_unregister(handle);
    fprintf(stderr, "WARNING: No one can execute this task\n");
    /* yes, we do not perform the computation but we did detect that no one
     * could perform the kernel, so this is not an error from StarPU */
    starpu_shutdown();
    return STARPU_TEST_SKIPPED;
}
Exemplo n.º 13
0
int main(int argc, char **argv)
{
	int ret, exit_value = 0;

	/* Initialize StarPU */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	starpu_cublas_init();

	/* This is equivalent to
		vec_a = malloc(N*sizeof(TYPE));
		vec_b = malloc(N*sizeof(TYPE));
	*/
	starpu_malloc((void **)&_vec_x, N*sizeof(TYPE));
	assert(_vec_x);

	starpu_malloc((void **)&_vec_y, N*sizeof(TYPE));
	assert(_vec_y);

	unsigned i;
	for (i = 0; i < N; i++)
	{
		_vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */
		_vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */
	}

	FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]);
	FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]);

	/* Declare the data to StarPU */
	starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE));
	starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE));

	/* Divide the vector into blocks */
	struct starpu_data_filter block_filter =
	{
		.filter_func = starpu_vector_filter_block,
		.nchildren = NBLOCKS
	};

	starpu_data_partition(_handle_x, &block_filter);
	starpu_data_partition(_handle_y, &block_filter);

	double start;
	double end;

	start = starpu_timing_now();

	unsigned b;
	for (b = 0; b < NBLOCKS; b++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &axpy_cl;

		task->cl_arg = &_alpha;
		task->cl_arg_size = sizeof(_alpha);

		task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b);
		task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b);

		task->tag_id = b;

		ret = starpu_task_submit(task);
		if (ret == -ENODEV)
		{
			exit_value = 77;
			goto enodev;
		}
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	starpu_task_wait_for_all();

enodev:
	starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM);
	starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM);
	starpu_data_unregister(_handle_x);
	starpu_data_unregister(_handle_y);

	end = starpu_timing_now();
        double timing = end - start;

	FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing);

	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha);

	if (exit_value != 77)
		exit_value = check();

	starpu_free((void *)_vec_x);
	starpu_free((void *)_vec_y);

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	/* Stop StarPU */
	starpu_shutdown();

	return exit_value;
}
Exemplo n.º 14
0
starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp,
			unsigned nblocks STARPU_ATTRIBUTE_UNUSED, unsigned j, unsigned i)
{
	/* we use filters */
	return starpu_data_get_sub_data(*dataAp, 2, j, i);
}
Exemplo n.º 15
0
starpu_data_handle get_block_with_striding(starpu_data_handle *dataAp,
			unsigned nblocks __attribute__((unused)), unsigned j, unsigned i)
{
	/* we use filters */
	return starpu_data_get_sub_data(*dataAp, 2, j, i);
}
Exemplo n.º 16
0
int main(int argc, char **argv)
{
	double start, end;
	int ret;

	parse_args(argc, argv);

#ifdef STARPU_QUICK_CHECK
	niter /= 10;
#endif

	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	starpu_cublas_init();

	init_problem_data();
	partition_mult_data();

	if (bound)
		starpu_bound_start(0, 0);

	start = starpu_timing_now();

	unsigned x, y, iter;
	for (iter = 0; iter < niter; iter++)
	{
		for (x = 0; x < nslicesx; x++)
		for (y = 0; y < nslicesy; y++)
		{
			struct starpu_task *task = starpu_task_create();

			task->cl = &cl;

			task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y);
			task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x);
			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y);

			task->flops = 2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim;

			ret = starpu_task_submit(task);
			if (ret == -ENODEV)
			{
			     ret = 77;
			     goto enodev;
			}
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		starpu_task_wait_for_all();
	}


	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	double timing = end - start;
	double min, min_int;
	double flops = 2.0*((unsigned long long)niter)*((unsigned long long)xdim)
		           *((unsigned long long)ydim)*((unsigned long long)zdim);

	if (bound)
		starpu_bound_compute(&min, &min_int, 1);

	PRINTF("# x\ty\tz\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops\tTims\tTiGFlops");
	PRINTF("\n");
	PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0);
	if (bound)
		PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0);
	PRINTF("\n");

enodev:
	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);

	starpu_data_unregister(A_handle);
	starpu_data_unregister(B_handle);
	starpu_data_unregister(C_handle);

	if (check)
		check_output();

	starpu_free(A);
	starpu_free(B);
	starpu_free(C);

	starpu_cublas_shutdown();
	starpu_shutdown();

	return ret;
}
Exemplo n.º 17
0
static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
{
	double start;
	double end;
	int ret;

	/* create all the DAG nodes */
	unsigned i,j,k;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
		ret = create_task_11(dataA, k);
		if (ret == -ENODEV) return ret;

		for (i = k+1; i<nblocks; i++)
		{
		     ret = create_task_12(dataA, k, i);
		     if (ret == -ENODEV) return ret;
		     ret = create_task_21(dataA, k, i);
		     if (ret == -ENODEV) return ret;
		}
		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, k));

		for (i = k+1; i<nblocks; i++)
		     for (j = k+1; j<nblocks; j++)
		     {
			  ret = create_task_22(dataA, k, i, j);
			  if (ret == -ENODEV) return ret;
		     }
		for (i = k+1; i<nblocks; i++)
		{
		    starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, i));
		    starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, i, k));
		}
	}

	/* stall the application until the end of computations */
	starpu_task_wait_for_all();

	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	double timing = end - start;
	unsigned n = starpu_matrix_get_nx(dataA);
	double flop = (2.0f*n*n*n)/3.0f;

	PRINTF("# size\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops");
	PRINTF("\n");
	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
	if (bound)
	{
		double min;
		starpu_bound_compute(&min, NULL, 0);
		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
	}
	PRINTF("\n");

	return 0;
}
Exemplo n.º 18
0
void launch_spmv_codelets(void)
{
	struct starpu_task *task_tab;
	uint8_t *is_entry_tab;
	int ret;

	/* we call one codelet per block */
	unsigned nblocks = starpu_bcsr_get_nnz(sparse_matrix); 
	unsigned nrows = starpu_bcsr_get_nrow(sparse_matrix); 

	remainingtasks = NSPMV*nblocks;
	totaltasks = remainingtasks;

	unsigned taskid = 0;

	task_tab = calloc(totaltasks, sizeof(struct starpu_task));
	STARPU_ASSERT(task_tab);

	is_entry_tab = calloc(totaltasks, sizeof(uint8_t));
	STARPU_ASSERT(is_entry_tab);

	printf("there will be %d codelets\n", remainingtasks);

	uint32_t *rowptr = starpu_bcsr_get_local_rowptr(sparse_matrix);
	uint32_t *colind = starpu_bcsr_get_local_colind(sparse_matrix);

	start = starpu_timing_now();

	unsigned loop;
	for (loop = 0; loop < NSPMV; loop++)
	{
		unsigned row;
		unsigned part = 0;

		for (row = 0; row < nrows; row++)
		{
			unsigned index;

			if (rowptr[row] == rowptr[row+1])
			{
				continue;
			}


			for (index = rowptr[row]; index < rowptr[row+1]; index++, part++)
			{
				struct starpu_task *task = &task_tab[taskid];
				starpu_task_init(task);

				task->use_tag = 1;
				task->tag_id = taskid;

				task->callback_func = init_problem_callback;
				task->callback_arg = &remainingtasks;
				task->cl = &cl;
				task->cl_arg = NULL;

				unsigned i = colind[index];
				unsigned j = row;

				task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
				task->handles[1] = starpu_data_get_sub_data(vector_in, 1, i);
				task->handles[2] = starpu_data_get_sub_data(vector_out, 1, j);

				/* all tasks in the same row are dependant so that we don't wait too much for data 
				 * we need to wait on the previous task if we are not the first task of a row */
				if (index != rowptr[row & ~0x3])
				{
					/* this is not the first task in the row */
					starpu_tag_declare_deps((starpu_tag_t)taskid, 1, (starpu_tag_t)(taskid-1));

					is_entry_tab[taskid] = 0;
				}
				else
				{
					/* this is an entry task */
					is_entry_tab[taskid] = 1;
				}

				taskid++;
			}
		}
	}

	printf("start submitting tasks !\n");

	/* submit ALL tasks now */
	unsigned nchains = 0;
	unsigned task;
	for (task = 0; task < totaltasks; task++)
	{
		if (is_entry_tab[task])
		{
			nchains++;
		}

		ret = starpu_task_submit(&task_tab[task]);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	printf("end of task submission (there was %d chains for %d tasks : ratio %d tasks per chain) !\n", nchains, totaltasks, totaltasks/nchains);
}
Exemplo n.º 19
0
int main(int argc, char **argv)
{
	int ret;
	unsigned part;
	double timing;
	double start, end;
	unsigned row, pos;
	unsigned ind;

	/* CSR matrix description */
	float *nzval;
	uint32_t nnz;
	uint32_t *colind;
	uint32_t *rowptr;
	
	/* Input and Output vectors */
	float *vector_in_ptr;
	float *vector_out_ptr;

	/*
	 *	Parse command-line arguments
	 */
	parse_args(argc, argv);

	/*
	 *	Launch StarPU
	 */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/*
	 *	Create a 3-band sparse matrix as input example
	 */
	nnz = 3*size-2;
	starpu_malloc((void **)&nzval, nnz*sizeof(float));
	starpu_malloc((void **)&colind, nnz*sizeof(uint32_t));
	starpu_malloc((void **)&rowptr, (size+1)*sizeof(uint32_t));
	assert(nzval && colind && rowptr);

	/* fill the matrix */
	for (row = 0, pos = 0; row < size; row++)
	{
		rowptr[row] = pos;

		if (row > 0)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row-1;
			pos++;
		}
		
		nzval[pos] = 5.0f;
		colind[pos] = row;
		pos++;

		if (row < size - 1)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row+1;
			pos++;
		}
	}

	STARPU_ASSERT(pos == nnz);

	rowptr[size] = nnz;
	
	/* initiate the 2 vectors */
	starpu_malloc((void **)&vector_in_ptr, size*sizeof(float));
	starpu_malloc((void **)&vector_out_ptr, size*sizeof(float));
	assert(vector_in_ptr && vector_out_ptr);

	/* fill them */
	for (ind = 0; ind < size; ind++)
	{
		vector_in_ptr[ind] = 2.0f;
		vector_out_ptr[ind] = 0.0f;
	}

	/*
	 *	Register the CSR matrix and the 2 vectors
	 */
	starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));

	/*
	 *	Partition the CSR matrix and the output vector
	 */
	csr_f.nchildren = nblocks;
	vector_f.nchildren = nblocks;
	starpu_data_partition(sparse_matrix, &csr_f);
	starpu_data_partition(vector_out, &vector_f);

	/*
	 *	If we use OpenCL, we need to compile the SpMV kernel
	 */
#ifdef STARPU_USE_OPENCL
	compile_spmv_opencl_kernel();
#endif

	start = starpu_timing_now();

	/*
	 *	Create and submit StarPU tasks
	 */
	for (part = 0; part < nblocks; part++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &spmv_cl;
	
		task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
		task->handles[1] = vector_in;
		task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part);
	
		ret = starpu_task_submit(task);
		if (STARPU_UNLIKELY(ret == -ENODEV))
		{
			FPRINTF(stderr, "No worker may execute this task\n");
			exit(0);
		}
	}

	starpu_task_wait_for_all();
	end = starpu_timing_now();

	/*
	 *	Unregister the CSR matrix and the output vector
	 */
	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);

	/*
	 *	Unregister data
	 */
	starpu_data_unregister(sparse_matrix);
	starpu_data_unregister(vector_in);
	starpu_data_unregister(vector_out);

	/*
	 *	Display the result
	 */
	for (row = 0; row < STARPU_MIN(size, 16); row++)
	{
                FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]);
	}

	starpu_free(nzval);
	starpu_free(colind);
	starpu_free(rowptr);
	starpu_free(vector_in_ptr);
	starpu_free(vector_out_ptr);

	/*
	 *	Stop StarPU
	 */
	starpu_shutdown();

	timing = end - start;
	FPRINTF(stderr, "Computation took (in ms)\n");
	FPRINTF(stdout, "%2.2f\n", timing/1000);

	return 0;
}