Esempio n. 1
0
void call_filters(void)
{

	struct starpu_data_filter bcsr_f;
	struct starpu_data_filter vector_in_f, vector_out_f;

	bcsr_f.filter_func    = starpu_bcsr_filter_canonical_block;
	bcsr_f.get_nchildren = get_bcsr_nchildren;
	/* the children use a matrix interface ! */
	bcsr_f.get_child_ops = get_bcsr_child_ops;

	vector_in_f.filter_func = starpu_vector_filter_block;
	vector_in_f.nchildren  = size/c;
	vector_in_f.get_nchildren  = NULL;
	vector_in_f.get_child_ops  = NULL;
	
	vector_out_f.filter_func = starpu_vector_filter_block;
	vector_out_f.nchildren  = size/r;
	vector_out_f.get_nchildren  = NULL;
	vector_out_f.get_child_ops  = NULL;

	starpu_data_partition(sparse_matrix, &bcsr_f);

	starpu_data_partition(vector_in, &vector_in_f);
	starpu_data_partition(vector_out, &vector_out_f);
}
Esempio n. 2
0
static void partition_mult_data(void)
{
	gettimeofday(&start, NULL);

	starpu_matrix_data_register(&A_handle, 0, (uintptr_t)A, 
		ydim, ydim, zdim, sizeof(float));
	starpu_matrix_data_register(&B_handle, 0, (uintptr_t)B, 
		zdim, zdim, xdim, sizeof(float));
	starpu_matrix_data_register(&C_handle, 0, (uintptr_t)C, 
		ydim, ydim, xdim, sizeof(float));

	starpu_data_set_wt_mask(C_handle, 1<<0);

	conf.k = zdim;
	conf.m = ydim/nslicesy;
	conf.n = xdim/nslicesx;

	struct starpu_data_filter f;
	f.filter_func = starpu_vertical_block_filter_func;
	f.nchildren = nslicesx;
	f.get_nchildren = NULL;
	f.get_child_ops = NULL;
		
	struct starpu_data_filter f2;
	f2.filter_func = starpu_block_filter_func;
	f2.nchildren = nslicesy;
	f2.get_nchildren = NULL;
	f2.get_child_ops = NULL;
		
	starpu_data_partition(B_handle, &f);
	starpu_data_partition(A_handle, &f2);

	starpu_data_map_filters(C_handle, 2, &f, &f2);
}
Esempio n. 3
0
static void partition_mult_data(void)
{
	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A,
		ydim, ydim, zdim, sizeof(TYPE));
	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B,
		zdim, zdim, xdim, sizeof(TYPE));
	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C,
		ydim, ydim, xdim, sizeof(TYPE));

	struct starpu_data_filter vert;
	memset(&vert, 0, sizeof(vert));
	vert.filter_func = starpu_matrix_filter_vertical_block;
	vert.nchildren = nslicesx;

	struct starpu_data_filter horiz;
	memset(&horiz, 0, sizeof(horiz));
	horiz.filter_func = starpu_matrix_filter_block;
	horiz.nchildren = nslicesy;

	starpu_data_partition(B_handle, &vert);
	starpu_data_partition(A_handle, &horiz);

	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
}
/*
 * Apply the data filter f to every leaf of the partition tree rooted at
 * root_handle.  A handle without children is a leaf and gets partitioned
 * directly; otherwise the function recurses into each child.
 */
static void map_filter(starpu_data_handle_t root_handle, struct starpu_data_filter *f)
{
	unsigned idx = 0;

	/* leaf: this is where the filter is actually applied */
	if (root_handle->nchildren == 0)
	{
		starpu_data_partition(root_handle, f);
		return;
	}

	/* inner node: descend into every child in order */
	while (idx < root_handle->nchildren)
	{
		map_filter(starpu_data_get_child(root_handle, idx), f);
		idx++;
	}
}
/*
 * Test entry point: register a vector of 10 elements per worker, prefetch it
 * on every worker's memory node, partition it into one block per worker and
 * submit one scal_codelet task per block, pinned to the matching worker.
 * Afterwards every element must have been doubled.
 *
 * Returns EXIT_SUCCESS when all values are correct, EXIT_FAILURE when a value
 * is wrong or the buffer could not be allocated, and STARPU_TEST_SKIPPED when
 * StarPU has no device, only one worker, or no worker able to run the task.
 */
int main(int argc, char **argv)
{
    unsigned *foo = NULL;
    starpu_data_handle_t handle;
    int ret;
    int status;
    unsigned n, i, size;

    /* nchildren is filled in once the number of workers is known */
    struct starpu_data_filter f =
    {
        .filter_func = starpu_vector_filter_block,
    };

    ret = starpu_initialize(NULL, &argc, &argv);
    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
    ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

    n = starpu_worker_get_count();
    if (n == 1)
    {
        status = STARPU_TEST_SKIPPED;
        goto cleanup;
    }

    size = 10 * n;

    foo = (unsigned *) calloc(size, sizeof(*foo));
    if (foo == NULL)
    {
        fprintf(stderr, "could not allocate a vector of %u elements\n", size);
        status = EXIT_FAILURE;
        goto cleanup;
    }
    for (i = 0; i < size; i++)
        foo[i] = i;

    starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo));

    /* Broadcast the data to force in-place partitioning */
    for (i = 0; i < n; i++)
        starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);

    f.nchildren = n;
    starpu_data_partition(handle, &f);

    for (i = 0; i < f.nchildren; i++)
    {
        struct starpu_task *task = starpu_task_create();

        task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
        task->cl = &scal_codelet;
        task->execute_on_a_specific_worker = 1;
        task->workerid = i;

        ret = starpu_task_submit(task);
        if (ret == -ENODEV) goto enodev;
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
    }

    ret = starpu_task_wait_for_all();
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
    starpu_data_unregister(handle);

    /* the data is back in foo: every element must have been doubled */
    status = EXIT_SUCCESS;
    for (i = 0; i < size; i++)
    {
        if (foo[i] != i*2)
        {
            FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i);
            status = EXIT_FAILURE;
        }
    }
    goto cleanup;

enodev:
    /* Some tasks may already have been submitted: let them finish and
     * gather the pieces, as the normal path does, before unregistering. */
    starpu_task_wait_for_all();
    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
    starpu_data_unregister(handle);
    fprintf(stderr, "WARNING: No one can execute this task\n");
    /* yes, we do not perform the computation but we did detect that no one
     * could perform the kernel, so this is not an error from StarPU */
    status = STARPU_TEST_SKIPPED;

cleanup:
    /* single exit path: release the buffer and the OpenCL program on every
     * outcome, then stop StarPU */
    free(foo);
#ifdef STARPU_USE_OPENCL
    ret = starpu_opencl_unload_opencl(&opencl_program);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
    starpu_shutdown();
    return status;
}
Esempio n. 6
0
int main(int argc, char **argv)
{
	int ret, exit_value = 0;

	/* Initialize StarPU */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	starpu_cublas_init();

	/* This is equivalent to
		vec_a = malloc(N*sizeof(TYPE));
		vec_b = malloc(N*sizeof(TYPE));
	*/
	starpu_malloc((void **)&_vec_x, N*sizeof(TYPE));
	assert(_vec_x);

	starpu_malloc((void **)&_vec_y, N*sizeof(TYPE));
	assert(_vec_y);

	unsigned i;
	for (i = 0; i < N; i++)
	{
		_vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */
		_vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */
	}

	FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]);
	FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]);

	/* Declare the data to StarPU */
	starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE));
	starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE));

	/* Divide the vector into blocks */
	struct starpu_data_filter block_filter =
	{
		.filter_func = starpu_vector_filter_block,
		.nchildren = NBLOCKS
	};

	starpu_data_partition(_handle_x, &block_filter);
	starpu_data_partition(_handle_y, &block_filter);

	double start;
	double end;

	start = starpu_timing_now();

	unsigned b;
	for (b = 0; b < NBLOCKS; b++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &axpy_cl;

		task->cl_arg = &_alpha;
		task->cl_arg_size = sizeof(_alpha);

		task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b);
		task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b);

		task->tag_id = b;

		ret = starpu_task_submit(task);
		if (ret == -ENODEV)
		{
			exit_value = 77;
			goto enodev;
		}
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	starpu_task_wait_for_all();

enodev:
	starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM);
	starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM);
	starpu_data_unregister(_handle_x);
	starpu_data_unregister(_handle_y);

	end = starpu_timing_now();
        double timing = end - start;

	FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing);

	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha);

	if (exit_value != 77)
		exit_value = check();

	starpu_free((void *)_vec_x);
	starpu_free((void *)_vec_y);

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	/* Stop StarPU */
	starpu_shutdown();

	return exit_value;
}
Esempio n. 7
0
/*
 * YUV downscaler driver.
 *
 * Reads all frames of filename_in into memory, registers the Y, U and V
 * layers of every input and output frame as StarPU matrices, partitions them
 * with filter_y / filter_uv, submits one ds_codelet task per block, and
 * finally writes the downscaled frames to filename_out.
 *
 * Returns 0 on success and 1 when the input cannot be examined or read, or
 * when the output cannot be written completely.
 */
int main(int argc, char **argv)
{
	int ret;
	int exit_status = 0;

	assert(HEIGHT % (2*BLOCK_HEIGHT) == 0);
	assert(HEIGHT % FACTOR == 0);
	
	parse_args(argc, argv);

/*	fprintf(stderr, "Reading input file ...\n"); */

	/* how many frames ? */
	struct stat stbuf;
	if (stat(filename_in, &stbuf) != 0)
	{
		/* without a valid stbuf the frame count would be garbage */
		fprintf(stderr, "Could not stat %s\n", filename_in);
		return 1;
	}
	size_t filesize = stbuf.st_size;

	unsigned nframes = filesize/FRAMESIZE; 

/*	fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes); */
	assert((filesize % sizeof(struct yuv_frame)) == 0);

	struct yuv_frame *yuv_in_buffer = (struct yuv_frame *) malloc(nframes*FRAMESIZE);
	assert(yuv_in_buffer);

/*	fprintf(stderr, "Alloc output file ...\n"); */
	struct yuv_new_frame *yuv_out_buffer = (struct yuv_new_frame *) calloc(nframes, NEW_FRAMESIZE);
	assert(yuv_out_buffer);

	/* open the input file */
	FILE *f_in = fopen(filename_in, "r");
	assert(f_in);

	/* open (and truncate) the output file */
	FILE *f_out = fopen(filename_out, "w+");
	assert(f_out);

	/* fetch input data */
	size_t nread = fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);
	if (nread != nframes)
	{
		fprintf(stderr, "Could only read %lu of %u frames from %s\n",
			(unsigned long) nread, nframes, filename_in);
		fclose(f_in);
		fclose(f_out);
		free(yuv_in_buffer);
		free(yuv_out_buffer);
		return 1;
	}

	starpu_data_handle_t *frame_y_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *frame_u_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *frame_v_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	assert(frame_y_handle && frame_u_handle && frame_v_handle);

	starpu_data_handle_t *new_frame_y_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *new_frame_u_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *new_frame_v_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	assert(new_frame_y_handle && new_frame_u_handle && new_frame_v_handle);

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* register and partition all layers */
	unsigned frame;
	for (frame = 0; frame < nframes; frame++)
	{
		/* register Y layer */
		starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].y,
			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));

		starpu_data_partition(frame_y_handle[frame], &filter_y);

		starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].y,
			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));

		starpu_data_partition(new_frame_y_handle[frame], &filter_y);

		/* register U layer */
		starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].u,
			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(frame_u_handle[frame], &filter_uv);

		starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].u,
			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(new_frame_u_handle[frame], &filter_uv);

		/* register V layer */
		starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].v,
			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(frame_v_handle[frame], &filter_uv);

		starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].v,
			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(new_frame_v_handle[frame], &filter_uv);

	}

	/* how many tasks are there ? */
	unsigned nblocks_y = filter_y.nchildren;
	unsigned nblocks_uv = filter_uv.nchildren;

	unsigned ntasks = (nblocks_y + 2*nblocks_uv)*nframes;

	fprintf(stderr, "Start computation: there will be %u tasks for %u frames\n", ntasks, nframes);
	start = starpu_timing_now();

	/* do the computation */
	for (frame = 0; frame < nframes; frame++)
	{
		unsigned blocky;
		for (blocky = 0; blocky < nblocks_y; blocky++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		unsigned blocku;
		for (blocku = 0; blocku < nblocks_uv; blocku++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		unsigned blockv;
		for (blockv = 0; blockv < nblocks_uv; blockv++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}
	}

	/* make sure all output buffers are sync'ed */
	/* NOTE(review): the handles are still partitioned at this point;
	 * confirm that starpu_data_unregister accepts partitioned handles in
	 * the StarPU version in use */
	for (frame = 0; frame < nframes; frame++)
	{
		starpu_data_unregister(frame_y_handle[frame]);
		starpu_data_unregister(frame_u_handle[frame]);
		starpu_data_unregister(frame_v_handle[frame]);

		starpu_data_unregister(new_frame_y_handle[frame]);
		starpu_data_unregister(new_frame_u_handle[frame]);
		starpu_data_unregister(new_frame_v_handle[frame]);
	}

	/* There is an implicit barrier: the unregister methods will block
	 * until the computation is done and that the result was put back into
	 * memory. */
	end = starpu_timing_now();

	double timing = end - start;
	printf("# s\tFPS\n");
	/* floating-point constant: 1000000*nframes would wrap around in
	 * unsigned arithmetic for large frame counts */
	printf("%f\t%f\n", timing/1000000, (1000000.0*nframes)/timing);

	/* store the downscaled frames */
	if (fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out) != nframes)
	{
		fprintf(stderr, "Could not write all frames to %s\n", filename_out);
		exit_status = 1;
	}

	starpu_shutdown();

	if (fclose(f_in) != 0)
		fprintf(stderr, "Could not close %s properly\n", filename_in);

	if (fclose(f_out) != 0)
	{
		/* buffered output may have been lost */
		fprintf(stderr, "Could not close %s properly\n", filename_out);
		exit_status = 1;
	}

	free(frame_y_handle);
	free(frame_u_handle);
	free(frame_v_handle);
	free(new_frame_y_handle);
	free(new_frame_u_handle);
	free(new_frame_v_handle);
	free(yuv_in_buffer);
	free(yuv_out_buffer);

	return exit_status;
}
Esempio n. 8
0
/*
 * Sparse matrix-vector product example.
 *
 * Builds a 3-band CSR matrix of dimension `size`, an input vector filled
 * with 2.0 and an output vector filled with 0.0, partitions the matrix and
 * the output vector into `nblocks` parts and submits one spmv_cl task per
 * part.  The first rows of the result and the elapsed time are printed.
 *
 * Returns 0 on success and 77 when StarPU reports -ENODEV, either at
 * initialization or when a task is submitted.
 */
int main(int argc, char **argv)
{
	int ret;
	int exit_value = 0;
	unsigned part;
	double timing;
	double start, end;
	unsigned row, pos;
	unsigned ind;

	/* CSR matrix description */
	float *nzval;
	uint32_t nnz;
	uint32_t *colind;
	uint32_t *rowptr;
	
	/* Input and Output vectors */
	float *vector_in_ptr;
	float *vector_out_ptr;

	/*
	 *	Parse command-line arguments
	 */
	parse_args(argc, argv);

	/*
	 *	Launch StarPU
	 */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/*
	 *	Create a 3-band sparse matrix as input example
	 */
	nnz = 3*size-2;
	starpu_malloc((void **)&nzval, nnz*sizeof(float));
	starpu_malloc((void **)&colind, nnz*sizeof(uint32_t));
	starpu_malloc((void **)&rowptr, (size+1)*sizeof(uint32_t));
	assert(nzval && colind && rowptr);

	/* fill the matrix */
	for (row = 0, pos = 0; row < size; row++)
	{
		rowptr[row] = pos;

		/* sub-diagonal entry, absent on the first row */
		if (row > 0)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row-1;
			pos++;
		}
		
		/* diagonal entry */
		nzval[pos] = 5.0f;
		colind[pos] = row;
		pos++;

		/* super-diagonal entry, absent on the last row */
		if (row < size - 1)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row+1;
			pos++;
		}
	}

	STARPU_ASSERT(pos == nnz);

	rowptr[size] = nnz;
	
	/* initiate the 2 vectors */
	starpu_malloc((void **)&vector_in_ptr, size*sizeof(float));
	starpu_malloc((void **)&vector_out_ptr, size*sizeof(float));
	assert(vector_in_ptr && vector_out_ptr);

	/* fill them */
	for (ind = 0; ind < size; ind++)
	{
		vector_in_ptr[ind] = 2.0f;
		vector_out_ptr[ind] = 0.0f;
	}

	/*
	 *	Register the CSR matrix and the 2 vectors
	 */
	starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));

	/*
	 *	Partition the CSR matrix and the output vector
	 */
	csr_f.nchildren = nblocks;
	vector_f.nchildren = nblocks;
	starpu_data_partition(sparse_matrix, &csr_f);
	starpu_data_partition(vector_out, &vector_f);

	/*
	 *	If we use OpenCL, we need to compile the SpMV kernel
	 */
#ifdef STARPU_USE_OPENCL
	compile_spmv_opencl_kernel();
#endif

	start = starpu_timing_now();

	/*
	 *	Create and submit StarPU tasks
	 */
	for (part = 0; part < nblocks; part++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &spmv_cl;
	
		task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
		task->handles[1] = vector_in;
		task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part);
	
		ret = starpu_task_submit(task);
		if (STARPU_UNLIKELY(ret == -ENODEV))
		{
			/* Report "skipped" (77), as for -ENODEV at init, and
			 * still release everything below instead of exiting
			 * on the spot with a success status. */
			FPRINTF(stderr, "No worker may execute this task\n");
			exit_value = 77;
			goto cleanup;
		}
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

cleanup:
	/* also reached on -ENODEV: tasks submitted earlier must finish first */
	starpu_task_wait_for_all();
	end = starpu_timing_now();

	/*
	 *	Unpartition the CSR matrix and the output vector
	 */
	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);

	/*
	 *	Unregister data
	 */
	starpu_data_unregister(sparse_matrix);
	starpu_data_unregister(vector_in);
	starpu_data_unregister(vector_out);

	/*
	 *	Display the result (only when the computation was performed)
	 */
	if (exit_value == 0)
	{
		for (row = 0; row < STARPU_MIN(size, 16); row++)
		{
			FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]);
		}
	}

	starpu_free(nzval);
	starpu_free(colind);
	starpu_free(rowptr);
	starpu_free(vector_in_ptr);
	starpu_free(vector_out_ptr);

	/*
	 *	Stop StarPU
	 */
	starpu_shutdown();

	if (exit_value == 0)
	{
		timing = end - start;
		FPRINTF(stderr, "Computation took (in ms)\n");
		FPRINTF(stdout, "%2.2f\n", timing/1000);
	}

	return exit_value;
}