int main(int argc, char **argv)
{

	unsigned i;
	double timing;
	double start;
	double end;
	int ret;

	parse_args(argc, argv);

#ifdef STARPU_HAVE_VALGRIND_H
	if(RUNNING_ON_VALGRIND) ntasks = 5;
#endif

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	struct starpu_task task;

	starpu_task_init(&task);

	task.cl = &dummy_codelet;
	task.detach = 0;

	FPRINTF(stderr, "#tasks : %u\n", ntasks);

	start = starpu_timing_now();

	for (i = 0; i < ntasks; i++)
	{
		ret = starpu_task_submit(&task);
		if (ret == -ENODEV) goto enodev;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

		ret = starpu_task_wait(&task);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
	}

	end = starpu_timing_now();

	timing = end - start;

	FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
	FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks);

	starpu_task_clean(&task);
	starpu_shutdown();

	return EXIT_SUCCESS;

enodev:
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* yes, we do not perform the computation but we did detect that no one
 	 * could perform the kernel, so this is not an error from StarPU */
	starpu_shutdown();
	return STARPU_TEST_SKIPPED;
}
int cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned pinned)
{
	double start;
	double end;
	int ret;

	start = starpu_timing_now();

	ret = cholesky_grain_rec(matA, size, ld, nblocks, nbigblocks, 0);

	end = starpu_timing_now();

	double timing = end - start;

	double flop = (1.0f*size*size*size)/3.0f;

	PRINTF("# size\tms\tGFlops\n");
	PRINTF("%u\t%.0f\t%.1f\n", size, timing/1000, (flop/timing/1000.0f));

	return ret;
}
void init_problem_callback(void *arg)
{
	unsigned *remaining = arg;

	unsigned val = STARPU_ATOMIC_ADD(remaining, -1);

/*	if (val < 10)
		printf("callback %d remaining \n", val); */

	if ( val == 0 )
	{
		printf("DONE ...\n");
		end = starpu_timing_now();

		sem_post(&sem);
	}
}
int starpu_mct_compute_expected_times(struct starpu_sched_component *component, struct starpu_task *task,
		double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task,
		double *min_exp_end_with_task, double *max_exp_end_with_task, int *suitable_components)
{
	int nsuitable_components = 0;

	int i;
	for(i = 0; i < component->nchildren; i++)
	{
		struct starpu_sched_component * c = component->children[i];
		if(starpu_sched_component_execute_preds(c, task, estimated_lengths + i))
		{
			if(isnan(estimated_lengths[i]))
				/* The perfmodel had been purged since the task was pushed
				 * onto the mct component. */
				continue;

			/* Estimated availability of worker */
			double estimated_end = c->estimated_end(c);
			double now = starpu_timing_now();
			if (estimated_end < now)
				estimated_end = now;
			estimated_transfer_length[i] = starpu_sched_component_transfer_length(c, task);
			estimated_ends_with_task[i] = compute_expected_time(now,
									    estimated_end,
									    estimated_lengths[i],
									    estimated_transfer_length[i]);
			if(estimated_ends_with_task[i] < *min_exp_end_with_task)
				*min_exp_end_with_task = estimated_ends_with_task[i];
			if(estimated_ends_with_task[i] > *max_exp_end_with_task)
				*max_exp_end_with_task = estimated_ends_with_task[i];
			suitable_components[nsuitable_components++] = i;
		}
	}
	return nsuitable_components;
}
int main(int argc, char **argv)
{
    int ret;
    unsigned i;
    double timing;
    double start;
    double end;

#ifdef STARPU_QUICK_CHECK
    ntasks = 128;
#endif

    parse_args(argc, argv);

    ret = starpu_initialize(NULL, &argc, &argv);
    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

    fprintf(stderr, "#tasks : %u\n", ntasks);

    start = starpu_timing_now();
    for (i = 0; i < ntasks; i++)
    {
        ret = inject_one_task();
        if (ret == -ENODEV) goto enodev;
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
    }
    end = starpu_timing_now();

    timing = end - start;

    fprintf(stderr, "Total: %f secs\n", timing/1000000);
    fprintf(stderr, "Per task: %f usecs\n", timing/ntasks);

    {
        char *output_dir = getenv("STARPU_BENCH_DIR");
        char *bench_id = getenv("STARPU_BENCH_ID");

        if (output_dir && bench_id)
        {
            char file[1024];
            FILE *f;

            sprintf(file, "%s/sync_tasks_overhead_total.dat", output_dir);
            f = fopen(file, "a");
            fprintf(f, "%s\t%f\n", bench_id, timing/1000000);
            fclose(f);

            sprintf(file, "%s/sync_tasks_overhead_per_task.dat", output_dir);
            f = fopen(file, "a");
            fprintf(f, "%s\t%f\n", bench_id, timing/ntasks);
            fclose(f);
        }
    }

    starpu_shutdown();

    return EXIT_SUCCESS;

enodev:
    fprintf(stderr, "WARNING: No one can execute this task\n");
    /* yes, we do not perform the computation but we did detect that no one
     * could perform the kernel, so this is not an error from StarPU */
    starpu_shutdown();
    return STARPU_TEST_SKIPPED;
}
void launch_spmv_codelets(void)
{
	struct starpu_task *task_tab;
	uint8_t *is_entry_tab;
	int ret;

	/* we call one codelet per block */
	unsigned nblocks = starpu_bcsr_get_nnz(sparse_matrix); 
	unsigned nrows = starpu_bcsr_get_nrow(sparse_matrix); 

	remainingtasks = NSPMV*nblocks;
	totaltasks = remainingtasks;

	unsigned taskid = 0;

	task_tab = calloc(totaltasks, sizeof(struct starpu_task));
	STARPU_ASSERT(task_tab);

	is_entry_tab = calloc(totaltasks, sizeof(uint8_t));
	STARPU_ASSERT(is_entry_tab);

	printf("there will be %d codelets\n", remainingtasks);

	uint32_t *rowptr = starpu_bcsr_get_local_rowptr(sparse_matrix);
	uint32_t *colind = starpu_bcsr_get_local_colind(sparse_matrix);

	start = starpu_timing_now();

	unsigned loop;
	for (loop = 0; loop < NSPMV; loop++)
	{
		unsigned row;
		unsigned part = 0;

		for (row = 0; row < nrows; row++)
		{
			unsigned index;

			if (rowptr[row] == rowptr[row+1])
			{
				continue;
			}


			for (index = rowptr[row]; index < rowptr[row+1]; index++, part++)
			{
				struct starpu_task *task = &task_tab[taskid];
				starpu_task_init(task);

				task->use_tag = 1;
				task->tag_id = taskid;

				task->callback_func = init_problem_callback;
				task->callback_arg = &remainingtasks;
				task->cl = &cl;
				task->cl_arg = NULL;

				unsigned i = colind[index];
				unsigned j = row;

				task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
				task->handles[1] = starpu_data_get_sub_data(vector_in, 1, i);
				task->handles[2] = starpu_data_get_sub_data(vector_out, 1, j);

				/* all tasks in the same row are dependant so that we don't wait too much for data 
				 * we need to wait on the previous task if we are not the first task of a row */
				if (index != rowptr[row & ~0x3])
				{
					/* this is not the first task in the row */
					starpu_tag_declare_deps((starpu_tag_t)taskid, 1, (starpu_tag_t)(taskid-1));

					is_entry_tab[taskid] = 0;
				}
				else
				{
					/* this is an entry task */
					is_entry_tab[taskid] = 1;
				}

				taskid++;
			}
		}
	}

	printf("start submitting tasks !\n");

	/* submit ALL tasks now */
	unsigned nchains = 0;
	unsigned task;
	for (task = 0; task < totaltasks; task++)
	{
		if (is_entry_tab[task])
		{
			nchains++;
		}

		ret = starpu_task_submit(&task_tab[task]);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	printf("end of task submission (there was %d chains for %d tasks : ratio %d tasks per chain) !\n", nchains, totaltasks, totaltasks/nchains);
}
static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
{
	double start;
	double end;

	struct starpu_task *entry_task = NULL;

	/* create all the DAG nodes */
	unsigned i,j,k;

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
		struct starpu_task *task = create_task_11(dataA, k);
		/* we defer the launch of the first task */
		if (k == 0)
		{
			entry_task = task;
		}
		else
		{
			int ret = starpu_task_submit(task);
                        if (STARPU_UNLIKELY(ret == -ENODEV))
			{
                                FPRINTF(stderr, "No worker may execute this task\n");
                                exit(0);
                        }

		}

		for (j = k+1; j<nblocks; j++)
		{
			create_task_21(dataA, k, j);

			for (i = k+1; i<nblocks; i++)
			{
				if (i <= j)
					create_task_22(dataA, k, i, j);
			}
		}
	}

	/* schedule the codelet */
	int ret = starpu_task_submit(entry_task);
        if (STARPU_UNLIKELY(ret == -ENODEV))
	{
                FPRINTF(stderr, "No worker may execute this task\n");
                exit(0);
        }


	/* stall the application until the end of computations */
	starpu_tag_wait(TAG11(nblocks-1));

	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);

	end = starpu_timing_now();


	double timing = end - start;

	unsigned n = starpu_matrix_get_nx(dataA);

	double flop = (1.0f*n*n*n)/3.0f;

	PRINTF("# size\tms\tGFlops\n");
	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, (flop/timing/1000.0f));
}
static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
				  struct piv_s *piv_description,
				  unsigned nblocks,
				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
				  double *timing)
{
	double start;
	double end;
	int ret;

	/* create all the DAG nodes */
	unsigned i,j,k;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
	     ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
	     if (ret == -ENODEV) return ret;

		for (i = 0; i < nblocks; i++)
		{
			if (i != k)
			{
			     ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
			     if (ret == -ENODEV) return ret;
			}
		}

		for (i = k+1; i<nblocks; i++)
		{
		     ret = create_task_12(dataAp, nblocks, k, i, get_block);
		     if (ret == -ENODEV) return ret;
		     ret = create_task_21(dataAp, nblocks, k, i, get_block);
		     if (ret == -ENODEV) return ret;
		}
		starpu_data_wont_use(get_block(dataAp, nblocks, k, k));

		for (i = k+1; i<nblocks; i++)
		     for (j = k+1; j<nblocks; j++)
		     {
			  ret = create_task_22(dataAp, nblocks, k, i, j, get_block);
			  if (ret == -ENODEV) return ret;
		     }
		for (i = k+1; i<nblocks; i++)
		{
		    starpu_data_wont_use(get_block(dataAp, nblocks, k, i));
		    starpu_data_wont_use(get_block(dataAp, nblocks, i, k));
		}
	}

	/* stall the application until the end of computations */
	starpu_task_wait_for_all();

	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	*timing = end - start;
	return 0;
}
Example #9
0
static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
{
	double start;
	double end;
	int ret;

	/* create all the DAG nodes */
	unsigned i,j,k;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
		ret = create_task_11(dataA, k);
		if (ret == -ENODEV) return ret;

		for (i = k+1; i<nblocks; i++)
		{
		     ret = create_task_12(dataA, k, i);
		     if (ret == -ENODEV) return ret;
		     ret = create_task_21(dataA, k, i);
		     if (ret == -ENODEV) return ret;
		}
		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, k));

		for (i = k+1; i<nblocks; i++)
		     for (j = k+1; j<nblocks; j++)
		     {
			  ret = create_task_22(dataA, k, i, j);
			  if (ret == -ENODEV) return ret;
		     }
		for (i = k+1; i<nblocks; i++)
		{
		    starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, i));
		    starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, i, k));
		}
	}

	/* stall the application until the end of computations */
	starpu_task_wait_for_all();

	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	double timing = end - start;
	unsigned n = starpu_matrix_get_nx(dataA);
	double flop = (2.0f*n*n*n)/3.0f;

	PRINTF("# size\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops");
	PRINTF("\n");
	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
	if (bound)
	{
		double min;
		starpu_bound_compute(&min, NULL, 0);
		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
	}
	PRINTF("\n");

	return 0;
}
Example #10
0
int main(int argc, char **argv)
{
	int ret, exit_value = 0;

	/* Initialize StarPU */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	starpu_cublas_init();

	/* This is equivalent to
		vec_a = malloc(N*sizeof(TYPE));
		vec_b = malloc(N*sizeof(TYPE));
	*/
	starpu_malloc((void **)&_vec_x, N*sizeof(TYPE));
	assert(_vec_x);

	starpu_malloc((void **)&_vec_y, N*sizeof(TYPE));
	assert(_vec_y);

	unsigned i;
	for (i = 0; i < N; i++)
	{
		_vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */
		_vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */
	}

	FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]);
	FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]);

	/* Declare the data to StarPU */
	starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE));
	starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE));

	/* Divide the vector into blocks */
	struct starpu_data_filter block_filter =
	{
		.filter_func = starpu_vector_filter_block,
		.nchildren = NBLOCKS
	};

	starpu_data_partition(_handle_x, &block_filter);
	starpu_data_partition(_handle_y, &block_filter);

	double start;
	double end;

	start = starpu_timing_now();

	unsigned b;
	for (b = 0; b < NBLOCKS; b++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &axpy_cl;

		task->cl_arg = &_alpha;
		task->cl_arg_size = sizeof(_alpha);

		task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b);
		task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b);

		task->tag_id = b;

		ret = starpu_task_submit(task);
		if (ret == -ENODEV)
		{
			exit_value = 77;
			goto enodev;
		}
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	starpu_task_wait_for_all();

enodev:
	starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM);
	starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM);
	starpu_data_unregister(_handle_x);
	starpu_data_unregister(_handle_y);

	end = starpu_timing_now();
        double timing = end - start;

	FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing);

	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha);

	if (exit_value != 77)
		exit_value = check();

	starpu_free((void *)_vec_x);
	starpu_free((void *)_vec_y);

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	/* Stop StarPU */
	starpu_shutdown();

	return exit_value;
}
int main(int argc, char **argv)
{
	int ret;

	assert(HEIGHT % (2*BLOCK_HEIGHT) == 0);
	assert(HEIGHT % FACTOR == 0);
	
	parse_args(argc, argv);

/*	fprintf(stderr, "Reading input file ...\n"); */

	/* how many frames ? */
	struct stat stbuf;
	stat(filename_in, &stbuf);
	size_t filesize = stbuf.st_size;

	unsigned nframes = filesize/FRAMESIZE; 

/*	fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes); */
	assert((filesize % sizeof(struct yuv_frame)) == 0);

	struct yuv_frame *yuv_in_buffer = (struct yuv_frame *) malloc(nframes*FRAMESIZE);
	assert(yuv_in_buffer);

/*	fprintf(stderr, "Alloc output file ...\n"); */
	struct yuv_new_frame *yuv_out_buffer = (struct yuv_new_frame *) calloc(nframes, NEW_FRAMESIZE);
	assert(yuv_out_buffer);

	/* fetch input data */
	FILE *f_in = fopen(filename_in, "r");
	assert(f_in);

	/* allocate room for an output buffer */
	FILE *f_out = fopen(filename_out, "w+");
	assert(f_out);

	fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);

	starpu_data_handle_t *frame_y_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *frame_u_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *frame_v_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));

	starpu_data_handle_t *new_frame_y_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *new_frame_u_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
	starpu_data_handle_t *new_frame_v_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* register and partition all layers */
	unsigned frame;
	for (frame = 0; frame < nframes; frame++)
	{
		/* register Y layer */
		starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].y,
			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));

		starpu_data_partition(frame_y_handle[frame], &filter_y);

		starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].y,
			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));

		starpu_data_partition(new_frame_y_handle[frame], &filter_y);

		/* register U layer */
		starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].u,
			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(frame_u_handle[frame], &filter_uv);

		starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].u,
			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(new_frame_u_handle[frame], &filter_uv);

		/* register V layer */
		starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_in_buffer[frame].v,
			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(frame_v_handle[frame], &filter_uv);

		starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM,
			(uintptr_t)&yuv_out_buffer[frame].v,
			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));

		starpu_data_partition(new_frame_v_handle[frame], &filter_uv);

	}

	/* how many tasks are there ? */
	unsigned nblocks_y = filter_y.nchildren;
	unsigned nblocks_uv = filter_uv.nchildren;

	unsigned ntasks = (nblocks_y + 2*nblocks_uv)*nframes;

	fprintf(stderr, "Start computation: there will be %u tasks for %u frames\n", ntasks, nframes);
	start = starpu_timing_now();

	/* do the computation */
	for (frame = 0; frame < nframes; frame++)
	{
		unsigned blocky;
		for (blocky = 0; blocky < nblocks_y; blocky++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		unsigned blocku;
		for (blocku = 0; blocku < nblocks_uv; blocku++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		unsigned blockv;
		for (blockv = 0; blockv < nblocks_uv; blockv++)
		{
			struct starpu_task *task = starpu_task_create();
			task->cl = &ds_codelet;

			/* input */
			task->handles[0] = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv);

			/* output */
			task->handles[1] = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv);

			ret = starpu_task_submit(task);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}
	}

	/* make sure all output buffers are sync'ed */
	for (frame = 0; frame < nframes; frame++)
	{
		starpu_data_unregister(frame_y_handle[frame]);
		starpu_data_unregister(frame_u_handle[frame]);
		starpu_data_unregister(frame_v_handle[frame]);

		starpu_data_unregister(new_frame_y_handle[frame]);
		starpu_data_unregister(new_frame_u_handle[frame]);
		starpu_data_unregister(new_frame_v_handle[frame]);
	}

	/* There is an implicit barrier: the unregister methods will block
	 * until the computation is done and that the result was put back into
	 * memory. */
	end = starpu_timing_now();

	double timing = end - start;
	printf("# s\tFPS\n");
	printf("%f\t%f\n", timing/1000000, (1000000*nframes)/timing);

	fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out);

	/* partition the layers into smaller parts */
	starpu_shutdown();

	if (fclose(f_in) != 0)
		fprintf(stderr, "Could not close %s properly\n", filename_in);

	if (fclose(f_out) != 0)
		fprintf(stderr, "Could not close %s properly\n", filename_out);

	return 0;
}
Example #12
0
int main(int argc, char **argv)
{
	double start, end;
	int ret;

	parse_args(argc, argv);

#ifdef STARPU_QUICK_CHECK
	niter /= 10;
#endif

	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	starpu_cublas_init();

	init_problem_data();
	partition_mult_data();

	if (bound)
		starpu_bound_start(0, 0);

	start = starpu_timing_now();

	unsigned x, y, iter;
	for (iter = 0; iter < niter; iter++)
	{
		for (x = 0; x < nslicesx; x++)
		for (y = 0; y < nslicesy; y++)
		{
			struct starpu_task *task = starpu_task_create();

			task->cl = &cl;

			task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y);
			task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x);
			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y);

			task->flops = 2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim;

			ret = starpu_task_submit(task);
			if (ret == -ENODEV)
			{
			     ret = 77;
			     goto enodev;
			}
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		starpu_task_wait_for_all();
	}


	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	double timing = end - start;
	double min, min_int;
	double flops = 2.0*((unsigned long long)niter)*((unsigned long long)xdim)
		           *((unsigned long long)ydim)*((unsigned long long)zdim);

	if (bound)
		starpu_bound_compute(&min, &min_int, 1);

	PRINTF("# x\ty\tz\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops\tTims\tTiGFlops");
	PRINTF("\n");
	PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0);
	if (bound)
		PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0);
	PRINTF("\n");

enodev:
	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);

	starpu_data_unregister(A_handle);
	starpu_data_unregister(B_handle);
	starpu_data_unregister(C_handle);

	if (check)
		check_output();

	starpu_free(A);
	starpu_free(B);
	starpu_free(C);

	starpu_cublas_shutdown();
	starpu_shutdown();

	return ret;
}
Example #13
0
int main(int argc, char **argv)
{
	int ret;
	unsigned part;
	double timing;
	double start, end;
	unsigned row, pos;
	unsigned ind;

	/* CSR matrix description */
	float *nzval;
	uint32_t nnz;
	uint32_t *colind;
	uint32_t *rowptr;
	
	/* Input and Output vectors */
	float *vector_in_ptr;
	float *vector_out_ptr;

	/*
	 *	Parse command-line arguments
	 */
	parse_args(argc, argv);

	/*
	 *	Launch StarPU
	 */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/*
	 *	Create a 3-band sparse matrix as input example
	 */
	nnz = 3*size-2;
	starpu_malloc((void **)&nzval, nnz*sizeof(float));
	starpu_malloc((void **)&colind, nnz*sizeof(uint32_t));
	starpu_malloc((void **)&rowptr, (size+1)*sizeof(uint32_t));
	assert(nzval && colind && rowptr);

	/* fill the matrix */
	for (row = 0, pos = 0; row < size; row++)
	{
		rowptr[row] = pos;

		if (row > 0)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row-1;
			pos++;
		}
		
		nzval[pos] = 5.0f;
		colind[pos] = row;
		pos++;

		if (row < size - 1)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row+1;
			pos++;
		}
	}

	STARPU_ASSERT(pos == nnz);

	rowptr[size] = nnz;
	
	/* initiate the 2 vectors */
	starpu_malloc((void **)&vector_in_ptr, size*sizeof(float));
	starpu_malloc((void **)&vector_out_ptr, size*sizeof(float));
	assert(vector_in_ptr && vector_out_ptr);

	/* fill them */
	for (ind = 0; ind < size; ind++)
	{
		vector_in_ptr[ind] = 2.0f;
		vector_out_ptr[ind] = 0.0f;
	}

	/*
	 *	Register the CSR matrix and the 2 vectors
	 */
	starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));

	/*
	 *	Partition the CSR matrix and the output vector
	 */
	csr_f.nchildren = nblocks;
	vector_f.nchildren = nblocks;
	starpu_data_partition(sparse_matrix, &csr_f);
	starpu_data_partition(vector_out, &vector_f);

	/*
	 *	If we use OpenCL, we need to compile the SpMV kernel
	 */
#ifdef STARPU_USE_OPENCL
	compile_spmv_opencl_kernel();
#endif

	start = starpu_timing_now();

	/*
	 *	Create and submit StarPU tasks
	 */
	for (part = 0; part < nblocks; part++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &spmv_cl;
	
		task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
		task->handles[1] = vector_in;
		task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part);
	
		ret = starpu_task_submit(task);
		if (STARPU_UNLIKELY(ret == -ENODEV))
		{
			FPRINTF(stderr, "No worker may execute this task\n");
			exit(0);
		}
	}

	starpu_task_wait_for_all();
	end = starpu_timing_now();

	/*
	 *	Unregister the CSR matrix and the output vector
	 */
	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);

	/*
	 *	Unregister data
	 */
	starpu_data_unregister(sparse_matrix);
	starpu_data_unregister(vector_in);
	starpu_data_unregister(vector_out);

	/*
	 *	Display the result
	 */
	for (row = 0; row < STARPU_MIN(size, 16); row++)
	{
                FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]);
	}

	starpu_free(nzval);
	starpu_free(colind);
	starpu_free(rowptr);
	starpu_free(vector_in_ptr);
	starpu_free(vector_out_ptr);

	/*
	 *	Stop StarPU
	 */
	starpu_shutdown();

	timing = end - start;
	FPRINTF(stderr, "Computation took (in ms)\n");
	FPRINTF(stdout, "%2.2f\n", timing/1000);

	return 0;
}
struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
{
	struct starpu_task *task;
	int worker_id;
	unsigned node;

	/* We can't tell in advance which task will be picked up, so we measure
	 * a timestamp, and will attribute it afterwards to the task. */
	int profiling = starpu_profiling_status_get();
	struct timespec pop_start_time;
	if (profiling)
		_starpu_clock_gettime(&pop_start_time);

pick:
	/* perhaps there is some local task to be executed first */
	task = _starpu_pop_local_task(worker);


	/* get tasks from the stacks of the strategy */
	if(!task)
	{
		struct _starpu_sched_ctx *sched_ctx ;
#ifndef STARPU_NON_BLOCKING_DRIVERS
		int been_here[STARPU_NMAX_SCHED_CTXS];
		int i;
		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			been_here[i] = 0;

		while(!task)
#endif
		{
			if(worker->nsched_ctxs == 1)
				sched_ctx = _starpu_get_initial_sched_ctx();
			else
			{
				while(1)
				{
					sched_ctx = _get_next_sched_ctx_to_pop_into(worker);

					if(worker->removed_from_ctx[sched_ctx->id] == 1 && worker->shares_tasks_lists[sched_ctx->id] == 1)
					{
						_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
						worker->removed_from_ctx[sched_ctx->id] = 0;
						sched_ctx = NULL;
					}
					else
						break;
				}
			}

			if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
			{
				if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task)
				{
					task = sched_ctx->sched_policy->pop_task(sched_ctx->id);
					_starpu_pop_task_end(task);
				}
			}
			
			if(!task)
			{
				/* it doesn't matter if it shares tasks list or not in the scheduler,
				   if it does not have any task to pop just get it out of here */
				/* however if it shares a task list it will be removed as soon as he 
				  finishes this job (in handle_job_termination) */
				if(worker->removed_from_ctx[sched_ctx->id])
				{
					_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
					worker->removed_from_ctx[sched_ctx->id] = 0;
				}
#ifdef STARPU_USE_SC_HYPERVISOR
				if(worker->pop_ctx_priority)
				{
					struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
					if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
					{
//					_STARPU_TRACE_HYPERVISOR_BEGIN();
						perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
//					_STARPU_TRACE_HYPERVISOR_END();
					}
				}
#endif //STARPU_USE_SC_HYPERVISOR
				
#ifndef STARPU_NON_BLOCKING_DRIVERS
				if(been_here[sched_ctx->id] || worker->nsched_ctxs == 1)
					break;

				been_here[sched_ctx->id] = 1;

#endif
			}
		}
	  }


	if (!task)
	{
		idle_start[worker->workerid] = starpu_timing_now();
		return NULL;
	}

	if(idle_start[worker->workerid] != 0.0)
	{
		double idle_end = starpu_timing_now();
		idle[worker->workerid] += (idle_end - idle_start[worker->workerid]);
		idle_start[worker->workerid] = 0.0;
	}
	

#ifdef STARPU_USE_SC_HYPERVISOR
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
	struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;

	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
	{
//		_STARPU_TRACE_HYPERVISOR_BEGIN();
		perf_counters->notify_poped_task(task->sched_ctx, worker->workerid);
//		_STARPU_TRACE_HYPERVISOR_END();
	}
#endif //STARPU_USE_SC_HYPERVISOR


	/* Make sure we do not bother with all the multiformat-specific code if
	 * it is not necessary. */
	if (!_starpu_task_uses_multiformat_handles(task))
		goto profiling;


	/* This is either a conversion task, or a regular task for which the
	 * conversion tasks have already been created and submitted */
	if (task->mf_skip)
		goto profiling;

	/*
	 * This worker may not be able to execute this task. In this case, we
	 * should return the task anyway. It will be pushed back almost immediatly.
	 * This way, we avoid computing and executing the conversions tasks.
	 * Here, we do not care about what implementation is used.
	 */
	worker_id = starpu_worker_get_id();
	if (!starpu_worker_can_execute_task_first_impl(worker_id, task, NULL))
		return task;

	node = starpu_worker_get_memory_node(worker_id);

	/*
	 * We do have a task that uses multiformat handles. Let's create the
	 * required conversion tasks.
	 */
	STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
	unsigned i;
	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
	for (i = 0; i < nbuffers; i++)
	{
		struct starpu_task *conversion_task;
		starpu_data_handle_t handle;

		handle = STARPU_TASK_GET_HANDLE(task, i);
		if (!_starpu_handle_needs_conversion_task(handle, node))
			continue;
		conversion_task = _starpu_create_conversion_task(handle, node);
		conversion_task->mf_skip = 1;
		conversion_task->execute_on_a_specific_worker = 1;
		conversion_task->workerid = worker_id;
		/*
		 * Next tasks will need to know where these handles have gone.
		 */
		handle->mf_node = node;
		_starpu_task_submit_conversion_task(conversion_task, worker_id);
	}

	task->mf_skip = 1;
	starpu_task_list_push_back(&worker->local_tasks, task);
	STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
	goto pick;

profiling:
	if (profiling)
	{
		struct starpu_profiling_task_info *profiling_info;
		profiling_info = task->profiling_info;

		/* The task may have been created before profiling was enabled,
		 * so we check if the profiling_info structure is available
		 * even though we already tested if profiling is enabled. */
		if (profiling_info)
		{
			memcpy(&profiling_info->pop_start_time,
				&pop_start_time, sizeof(struct timespec));
			_starpu_clock_gettime(&profiling_info->pop_end_time);
		}
	}

	if(task->prologue_callback_pop_func)
		task->prologue_callback_pop_func(task->prologue_callback_pop_arg);

	return task;
}