Esempio n. 1
0
/*
 * Shut SOCL down. Only the first call does any work; later calls return
 * immediately because of the function-local flag.
 */
void soclShutdown() {
   static int already_done = 0;

   if (already_done)
      return;
   already_done = 1;

   starpu_pthread_mutex_lock(&_socl_mutex);

   /* Wait for every submitted task both before and after gc_stop(). */
   if (_starpu_init)
      starpu_task_wait_for_all();
   gc_stop();
   if (_starpu_init)
      starpu_task_wait_for_all();

   /* Report whatever entities are still counted as active. */
   int leftover = gc_active_entity_count();
   if (leftover != 0) {
      DEBUG_MSG("Unreleased entities: %d\n", leftover);
      gc_print_remaining_entities();
   }

   /* StarPU is only shut down when it was started and did not fail with -ENODEV. */
   if (_starpu_init && _starpu_init_failed != -ENODEV)
      starpu_shutdown();

   starpu_pthread_mutex_unlock(&_socl_mutex);

   /* free(NULL) is a no-op, so no guard is needed. */
   free(socl_devices);
   socl_devices = NULL;
}
Esempio n. 2
0
/*
 * NITER times: partition the vector with the two-way filter, run use_handle()
 * on each of the two sub-data, unpartition, then run use_handle() on the
 * whole vector.
 *
 * Returns 0 on success and also when no worker can execute the tasks
 * (-ENODEV); returns 1 when starpu_init() fails for any other reason.
 */
int main(int argc, char **argv)
{
	int ret;

	/* Do not go on with a runtime that failed to start. */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
	{
		fprintf(stderr, "WARNING: No one can execute this task\n");
		return 0;
	}
	if (ret != 0)
	{
		fprintf(stderr, "starpu_init failed (%d)\n", ret);
		return 1;
	}

	starpu_data_malloc_pinned_if_possible((void **)&buffer, VECTORSIZE);

	starpu_vector_data_register(&v_handle, 0, (uintptr_t)buffer, VECTORSIZE, sizeof(char));

	struct starpu_data_filter f = {
		.filter_func = starpu_vector_divide_in_2_filter_func,
		/* there are only 2 children */
		.nchildren = 2,
		/* the length of the first part */
		.filter_arg = VECTORSIZE/2,
		.get_nchildren = NULL,
		.get_child_ops = NULL
	};

	unsigned iter;
	for (iter = 0; iter < NITER; iter++)
	{
		starpu_data_map_filters(v_handle, 1, &f);

		/* Use each of the two children in turn. */
		ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 0));
		if (ret == -ENODEV)
			goto enodev;

		ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 1));
		if (ret == -ENODEV)
			goto enodev;

		starpu_task_wait_for_all();

		starpu_data_unpartition(v_handle, 0);

		/* Use the whole vector again. */
		ret = use_handle(v_handle);
		if (ret == -ENODEV)
			goto enodev;

		starpu_task_wait_for_all();
	}

	starpu_data_unregister(v_handle);

	starpu_shutdown();

	return 0;

enodev:
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* yes, we do not perform the computation but we did detect that no one
	 * could perform the kernel, so this is not an error from StarPU */
	/* The runtime was started above, so stop it on this exit path too. */
	starpu_shutdown();
	return 0;
}
Esempio n. 3
0
/*
 * Submit `ntasks` tasks using dummy_cl, wait for them, then submit `ntasks`
 * tasks with a NULL codelet and wait again. Every task carries
 * check_task_callback as its callback, with the task itself as argument.
 *
 * Returns 0; a failed submission trips STARPU_ASSERT.
 */
int main(int argc, char **argv)
{
	starpu_init(NULL);

	fprintf(stderr, "#tasks : %d\n", ntasks);

	unsigned i;
	for (i = 0; i < ntasks; i++)
	{
		struct starpu_task *task = starpu_task_create();

		/* We check if the function is valid from the codelet or from
		 * the callback */
		task->cl = &dummy_cl;
		task->cl_arg = task;

		task->callback_func = check_task_callback;
		task->callback_arg = task;

		int ret = starpu_task_submit(task, NULL);
		STARPU_ASSERT(!ret);
	}

	starpu_task_wait_for_all();

	fprintf(stderr, "#empty tasks : %d\n", ntasks);

	/* We repeat the same experiment with null codelets */

	for (i = 0; i < ntasks; i++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = NULL;

		/* We check if the function is valid from the callback */
		task->callback_func = check_task_callback;
		task->callback_arg = task;

		int ret = starpu_task_submit(task, NULL);
		STARPU_ASSERT(!ret);
	}

	starpu_task_wait_for_all();

	starpu_shutdown();

	return 0;
}
/*
 * Start StarPU with dummy_sched_policy, submit `ntasks` tasks using
 * dummy_codelet and wait for all of them.
 *
 * Returns 77 when starpu_init() reports -ENODEV, 0 otherwise; any other
 * error goes through STARPU_CHECK_RETURN_VALUE.
 */
int main(int argc, char **argv)
{
	int ntasks = NTASKS;
	int ret;
	struct starpu_conf conf;

	starpu_conf_init(&conf);
	/* Terminated with ';' so that the assignment and the starpu_init()
	 * call are two separate statements. */
	conf.sched_policy = &dummy_sched_policy;
	ret = starpu_init(&conf);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_QUICK_CHECK
	/* Quick-check builds run a hundredth of the tasks. */
	ntasks /= 100;
#endif

	int i;
	for (i = 0; i < ntasks; i++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &dummy_codelet;
		task->cl_arg = NULL;

		ret = starpu_task_submit(task);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	starpu_task_wait_for_all();

	starpu_shutdown();

	return 0;
}
Esempio n. 5
0
/*
 * Start StarPU with the given scheduling policy and submit NTASKS tasks.
 * Each task uses either codelet A at STARPU_MIN_PRIO or codelet B at
 * STARPU_MAX_PRIO, chosen from starpu_drand48() seeded with 0.
 *
 * Exits the process with STARPU_TEST_SKIPPED when starpu_init() fails.
 * Returns 0 on success, -ENODEV when a submission reports -ENODEV.
 */
static int
run(struct starpu_sched_policy *policy)
{
    struct starpu_conf conf;
    int ret;
    int submitted;

    starpu_conf_init(&conf);
    conf.sched_policy = policy;
    ret = starpu_init(&conf);
    if (ret != 0)
        exit(STARPU_TEST_SKIPPED);
    starpu_profiling_status_set(1);

    struct starpu_codelet clA = { .cpu_funcs = {A}, .nbuffers = 0 };
    struct starpu_codelet clB = { .cpu_funcs = {B}, .nbuffers = 0 };

    starpu_srand48(0);

    submitted = 0;
    while (submitted < NTASKS)
    {
        struct starpu_task *task = starpu_task_create();
        int pick_a = ((int)(starpu_drand48()*2))%2;

        task->cl = pick_a ? &clA : &clB;
        task->priority = pick_a ? STARPU_MIN_PRIO : STARPU_MAX_PRIO;
        task->detach = 1;

        ret = starpu_task_submit(task);
        if (ret == -ENODEV)
        {
            /* Nobody can run the task: stop the runtime and report it. */
            starpu_shutdown();
            return -ENODEV;
        }
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

        submitted++;
    }

    starpu_task_wait_for_all();
    FPRINTF(stdout,"\n");

    starpu_shutdown();
    return 0;
}
/*
 * For NLOOPS iterations, create two dummy tasks, submit the first one
 * synchronously, declare the second one dependent on it and submit the
 * second one synchronously as well.
 *
 * Returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED when StarPU reports -ENODEV.
 */
int main(int argc, char **argv)
{
	unsigned nloops = NLOOPS;
	unsigned loop = 0;
	int ret;

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV)
		return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	while (loop < nloops)
	{
		struct starpu_task *taskA = create_dummy_task();
		struct starpu_task *taskB = create_dummy_task();

		/* taskA is kept alive after termination (destroy = 0) so that a
		 * dependency can still be declared on it; it is destroyed by
		 * hand at the end of the iteration. */
		taskA->destroy = 0;
		taskA->synchronous = 1;

		ret = starpu_task_submit(taskA);
		if (ret == -ENODEV)
			goto enodev;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

		starpu_task_declare_deps_array(taskB, 1, &taskA);
		taskB->synchronous = 1;

		ret = starpu_task_submit(taskB);
		if (ret == -ENODEV)
			goto enodev;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

		starpu_task_destroy(taskA);

		loop++;
	}

	ret = starpu_task_wait_for_all();
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

	starpu_shutdown();
	return EXIT_SUCCESS;

enodev:
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* Nothing was computed, but detecting that no worker can run the
	 * kernel is not a StarPU error. */
	starpu_shutdown();
	return STARPU_TEST_SKIPPED;
}
Esempio n. 7
0
/*
 * Main program: every MPI rank builds NUM_EL elements towards its left
 * neighbour and NUM_EL towards its right neighbour (the ranks form a ring),
 * then inserts work for each element NUM_LOOPS times.
 *
 * Returns 0 on success, 1 when the element arrays cannot be allocated.
 */
int main(int argc, char * argv[])
{
    /* Init */
    int ret;
    int mpi_rank, mpi_size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

    ret = starpu_init(NULL);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
    ret = starpu_mpi_init(NULL, NULL, 0);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

    /*element initialization : domains are connected as a ring for this test*/
    int num_elements=NUM_EL;
    struct element * el_left=malloc(num_elements*sizeof(el_left[0]));
    struct element * el_right=malloc(num_elements*sizeof(el_right[0]));
    if (el_left == NULL || el_right == NULL)
    {
        /* Release whichever array was obtained and stop cleanly. */
        FPRINTF(stderr, "Cannot allocate %d elements\n", num_elements);
        free(el_left);
        free(el_right);
        starpu_mpi_shutdown();
        starpu_shutdown();
        MPI_Finalize();
        return 1;
    }

    int i;
    for(i=0; i<num_elements; i++)
    {
        /* Left neighbour is rank-1 and right neighbour is rank+1, both modulo the size. */
        init_element(el_left+i,i+1,((mpi_rank-1)+mpi_size)%mpi_size);
        init_element(el_right+i,i+1,(mpi_rank+1)%mpi_size);
    }

    /* Communication loop */
    for (i=0; i<NUM_LOOPS; i++) //number of "computations loops"
    {
        int e;
        for (e=0; e<num_elements; e++) //Do something for each elements
        {
            insert_work_for_one_element(el_right+e);
            insert_work_for_one_element(el_left+e);
        }
    }
    /* End */
    starpu_task_wait_for_all();

    for(i=0; i<num_elements; i++)
    {
        free_element(el_left+i);
        free_element(el_right+i);
    }
    /* The arrays themselves were obtained with malloc() above. */
    free(el_left);
    free(el_right);

    starpu_mpi_shutdown();
    starpu_shutdown();

    MPI_Finalize();
    FPRINTF(stderr, "No assert until end\n");
    return 0;
}
/*
 * Register NBUFFERS variables, call starpu_data_acquire_cb() on each of
 * them NITER times with callback_sync_data, then check that every value
 * ended up equal to NITER.
 *
 * Returns EXIT_SUCCESS when all values match, EXIT_FAILURE otherwise, and
 * STARPU_TEST_SKIPPED when initialisation reports -ENODEV.
 */
int main(int argc, char **argv)
{
	unsigned b;
	unsigned iter;
	int ret;
	int exit_code;

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV)
		return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* Each buffer remembers its own index and gets a handle on its value. */
	b = 0;
	while (b < NBUFFERS)
	{
		buffers[b].index = b;
		starpu_variable_data_register(&buffers[b].handle, STARPU_MAIN_RAM, (uintptr_t)&buffers[b].val, sizeof(unsigned));
		b++;
	}

	iter = 0;
	while (iter < NITER)
	{
		for (b = 0; b < NBUFFERS; b++)
		{
			ret = starpu_data_acquire_cb(buffers[b].handle, STARPU_RW,
						     callback_sync_data, &buffers[b]);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb");
		}
		iter++;
	}

	ret = starpu_task_wait_for_all();
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

	/* Unregister each buffer, then verify its final value. */
	exit_code = EXIT_SUCCESS;
	for (b = 0; b < NBUFFERS; b++)
	{
		starpu_data_unregister(buffers[b].handle);

		if (buffers[b].val == NITER)
			continue;

		FPRINTF(stderr, "buffer[%u] = %u should be %d\n", b, buffers[b].val, NITER);
		exit_code = EXIT_FAILURE;
	}

	starpu_shutdown();

	return exit_code;
}
Esempio n. 9
0
/*
 * Submit one task built by hand and one built with starpu_task_insert(),
 * both carrying a prologue callback and a "pop" prologue callback, on a
 * handle registered over the local integer `v`.
 *
 * Returns 0 on success, 77 when StarPU reports -ENODEV, and 1 when the
 * callback argument cannot be allocated.
 */
int main(int argc, char **argv)
{
	int v=40;
	int ret;

	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int));
	double *x = (double*)malloc(sizeof(*x));
	if (x == NULL)
	{
		/* Nothing has been submitted yet: undo the setup and give up. */
		starpu_data_unregister(handle);
		starpu_shutdown();
		return 1;
	}

	struct starpu_task *task = starpu_task_create();
	task->cl = &cl;
	task->prologue_callback_func = callback_func;
	task->prologue_callback_arg = NULL;

	task->prologue_callback_pop_func = pop_prologue_callback_func;
	task->prologue_callback_pop_arg = (void*) 5;

	task->handles[0] = handle;

	ret = starpu_task_submit(task);
	if (ret == -ENODEV) goto enodev;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

	*x = -999.0;
	/* The pop argument is passed as a pointer, like the hand-built task
	 * above, because it travels through a variadic argument list. */
	ret = starpu_task_insert(&cl,
				 STARPU_RW, handle,
				 STARPU_PROLOGUE_CALLBACK, prologue_callback_func,
				 STARPU_PROLOGUE_CALLBACK_ARG, x,
				 STARPU_PROLOGUE_CALLBACK_POP, pop_prologue_callback_func,
				 STARPU_PROLOGUE_CALLBACK_POP_ARG, (void*) 5,
				 0);
	if (ret == -ENODEV) goto enodev;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	starpu_task_wait_for_all();

enodev:
	/* Shared exit path: reached both on success and on -ENODEV. */
	starpu_data_unregister(handle);
	free(x);
	FPRINTF(stderr, "v -> %d\n", v);
	starpu_shutdown();
	return (ret == -ENODEV) ? 77 : 0;
}
Esempio n. 10
0
/*
 * Start StarPU with the file-level `conf`, submit NTASKS tasks using
 * dummy_codelet with no argument, wait for them and shut down.
 * Always returns 0.
 */
int main(int argc, char **argv)
{
	unsigned submitted = 0;

	starpu_init(&conf);

	while (submitted < NTASKS)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &dummy_codelet;
		task->cl_arg = NULL;

		starpu_task_submit(task, NULL);

		submitted++;
	}

	starpu_task_wait_for_all();
	starpu_shutdown();

	return 0;
}
Esempio n. 11
0
/*
 * Allocate and register NBUFFERS vectors, then NITER times: run
 * use_handle() on every vector, wait, acquire every vector in STARPU_RW
 * mode and release them all again. Always returns 0.
 */
int main(int argc, char **argv)
{
	unsigned b;
	unsigned iter = 0;

	starpu_init(NULL);

	/* Allocate each buffer and register it as a vector of chars. */
	b = 0;
	while (b < NBUFFERS)
	{
		starpu_data_malloc_pinned_if_possible((void **)&buffer[b], VECTORSIZE);
		starpu_vector_data_register(&v_handle[b], 0,
				(uintptr_t)buffer[b], VECTORSIZE, sizeof(char));
		b++;
	}

	while (iter < NITER)
	{
		/* Use the buffers on the workers, so that the data may no
		 * longer be in main memory. */
		for (b = 0; b < NBUFFERS; b++)
		{
			use_handle(v_handle[b]);
		}

		starpu_task_wait_for_all();

		/* Acquire every piece of data ... */
		for (b = 0; b < NBUFFERS; b++)
		{
			starpu_data_acquire(v_handle[b], STARPU_RW);
		}

		/* ... then release them all. */
		for (b = 0; b < NBUFFERS; b++)
		{
			starpu_data_release(v_handle[b]);
		}

		iter++;
	}

	/* Cleanup: unregister every handle. */
	for (b = 0; b < NBUFFERS; b++)
	{
		starpu_data_unregister(v_handle[b]);
	}

	starpu_shutdown();

	return 0;
}
Esempio n. 12
0
/*
 * Print the value returned by starpu_asynchronous_copy_disabled(), then
 * insert one task that reads a handle registered over a local integer.
 *
 * Returns 0 on success and STARPU_TEST_SKIPPED when StarPU reports -ENODEV.
 */
int main(int argc, char **argv)
{
     int var = 42;
     starpu_data_handle_t handle;
     int ret;

     ret = starpu_init(NULL);
     if (ret == -ENODEV)
          return STARPU_TEST_SKIPPED;
     STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

     int async_copy_disabled = starpu_asynchronous_copy_disabled();
     FPRINTF(stderr, "copy %d\n", async_copy_disabled);

     starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));

     ret = starpu_task_insert(&cl, STARPU_R, handle, 0);
     if (ret == -ENODEV)
     {
          starpu_data_unregister(handle);
          starpu_shutdown();
          /* Nothing was computed, but detecting that no worker can run
           * the kernel is not a StarPU error. */
          fprintf(stderr, "WARNING: No one can execute this task\n");
          return STARPU_TEST_SKIPPED;
     }
     STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

     starpu_task_wait_for_all();
     starpu_data_unregister(handle);
     starpu_shutdown();

     return 0;
}
/*
 * Allocate and register `nbuffers` vectors of `vectorsize` chars, then
 * `niter` times: run use_handle() on every vector, wait, acquire every
 * vector in STARPU_RW mode and release them all again.
 *
 * Returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED when StarPU reports -ENODEV.
 */
int main(int argc, char **argv)
{
	int ret;
	int idx;
	int iter;

#ifdef STARPU_QUICK_CHECK
	/* Quick-check builds shrink the problem. */
	nbuffers /= 4;
	niter /= 4;
	vectorsize /= 8;
#endif

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV)
		return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* Allocate each buffer and register it with StarPU. */
	idx = 0;
	while (idx < nbuffers)
	{
		ret = starpu_malloc((void **)&buffer[idx], vectorsize);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
		starpu_vector_data_register(&v_handle[idx], STARPU_MAIN_RAM,
				(uintptr_t)buffer[idx], vectorsize, sizeof(char));
		idx++;
	}

	iter = 0;
	while (iter < niter)
	{
		/* Use the buffers on the workers, so that the data may no
		 * longer be in main memory. */
		for (idx = 0; idx < nbuffers; idx++)
		{
			ret = use_handle(v_handle[idx]);
			if (ret == -ENODEV)
				goto enodev;
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		ret = starpu_task_wait_for_all();
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

		/* Acquire every piece of data ... */
		for (idx = 0; idx < nbuffers; idx++)
		{
			ret = starpu_data_acquire(v_handle[idx], STARPU_RW);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
		}

		/* ... then release them all. */
		for (idx = 0; idx < nbuffers; idx++)
		{
			starpu_data_release(v_handle[idx]);
		}

		iter++;
	}

	/* Cleanup: unregister each handle and free its buffer. */
	for (idx = 0; idx < nbuffers; idx++)
	{
		starpu_data_unregister(v_handle[idx]);
		starpu_free(buffer[idx]);
	}

	starpu_shutdown();
	return EXIT_SUCCESS;

enodev:
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* Nothing was computed, but detecting that no worker can run the
	 * kernel is not a StarPU error. */
	starpu_shutdown();
	return STARPU_TEST_SKIPPED;
}
Esempio n. 14
0
/*
 * Register a single float, submit `niter` tasks that access it in
 * STARPU_RW mode, then acquire it and print its final value.
 *
 * argv[1], when present, overrides `niter`. Returns 0; exits with status 0
 * as soon as a submission reports -ENODEV.
 */
int main(int argc, char **argv)
{
	unsigned i;
	float foo;
	starpu_data_handle float_array_handle;
	/* Zero-initialise the codelet so that every field which is not set
	 * explicitly below has a defined value. */
	starpu_codelet cl = {0};

	starpu_init(NULL);
	if (argc == 2) niter = atoi(argv[1]);
	foo = 0.0f;

	starpu_variable_data_register(&float_array_handle, 0 /* home node */,
				      (uintptr_t)&foo, sizeof(float));

#ifdef STARPU_USE_OPENCL
	starpu_opencl_load_opencl_from_file("examples/basic_examples/variable_kernels_opencl_codelet.cl", &opencl_code);
#endif

	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL;
	cl.cpu_func = cpu_codelet;
#ifdef STARPU_USE_CUDA
	cl.cuda_func = cuda_codelet;
#endif
#ifdef STARPU_USE_OPENCL
	cl.opencl_func = opencl_codelet;
#endif
	cl.nbuffers = 1;
	cl.model = NULL;

	for (i = 0; i < niter; i++)
	{
		struct starpu_task *task = starpu_task_create();
		int ret;

		task->cl = &cl;

		task->callback_func = NULL;

		task->buffers[0].handle = float_array_handle;
		task->buffers[0].mode = STARPU_RW;

		ret = starpu_task_submit(task, NULL);
		if (STARPU_UNLIKELY(ret == -ENODEV))
		{
			fprintf(stderr, "No worker may execute this task\n");
			exit(0);
		}
	}

	starpu_task_wait_for_all();

	/* update the array in RAM */
	starpu_data_acquire(float_array_handle, STARPU_R);

	fprintf(stderr, "variable -> %f\n", foo);

	starpu_data_release(float_array_handle);

	starpu_shutdown();

	return 0;
}
Esempio n. 15
0
/*
 * Exercise StarPU-MPI communications on both MPI_COMM_WORLD and a
 * communicator obtained by splitting the world by rank parity.
 *
 * Requires at least 4 MPI processes; otherwise returns STARPU_TEST_SKIPPED.
 * Returns 0 otherwise (incorrect values trip STARPU_ASSERT_MSG).
 */
int main(int argc, char **argv)
{
	int size, x;
	int color;
	MPI_Comm newcomm;
	int rank, newrank;
	int ret;
	starpu_data_handle_t data[3];
	int value = 90;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        if (size < 4)
        {
		FPRINTF(stderr, "We need at least 4 processes.\n");
                MPI_Finalize();
                return STARPU_TEST_SKIPPED;
        }

	/* Even and odd world ranks end up in two separate communicators. */
	color = rank%2;
	MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm);
	MPI_Comm_rank(newcomm, &newrank);
	FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color);

	/* Plain MPI exchange inside the new communicator: newrank 0 sends its
	 * world rank to newrank 1, which keeps it in x as the reference value. */
	if (newrank == 0)
	{
		FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank);
		MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm);
	}
	else if (newrank == 1)
	{
		MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE);
		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x);
	}

        ret = starpu_init(NULL);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
        ret = starpu_mpi_init(NULL, NULL, 0);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	/* data[2] lives on MPI_COMM_WORLD: world rank 0 backs it with `value`,
	 * the other ranks register it with node -1 and a NULL pointer. */
	if (rank == 0)
	{
		starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int));
	}
	else
		starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int));
	starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD);

	/* data[0] and data[1] live on newcomm: newrank 0 backs both with its
	 * world rank; data[1] is registered later on newrank 1 (see below). */
	if (newrank == 0)
	{
		starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
		starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
	}
	else
		starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int));
	starpu_mpi_data_register_comm(data[0], 12, 0, newcomm);

	/* newrank 0 sends data[1] (tag 22) and data[0] (tag 12) to newrank 1,
	 * which checks each received value against x. */
	if (newrank == 0)
	{
		starpu_mpi_req req[2];
		starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm);
		starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm);
		starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE);
		starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE);
	}
	else if (newrank == 1)
	{
		int *xx;

		starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[0], STARPU_RW);
		xx = (int *)starpu_variable_get_local_ptr(data[0]);
		starpu_data_release(data[0]);
		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);

		/* NOTE(review): data[1] is registered here, but the tag-22
		 * message is received into data[0] — confirm this is intended. */
		starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int));
		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
		starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[0], STARPU_RW);
		xx = (int *)starpu_variable_get_local_ptr(data[0]);
		starpu_data_release(data[0]);
		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
	}

	/* Ring over MPI_COMM_WORLD: rank 0 sends data[2] to rank 1 and gets it
	 * back from rank size-1; every other rank adds 2 before forwarding, so
	 * rank 0 expects its initial value plus 2*(size-1). */
	if (rank == 0)
	{
		starpu_data_acquire(data[2], STARPU_RW);
		int rvalue = *((int *)starpu_variable_get_local_ptr(data[2]));
		starpu_data_release(data[2]);
		FPRINTF_MPI(stderr, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1);
		starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD);
		starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[2], STARPU_RW);
		int *xx = (int *)starpu_variable_get_local_ptr(data[2]);
		starpu_data_release(data[2]);
		FPRINTF_MPI(stderr, "Value back is %d\n", *xx);
		STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect (should be %d)\n", *xx, rvalue + (2*(size-1)));
	}
	else
	{
		/* The last rank closes the ring by sending back to rank 0. */
		int next = (rank == size-1) ? 0 : rank+1;
		starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[2], STARPU_RW);
		int *xx = (int *)starpu_variable_get_local_ptr(data[2]);
		FPRINTF_MPI(stderr, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next);
		*xx = *xx + 2;
		starpu_data_release(data[2]);
		starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD);
	}

	/* Only newranks 0 and 1 insert the task on newcomm and unregister
	 * data[0] and data[1]. */
	if (newrank == 0 || newrank == 1)
	{
		starpu_mpi_insert_task(newcomm, &mycodelet,
				       STARPU_RW, data[0],
				       STARPU_VALUE, &x, sizeof(x),
				       STARPU_EXECUTE_ON_NODE, 1,
				       0);

		starpu_task_wait_for_all();
		starpu_data_unregister(data[0]);
		starpu_data_unregister(data[1]);
	}
	starpu_data_unregister(data[2]);

	starpu_mpi_shutdown();
	starpu_shutdown();
	MPI_Comm_free(&newcomm);
        MPI_Finalize();
	return 0;
}
/*
 * Insert a series of StarPU-MPI tasks over two variables, x0 backed by
 * memory on rank 0 and x1 backed by memory on rank 1, with and without an
 * explicit STARPU_EXECUTE_ON_NODE. Only ranks 0 and 1 take part; every
 * other rank goes straight to shutdown.
 *
 * Returns 0; a failed insertion trips assert().
 */
int main(int argc, char **argv)
{
	int ret, rank, size, err, node;
	long x0=32;
	int x1=23;
	starpu_data_handle_t data_handlesx0;
	starpu_data_handle_t data_handlesx1;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (rank != 0 && rank != 1) goto end;

	/* Each of the two ranks backs its own variable with real memory and
	 * registers the other one with node -1 and a NULL pointer. */
	if (rank == 0)
	{
		starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0));
		starpu_mpi_data_register(data_handlesx0, 0, rank);
		starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1));
		starpu_mpi_data_register(data_handlesx1, 1, 1);
	}
	else if (rank == 1)
	{
		starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1));
		starpu_mpi_data_register(data_handlesx1, 1, rank);
		starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0));
		starpu_mpi_data_register(data_handlesx0, 0, 0);
	}

	/* In every insertion below, `node` is handed to the codelet through
	 * STARPU_VALUE. For the first three it is the rank recorded for x1,
	 * x0 and x0 again (unchanged) respectively. */
	node = starpu_mpi_data_get_rank(data_handlesx1);
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1,
				     0);
	assert(err == 0);

	node = starpu_mpi_data_get_rank(data_handlesx0);
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1,
				     0);
	assert(err == 0);

	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
				     0);
	assert(err == 0);

	/* From here on the executing node is requested explicitly with
	 * STARPU_EXECUTE_ON_NODE. */
	node = 1;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	/* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
	   going to overwrite the node even though the data model clearly specifies
	   which node is going to execute the codelet */
	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	/* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
	   going to overwrite the node even though the data model clearly specifies
	   which node is going to execute the codelet */
	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	FPRINTF_MPI(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();
	starpu_data_unregister(data_handlesx0);
	starpu_data_unregister(data_handlesx1);

end:
	/* Reached by every rank, including those that did not take part. */
	starpu_mpi_shutdown();
	starpu_shutdown();

	return 0;
}
Esempio n. 17
0
/*
 * Register two complex numbers, display and compare them (they must
 * differ), copy the first into the second, then display and compare them
 * again (they must now match).
 *
 * Returns 0 when both checks pass, 1 when either check fails, and 77 when
 * StarPU reports -ENODEV.
 */
int main(int argc, char **argv)
{
	int ret = 0;
	/* Set as soon as one of the two checks fails. */
	int failed = 0;
	starpu_data_handle_t handle1;
	starpu_data_handle_t handle2;

	double real = 45.0;
	double imaginary = 12.0;
	double copy_real = 78.0;
	double copy_imaginary = 78.0;

	/* The address of `compare` is handed to cl_compare through
	 * STARPU_VALUE; its value is only read here after waiting for the
	 * tasks. */
	int compare = 0;
	int *compare_ptr = &compare;

	ret = starpu_init(NULL);
	if (ret == -ENODEV) return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_kernels.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif
	starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1);
	starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, &copy_real, &copy_imaginary, 1);

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_compare,
				 STARPU_R, handle1,
				 STARPU_R, handle2,
				 STARPU_VALUE, &compare_ptr, sizeof(compare_ptr),
				 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
	starpu_task_wait_for_all();
	if (compare != 0)
	{
	     FPRINTF(stderr, "Complex numbers should NOT be similar\n");
	     /* Remember the failure so that the exit status reports it. */
	     failed = 1;
	     goto end;
	}

	ret = starpu_task_insert(&cl_copy,
				 STARPU_R, handle1,
				 STARPU_W, handle2,
				 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	ret = starpu_task_insert(&cl_compare,
				 STARPU_R, handle1,
				 STARPU_R, handle2,
				 STARPU_VALUE, &compare_ptr, sizeof(compare_ptr),
				 0);
	if (ret == -ENODEV) goto end;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

	starpu_task_wait_for_all();

	if (compare != 1)
	{
	     FPRINTF(stderr, "Complex numbers should be similar\n");
	     failed = 1;
	}

end:
#ifdef STARPU_USE_OPENCL
	{
	     int ret2 = starpu_opencl_unload_opencl(&opencl_program);
	     STARPU_CHECK_RETURN_VALUE(ret2, "starpu_opencl_unload_opencl");
	}
#endif
	starpu_data_unregister(handle1);
	starpu_data_unregister(handle2);
	starpu_shutdown();
	if (ret == -ENODEV) return 77;
	return failed;
}
Esempio n. 18
0
int main(int argc, char **argv)
{
	int ret;

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scratch_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif
	A = (unsigned *) calloc(VECTORSIZE, sizeof(unsigned));

	starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, VECTORSIZE, sizeof(unsigned));
	starpu_vector_data_register(&B_handle, -1, (uintptr_t)NULL, VECTORSIZE, sizeof(unsigned));

	unsigned loop;
	for (loop = 0; loop < NLOOPS; loop++)
	{
		struct starpu_task *task_f = starpu_task_create();
		task_f->cl = &cl_f;
		task_f->handles[0] = A_handle;
		task_f->handles[1] = B_handle;

		ret = starpu_task_submit(task_f);
		if (ret == -ENODEV) goto enodev;
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	ret = starpu_task_wait_for_all();
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

	starpu_data_unregister(A_handle);
	starpu_data_unregister(B_handle);
#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();

	/* Check result */
	unsigned i;
	ret = EXIT_SUCCESS;
	for (i = 0; i < VECTORSIZE; i++)
	{
		if (A[i] != NLOOPS)
		{
			FPRINTF(stderr, "Error: Incorrect value A[%u] = %u != %d\n", i, A[i], NLOOPS);
			ret = EXIT_FAILURE;
			break;
		}
	}

	free(A);
	STARPU_RETURN(ret);

enodev:
	starpu_data_unregister(A_handle);
	starpu_data_unregister(B_handle);
#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();
	/* yes, we do not perform the computation but we did detect that no one
 	 * could perform the kernel, so this is not an error from StarPU */
	fprintf(stderr, "WARNING: No one can execute this task\n");
	return STARPU_TEST_SKIPPED;
}
/*
 * Rank 0 sends a pair of complex numbers to rank 1 (tag 10), receives
 * rank 1's handle back (tag 20) into a second handle and compares the two.
 *
 * Needs at least 2 nodes; with fewer, every rank returns 77. Otherwise
 * rank 0 returns 0 when cl_compare reported a non-zero `compare`, 1 when it
 * did not, and the other ranks return 0.
 */
int main(int argc, char **argv)
{
	int rank, nodes;
	int ret=0;
	int compare=0;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);

	if (nodes < 2)
	{
		fprintf(stderr, "This program needs at least 2 nodes (%d available)\n", nodes);
		ret = 77;
	}
	else
	{
		starpu_data_handle_t handle;
		starpu_data_handle_t handle2;

		double real[2] = {4.0, 2.0};
		double imaginary[2] = {7.0, 9.0};

		double real2[2] = {14.0, 12.0};
		double imaginary2[2] = {17.0, 19.0};

		/* Rank 1 starts from zeros so that the received values are visible. */
		if (rank == 1)
		{
			real[0] = 0.0;
			real[1] = 0.0;
			imaginary[0] = 0.0;
			imaginary[1] = 0.0;
		}

		starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2);
		starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2);

		if (rank == 0)
		{
			int *compare_ptr = &compare;

			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0);
			starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL);
			starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL);

			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0);
			starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0);
		}
		else if (rank == 1)
		{
			starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL);
			starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0);
			starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL);
		}

		starpu_task_wait_for_all();

		starpu_data_unregister(handle);
		starpu_data_unregister(handle2);
	}

	starpu_mpi_shutdown();
	starpu_shutdown();

	/* The comparison only ran when there were enough nodes; a skipped run
	 * must report 77 on rank 0 as well. */
	if (nodes >= 2 && rank == 0)
		return !compare;
	return ret;
}
Esempio n. 20
0
/*
 * Rank 1 sends two variables to rank 0, data[1] first and data[0] second,
 * while rank 0 receives them as data[0] first and data[1] second.
 *
 * rank:      MPI rank of the calling process.
 * sdetached: non-zero makes rank 1 send with starpu_mpi_isend_detached(),
 *            zero with the blocking starpu_mpi_send().
 * rdetached: non-zero makes rank 0 receive with starpu_mpi_irecv_detached(),
 *            zero with the blocking starpu_mpi_recv().
 *
 * Returns, on rank 0, 0 when both values match VAL0 and VAL1 and 1
 * otherwise; on other ranks, the value left in `ret` by the initialisation
 * calls.
 */
int do_test(int rank, int sdetached, int rdetached)
{
	int ret, i;
	int val[2];
	starpu_data_handle_t data[2];

	ret = starpu_init(NULL);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
        ret = starpu_mpi_init(NULL, NULL, 0);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	/* Only rank 1 starts with the expected values. */
	if (rank == 1)
	{
		val[0] = VAL0;
		val[1] = VAL1;
	}
	else
	{
		val[0] = -1;
		val[1] = -1;
	}
	starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0]));
	starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1]));
	starpu_mpi_data_register(data[0], 77, 1);
	starpu_mpi_data_register(data[1], 88, 1);

	if (rank == 1)
	{
		/* Send in reverse index order: data[1], then data[0]. */
		for(i=1 ; i>=0 ; i--)
		{
			if (sdetached)
				starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL);
			else
				starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD);
		}
	}
	else if (rank == 0)
	{
		/* Passed to `callback` for each detached receive. */
		int received = 0;

		for(i=0 ; i<2 ; i++)
			FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]);
		for(i=0 ; i<2 ; i++)
		{
			if (rdetached)
				starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received);
			else
				starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		}

		if (rdetached)
		{
			/* Wait on `cond` until `received` reaches 2; presumably
			 * `callback` increments it and signals `cond` — verify
			 * against its definition. */
			STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			while (received != 2)
			{
				FPRINTF_MPI(stderr, "Received %d messages\n", received);
				STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
			}
			STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
		}

		/* Print the values while both handles are acquired for reading. */
		for(i=0 ; i<2 ; i++)
			starpu_data_acquire(data[i], STARPU_R);
		for(i=0 ; i<2 ; i++)
			FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]);
		for(i=0 ; i<2 ; i++)
			starpu_data_release(data[i]);
	}
	FPRINTF_MPI(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	starpu_data_unregister(data[0]);
	starpu_data_unregister(data[1]);

	/* Only rank 0 judges the outcome. */
	if (rank == 0)
	{
		ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1;
	}
	starpu_mpi_shutdown();
	starpu_shutdown();
	return ret;
}
/*
 * In-place partitioning test.
 *
 * Registers a vector of 10 * (number of workers) unsigned integers holding
 * 0..size-1, prefetches it on every worker's memory node, partitions it into
 * one block per worker and submits one `scal_codelet` task per block, pinned
 * to the worker with the same index. After unpartitioning, every element is
 * expected to hold twice its index.
 *
 * Returns EXIT_SUCCESS when all values match, EXIT_FAILURE on a mismatch or
 * when the buffer cannot be allocated, and STARPU_TEST_SKIPPED when StarPU
 * reports -ENODEV or only one worker is available.
 */
int main(int argc, char **argv)
{
    unsigned *foo;
    starpu_data_handle_t handle;
    int ret;
    unsigned n, i, size;

    ret = starpu_initialize(NULL, &argc, &argv);
    if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
    ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

    n = starpu_worker_get_count();
    if (n == 1)
    {
        starpu_shutdown();
        return STARPU_TEST_SKIPPED;
    }

    size = 10 * n;

    foo = calloc(size, sizeof(*foo));
    if (foo == NULL)
    {
        /* Do not write through a NULL buffer; report and bail out cleanly */
        FPRINTF(stderr, "could not allocate %u elements\n", size);
        starpu_shutdown();
        return EXIT_FAILURE;
    }
    for (i = 0; i < size; i++)
        foo[i] = i;

    starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo));

    /* Broadcast the data to force in-place partitioning */
    for (i = 0; i < n; i++)
        starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);

    struct starpu_data_filter f =
    {
        .filter_func = starpu_vector_filter_block,
        .nchildren = n,
    };

    starpu_data_partition(handle, &f);

    /* One task per sub-vector, each forced onto the worker of the same index */
    for (i = 0; i < f.nchildren; i++)
    {
        struct starpu_task *task = starpu_task_create();

        task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
        task->cl = &scal_codelet;
        task->execute_on_a_specific_worker = 1;
        task->workerid = i;

        ret = starpu_task_submit(task);
        if (ret == -ENODEV) goto enodev;
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
    }

    ret = starpu_task_wait_for_all();
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
    starpu_data_unregister(handle);
    starpu_shutdown();

    /* The buffer is back in application hands: verify it, then release it */
    ret = EXIT_SUCCESS;
    for (i = 0; i < size; i++)
    {
        if (foo[i] != i*2)
        {
            FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i);
            ret = EXIT_FAILURE;
        }
    }

    free(foo);
    return ret;

enodev:
    /* The handle is still partitioned here: gather the children back before
     * unregistering it, then release the application buffer */
    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
    starpu_data_unregister(handle);
    free(foo);
    fprintf(stderr, "WARNING: No one can execute this task\n");
    /* yes, we do not perform the computation but we did detect that no one
     * could perform the kernel, so this is not an error from StarPU */
    starpu_shutdown();
    return STARPU_TEST_SKIPPED;
}
/*
 * Token ring test over StarPU-MPI.
 *
 * A single token is passed around all ranks NITER times; every rank calls
 * increment_token() once per loop. Receives and sends are detached and
 * tagged with loop*size + rank so that each hop uses a distinct tag. After
 * shutdown, the last rank asserts that the token equals NITER * size.
 *
 * Returns 0 on completion, or STARPU_TEST_SKIPPED when fewer than two MPI
 * processes are available.
 */
int main(int argc, char **argv)
{
	int ret, rank, size;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(NULL, NULL, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size < 2)
	{
		if (rank == 0)
			FPRINTF(stderr, "We need at least 2 processes.\n");

		/* Tear down through StarPU rather than calling MPI_Finalize()
		 * directly, so that both StarPU-MPI and StarPU itself are shut
		 * down on the skip path as well */
		starpu_mpi_shutdown();
		starpu_shutdown();
		return STARPU_TEST_SKIPPED;
	}


	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));

	int nloops = NITER;
	int loop;

	int last_loop = nloops - 1;
	int last_rank = size - 1;

	for (loop = 0; loop < nloops; loop++)
	{
		int tag = loop*size + rank;

		if (loop == 0 && rank == 0)
		{
			/* Rank 0 creates the token instead of receiving it */
			token = 0;
			FPRINTF(stdout, "Start with token value %u\n", token);
		}
		else
		{
			/* Receive from the previous rank in the ring */
			starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL);
		}

		increment_token();

		if (loop == last_loop && rank == last_rank)
		{
			/* End of the ring: read the token instead of forwarding it */
			starpu_data_acquire(token_handle, STARPU_R);
			FPRINTF(stdout, "Finished : token value %u\n", token);
			starpu_data_release(token_handle);
		}
		else
		{
			/* Forward to the next rank, which expects tag+1 */
			starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL);
		}
	}

	starpu_task_wait_for_all();

	starpu_data_unregister(token_handle);
	starpu_mpi_shutdown();
	starpu_shutdown();

	if (rank == last_rank)
	{
		/* nloops is an int, so it is printed with %d */
		FPRINTF(stderr, "[%d] token = %u == %d * %d ?\n", rank, token, nloops, size);
		STARPU_ASSERT(token == nloops*size);
	}

	return 0;
}
Esempio n. 23
0
/*
 * Blocked matrix multiplication driver.
 *
 * Initializes StarPU and CUBLAS, sets up and partitions the A/B/C data,
 * then for `niter` iterations submits one task per (x, y) slice pair and
 * waits for the whole iteration. Timing (and, when `bound` is set, the
 * computed bounds) are printed afterwards.
 *
 * Returns 0 on success, or 77 when StarPU reports -ENODEV either at
 * initialization or when submitting a task. In the latter case the data is
 * still unpartitioned, unregistered and freed, but the output is not
 * verified since the computation did not run to completion.
 */
int main(int argc, char **argv)
{
	double start, end;
	int ret;

	parse_args(argc, argv);

#ifdef STARPU_QUICK_CHECK
	niter /= 10;
#endif

	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	starpu_cublas_init();

	init_problem_data();
	partition_mult_data();

	if (bound)
		starpu_bound_start(0, 0);

	start = starpu_timing_now();

	unsigned x, y, iter;
	for (iter = 0; iter < niter; iter++)
	{
		for (x = 0; x < nslicesx; x++)
		for (y = 0; y < nslicesy; y++)
		{
			struct starpu_task *task = starpu_task_create();

			task->cl = &cl;

			task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y);
			task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x);
			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y);

			task->flops = 2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim;

			ret = starpu_task_submit(task);
			if (ret == -ENODEV)
			{
			     ret = 77;
			     goto enodev;
			}
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		starpu_task_wait_for_all();
	}


	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	double timing = end - start;
	double min, min_int;
	double flops = 2.0*((unsigned long long)niter)*((unsigned long long)xdim)
		           *((unsigned long long)ydim)*((unsigned long long)zdim);

	if (bound)
		starpu_bound_compute(&min, &min_int, 1);

	PRINTF("# x\ty\tz\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops\tTims\tTiGFlops");
	PRINTF("\n");
	PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0);
	if (bound)
		PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0);
	PRINTF("\n");

enodev:
	/* Common teardown, reached both on success (ret == 0) and on skip (ret == 77) */
	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);

	starpu_data_unregister(A_handle);
	starpu_data_unregister(B_handle);
	starpu_data_unregister(C_handle);

	/* Only verify the result when every task was actually submitted: on the
	 * skip path C is incomplete and the check would report a bogus failure */
	if (check && ret != 77)
		check_output();

	starpu_free(A);
	starpu_free(B);
	starpu_free(C);

	starpu_cublas_shutdown();
	starpu_shutdown();

	return ret;
}
/*
 * starpu_mpi_task_insert() test with STARPU_EXECUTE_ON_DATA.
 *
 * Two integer variables are registered: handle 0 with tag 0 and rank 0
 * (initial value 11), handle 1 with tag 1 and rank 1 (initial value 12).
 * Every rank registers a placeholder (home node -1, NULL pointer) for the
 * handle it does not hold. One `mycodelet` task accessing both handles in
 * RW mode is inserted with STARPU_EXECUTE_ON_DATA set to handle 1, then
 * both values are fetched back to rank 0 and compared against 12 and 144.
 *
 * Returns 0 when the values match (and on every rank other than 0),
 * EXIT_FAILURE otherwise.
 */
int main(int argc, char **argv)
{
	int rank, size, err;
	int x[2];
	int ret, i;
	starpu_data_handle_t data_handles[2];
	int values[2];

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	/* Handle i lives on rank i with initial value 11 + i; everyone else
	 * only registers a placeholder for it */
	for (i = 0; i < 2; i++)
	{
		if (rank == i)
		{
			x[i] = 11 + i;
			starpu_variable_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)&x[i], sizeof(x[i]));
		}
		else
		{
			starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(x[i]));
		}
	}

	for (i = 0; i < 2; i++)
		starpu_mpi_data_register(data_handles[i], i, i);

	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
				     STARPU_RW, data_handles[0], STARPU_RW, data_handles[1],
				     STARPU_EXECUTE_ON_DATA, data_handles[1],
				     0);
	assert(err == 0);
	starpu_task_wait_for_all();

	/* Bring both results to rank 0, which reads them through the local pointer */
	for (i = 0; i < 2; i++)
	{
		starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL);
		if (rank != 0)
			continue;

		starpu_data_acquire(data_handles[i], STARPU_R);
		values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i]));
		starpu_data_release(data_handles[i]);
	}

	ret = 0;
	if (rank == 0)
	{
		FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]);
		if (!(values[0] == 12 && values[1] == 144))
			ret = EXIT_FAILURE;
	}

	for (i = 0; i < 2; i++)
		starpu_data_unregister(data_handles[i]);

	starpu_mpi_shutdown();
	starpu_shutdown();

	return ret;
}
Esempio n. 25
0
/*
 * AXPY example: y = alpha * x + y on vectors of N elements split into
 * NBLOCKS blocks, one task per block.
 *
 * Returns the result of check() on a completed run, or 77 when StarPU
 * reports -ENODEV at initialization or at task submission. On the
 * submission skip path the data is still unpartitioned, unregistered and
 * freed, but check() is not run.
 */
int main(int argc, char **argv)
{
	int ret, exit_value = 0;

	/* Initialize StarPU */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl",
						  &opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

	starpu_cublas_init();

	/* This is equivalent to
		_vec_x = malloc(N*sizeof(TYPE));
		_vec_y = malloc(N*sizeof(TYPE));
	*/
	starpu_malloc((void **)&_vec_x, N*sizeof(TYPE));
	assert(_vec_x);

	starpu_malloc((void **)&_vec_y, N*sizeof(TYPE));
	assert(_vec_y);

	unsigned i;
	for (i = 0; i < N; i++)
	{
		_vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */
		_vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */
	}

	FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]);
	FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]);

	/* Declare the data to StarPU */
	starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE));
	starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE));

	/* Divide the vector into blocks */
	struct starpu_data_filter block_filter =
	{
		.filter_func = starpu_vector_filter_block,
		.nchildren = NBLOCKS
	};

	starpu_data_partition(_handle_x, &block_filter);
	starpu_data_partition(_handle_y, &block_filter);

	double start;
	double end;

	start = starpu_timing_now();

	/* One AXPY task per pair of matching x/y blocks */
	unsigned b;
	for (b = 0; b < NBLOCKS; b++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &axpy_cl;

		task->cl_arg = &_alpha;
		task->cl_arg_size = sizeof(_alpha);

		task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b);
		task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b);

		task->tag_id = b;

		ret = starpu_task_submit(task);
		if (ret == -ENODEV)
		{
			exit_value = 77;
			goto enodev;
		}
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	starpu_task_wait_for_all();

enodev:
	/* Common teardown for both the completed and the skipped run */
	starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM);
	starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM);
	starpu_data_unregister(_handle_x);
	starpu_data_unregister(_handle_y);

	end = starpu_timing_now();
        double timing = end - start;

	FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing);

	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha);

	/* Only verify the result when all tasks could be submitted */
	if (exit_value != 77)
		exit_value = check();

	starpu_free((void *)_vec_x);
	starpu_free((void *)_vec_y);

#ifdef STARPU_USE_OPENCL
        ret = starpu_opencl_unload_opencl(&opencl_program);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif

	/* Pair the starpu_cublas_init() above with its shutdown counterpart */
	starpu_cublas_shutdown();

	/* Stop StarPU */
	starpu_shutdown();

	return exit_value;
}
Esempio n. 26
0
/*
 * Submit the whole task graph of a blocked factorization of `dataA`
 * (nblocks x nblocks sub-blocks), wait for it and print the timing.
 *
 * For each step k: one "11" task on the diagonal block, "12"/"21" tasks on
 * the remaining blocks of index k, then "22" tasks on the trailing
 * sub-matrix. Blocks that no later step touches are flagged with
 * starpu_data_wont_use().
 *
 * Returns 0 once everything has completed, or -ENODEV as soon as one of
 * the create_task_* helpers reports it (nothing is waited for or printed in
 * that case).
 */
static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
{
	int ret;
	unsigned step;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	const double start = starpu_timing_now();

	/* create all the DAG nodes */
	for (step = 0; step < nblocks; step++)
	{
		const unsigned first = step + 1;
		unsigned row, col;

		ret = create_task_11(dataA, step);
		if (ret == -ENODEV)
			return ret;

		for (row = first; row < nblocks; row++)
		{
			ret = create_task_12(dataA, step, row);
			if (ret == -ENODEV)
				return ret;

			ret = create_task_21(dataA, step, row);
			if (ret == -ENODEV)
				return ret;
		}

		/* The diagonal block is final once its panel tasks are queued */
		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, step, step));

		for (row = first; row < nblocks; row++)
		{
			for (col = first; col < nblocks; col++)
			{
				ret = create_task_22(dataA, step, row, col);
				if (ret == -ENODEV)
					return ret;
			}
		}

		/* Same for the panel blocks of this step */
		for (row = first; row < nblocks; row++)
		{
			starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, step, row));
			starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, row, step));
		}
	}

	/* stall the application until the end of computations */
	starpu_task_wait_for_all();

	const double end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	const double timing = end - start;
	const unsigned n = starpu_matrix_get_nx(dataA);
	const double flop = (2.0f*n*n*n)/3.0f;

	PRINTF("# size\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops");
	PRINTF("\n");

	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
	if (bound)
	{
		double min;

		starpu_bound_compute(&min, NULL, 0);
		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
	}
	PRINTF("\n");

	return 0;
}
/*
 * Submit the whole task graph of a blocked factorization with pivoting,
 * wait for it and report the elapsed time.
 *
 * dataAp          - handle(s) describing the matrix, interpreted by get_block
 * piv_description - pivoting descriptors forwarded to the pivot tasks
 * nblocks         - number of blocks per dimension
 * get_block       - callback returning the handle of a given block; it is
 *                   called here as get_block(dataAp, nblocks, a, b)
 * timing          - output: end - start as measured by starpu_timing_now()
 *
 * Returns 0 once everything has completed, or -ENODEV as soon as one of
 * the create_task_* helpers reports it (in which case *timing is left
 * untouched).
 */
static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
				  struct piv_s *piv_description,
				  unsigned nblocks,
				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
				  double *timing)
{
	int ret;
	unsigned step;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	const double start = starpu_timing_now();

	/* create all the DAG nodes */
	for (step = 0; step < nblocks; step++)
	{
		const unsigned first = step + 1;
		unsigned row, col;

		ret = create_task_11_pivot(dataAp, nblocks, step, piv_description, get_block);
		if (ret == -ENODEV)
			return ret;

		/* One pivot task for every index other than the current step */
		for (row = 0; row < nblocks; row++)
		{
			if (row == step)
				continue;

			ret = create_task_pivot(dataAp, nblocks, piv_description, step, row, get_block);
			if (ret == -ENODEV)
				return ret;
		}

		for (row = first; row < nblocks; row++)
		{
			ret = create_task_12(dataAp, nblocks, step, row, get_block);
			if (ret == -ENODEV)
				return ret;

			ret = create_task_21(dataAp, nblocks, step, row, get_block);
			if (ret == -ENODEV)
				return ret;
		}

		/* The diagonal block is final once its panel tasks are queued */
		starpu_data_wont_use(get_block(dataAp, nblocks, step, step));

		for (row = first; row < nblocks; row++)
		{
			for (col = first; col < nblocks; col++)
			{
				ret = create_task_22(dataAp, nblocks, step, row, col, get_block);
				if (ret == -ENODEV)
					return ret;
			}
		}

		/* Same for the panel blocks of this step */
		for (row = first; row < nblocks; row++)
		{
			starpu_data_wont_use(get_block(dataAp, nblocks, step, row));
			starpu_data_wont_use(get_block(dataAp, nblocks, row, step));
		}
	}

	/* stall the application until the end of computations */
	starpu_task_wait_for_all();

	const double end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	*timing = end - start;
	return 0;
}
Esempio n. 28
0
/*
 * 5-point stencil over an X x Y matrix of floats distributed across MPI
 * ranks, with a data migration between two computation phases.
 *
 * 1. Every rank fills its own copy of the matrix with random values and
 *    registers the cells it owns (per my_distrib) or neighbours.
 * 2. `niter` sweeps of the stencil are inserted on the interior cells.
 * 3. Cells are re-homed according to my_distrib2, registering any cell that
 *    becomes needed, and `niter` more sweeps are inserted.
 * 4. Every registered cell is sent back to its my_distrib owner and
 *    unregistered.
 *
 * Always returns 0; initialization failures abort through
 * STARPU_CHECK_RETURN_VALUE.
 */
int main(int argc, char **argv)
{
	int my_rank, size, x, y, loop;
	float mean=0;
	float matrix[X][Y];
	starpu_data_handle_t data_handles[X][Y];

	int ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	/* Do not carry on with a half-initialized MPI layer */
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	parse_args(argc, argv);

	/* Initial data values */
	starpu_srand48((long int)time(NULL));
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			matrix[x][y] = (float)starpu_drand48();
			mean += matrix[x][y];
		}
	}
	mean /= (X*Y);

	if (display)
	{
		FPRINTF_MPI(stdout, "mean=%2.2f\n", mean);
		for(x = 0; x < X; x++)
		{
			fprintf(stdout, "[%d] ", my_rank);
			for (y = 0; y < Y; y++)
			{
				fprintf(stdout, "%2.2f ", matrix[x][y]);
			}
			fprintf(stdout, "\n");
		}
	}

	/* Initial distribution */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			int mpi_rank = my_distrib(x, y, size);
			if (mpi_rank == my_rank)
			{
				/* I own this cell: back it with my local matrix entry */
				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float));
			}
			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
			{
				/* I don't own that index, but will need it for my computations */
				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
			}
			else
			{
				/* I know it's useless to allocate anything for this */
				data_handles[x][y] = NULL;
			}
			if (data_handles[x][y])
			{
				/* Tag is the cell's linear index, owner is its my_distrib rank */
				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
			}
		}
	}

	/* First computation with initial distribution */
	for(loop=0 ; loop<niter; loop++)
	{
		for (x = 1; x < X-1; x++)
		{
			for (y = 1; y < Y-1; y++)
			{
				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
						       0);
			}
		}
	}
	FPRINTF(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	/* Now migrate data to a new distribution */

	/* First register newly needed data */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			int mpi_rank = my_distrib2(x, y, size);
			if (!data_handles[x][y] && (mpi_rank == my_rank
				 || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size)
				 || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size)))
			{
				/* Register newly-needed data */
				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
			}
			if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y]))
			{
				/* Migrate the data */
				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
				/* And register new rank of the matrix */
				starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank);
			}
		}
	}

	/* Second computation with new distribution */
	for(loop=0 ; loop<niter; loop++)
	{
		for (x = 1; x < X-1; x++)
		{
			for (y = 1; y < Y-1; y++)
			{
				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
						       0);
			}
		}
	}
	FPRINTF(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	/* Unregister data */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			if (data_handles[x][y])
			{
				int mpi_rank = my_distrib(x, y, size);
				/* Get back data to original place where the user-provided buffer is. */
				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
				/* Register original rank of the matrix (although useless) */
				starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank);
				/* And unregister it */
				starpu_data_unregister(data_handles[x][y]);
			}
		}
	}

	starpu_mpi_shutdown();
	starpu_shutdown();

	if (display)
	{
		FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean);
		for(x = 0; x < X; x++)
		{
			FPRINTF(stdout, "[%d] ", my_rank);
			for (y = 0; y < Y; y++)
			{
				FPRINTF(stdout, "%2.2f ", matrix[x][y]);
			}
			FPRINTF(stdout, "\n");
		}
	}

	return 0;
}
/*
 * Task bundle test.
 *
 * For each of NB_BUNDLE variables, a bundle is created and NB_ITERATION
 * tasks running `codelet` on that variable (with factors[i] as argument)
 * are inserted into it. The task at index NB_ITERATION / 2 is removed from
 * the bundle again, then all tasks, including the removed one, are
 * submitted and the bundle is closed. The values are printed before and
 * after execution.
 *
 * Returns EXIT_SUCCESS on completion, or STARPU_TEST_SKIPPED when StarPU
 * reports -ENODEV at initialization or at task submission.
 */
int main(int argc, char **argv)
{
        int i, j, ret;

	ret = starpu_initialize(NULL, &argc, &argv);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_initialize");

	float *data;
	/* The buffer is written right below, so a failed allocation must not
	 * go unnoticed */
	ret = starpu_malloc((void**)&data, sizeof(*data) * NB_BUNDLE);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
	float factors[NB_BUNDLE];
        starpu_data_handle_t handles[NB_BUNDLE];

	struct starpu_task *task[NB_ITERATION];

	starpu_task_bundle_t bundles[NB_BUNDLE];

	for (i = 0; i < NB_BUNDLE; i++)
	{
		data[i] = i + 1;
		factors[i] = NB_BUNDLE - i;
	}

	for (i = 0; i < NB_BUNDLE; i++)
		starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(float));

        FPRINTF(stderr, "VALUES:");
	for (i = 0; i < NB_BUNDLE; i++)
		FPRINTF(stderr, " %f (%f)", data[i], factors[i]);
        FPRINTF(stderr, "\n");

	for (i = 0; i < NB_BUNDLE; i++)
	{
		starpu_task_bundle_create(&bundles[i]);

		for (j = 0; j < NB_ITERATION; j++)
		{
			task[j] = starpu_task_create();

			task[j]->cl = &codelet;

			task[j]->cl_arg = &factors[i];
			task[j]->cl_arg_size = sizeof(float);

			task[j]->handles[0] = handles[i];

			ret = starpu_task_bundle_insert(bundles[i], task[j]);
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_bundle_insert");
		}

		/* Take one task back out of the bundle; it is still submitted below */
		ret = starpu_task_bundle_remove(bundles[i], task[NB_ITERATION / 2]);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_bundle_remove");

		for (j = 0; j < NB_ITERATION; j++)
		{
			ret = starpu_task_submit(task[j]);
			if (ret == -ENODEV) goto enodev;
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		starpu_task_bundle_close(bundles[i]);
	}

        ret = starpu_task_wait_for_all();
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");

	/* Acquire every handle so that data[] can be read from the application */
        for(i = 0; i < NB_BUNDLE ; i++)
	{
                ret = starpu_data_acquire(handles[i], STARPU_R);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
        }

        FPRINTF(stderr, "VALUES:");
	for (i = 0; i < NB_BUNDLE; i++)
		FPRINTF(stderr, " %f (%f)", data[i], factors[i]);
        FPRINTF(stderr, "\n");

        for(i = 0; i < NB_BUNDLE ; i++)
	{
                starpu_data_release(handles[i]);
		starpu_data_unregister(handles[i]);
	}

	starpu_free(data);

	starpu_shutdown();

	return EXIT_SUCCESS;

enodev:
	/* NOTE(review): this path shuts StarPU down with the handles still
	 * registered, the current bundle still open and `data` not freed;
	 * confirm whether a fuller teardown is wanted here. */
	starpu_shutdown();
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* yes, we do not perform the computation but we did detect that no one
 	 * could perform the kernel, so this is not an error from StarPU */
	return STARPU_TEST_SKIPPED;
}
Esempio n. 30
0
/*
 * Sparse matrix-vector product example.
 *
 * Builds a 3-band CSR matrix of dimension `size` (1 / 5 / 1 on the sub-,
 * main and super-diagonal), an input vector filled with 2.0 and a zeroed
 * output vector, partitions the matrix and the output vector into `nblocks`
 * parts and submits one `spmv_cl` task per part.
 *
 * Returns 0 after printing the first entries of both vectors and the
 * elapsed time, or 77 when StarPU reports -ENODEV at initialization or at
 * task submission. On the submission skip path all data is still
 * unpartitioned, unregistered and freed and StarPU is shut down, but no
 * result is printed.
 */
int main(int argc, char **argv)
{
	int ret;
	int exit_value = 0;
	unsigned part;
	double timing;
	double start, end;
	unsigned row, pos;
	unsigned ind;

	/* CSR matrix description */
	float *nzval;
	uint32_t nnz;
	uint32_t *colind;
	uint32_t *rowptr;

	/* Input and Output vectors */
	float *vector_in_ptr;
	float *vector_out_ptr;

	/*
	 *	Parse command-line arguments
	 */
	parse_args(argc, argv);

	/*
	 *	Launch StarPU
	 */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/*
	 *	Create a 3-band sparse matrix as input example
	 */
	nnz = 3*size-2;
	starpu_malloc((void **)&nzval, nnz*sizeof(float));
	starpu_malloc((void **)&colind, nnz*sizeof(uint32_t));
	starpu_malloc((void **)&rowptr, (size+1)*sizeof(uint32_t));
	assert(nzval && colind && rowptr);

	/* fill the matrix */
	for (row = 0, pos = 0; row < size; row++)
	{
		rowptr[row] = pos;

		if (row > 0)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row-1;
			pos++;
		}

		nzval[pos] = 5.0f;
		colind[pos] = row;
		pos++;

		if (row < size - 1)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row+1;
			pos++;
		}
	}

	STARPU_ASSERT(pos == nnz);

	rowptr[size] = nnz;

	/* initiate the 2 vectors */
	starpu_malloc((void **)&vector_in_ptr, size*sizeof(float));
	starpu_malloc((void **)&vector_out_ptr, size*sizeof(float));
	assert(vector_in_ptr && vector_out_ptr);

	/* fill them */
	for (ind = 0; ind < size; ind++)
	{
		vector_in_ptr[ind] = 2.0f;
		vector_out_ptr[ind] = 0.0f;
	}

	/*
	 *	Register the CSR matrix and the 2 vectors
	 */
	starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));

	/*
	 *	Partition the CSR matrix and the output vector
	 */
	csr_f.nchildren = nblocks;
	vector_f.nchildren = nblocks;
	starpu_data_partition(sparse_matrix, &csr_f);
	starpu_data_partition(vector_out, &vector_f);

	/*
	 *	If we use OpenCL, we need to compile the SpMV kernel
	 */
#ifdef STARPU_USE_OPENCL
	compile_spmv_opencl_kernel();
#endif

	start = starpu_timing_now();

	/*
	 *	Create and submit StarPU tasks
	 */
	for (part = 0; part < nblocks; part++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &spmv_cl;

		task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
		task->handles[1] = vector_in;
		task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part);

		ret = starpu_task_submit(task);
		if (STARPU_UNLIKELY(ret == -ENODEV))
		{
			/* Stop submitting but fall through to the regular
			 * teardown instead of exiting with everything still
			 * registered; report the run as skipped (77), like
			 * the starpu_init() check above does */
			FPRINTF(stderr, "No worker may execute this task\n");
			exit_value = 77;
			break;
		}
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}

	starpu_task_wait_for_all();
	end = starpu_timing_now();

	/*
	 *	Unpartition the CSR matrix and the output vector
	 */
	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);

	/*
	 *	Unregister data
	 */
	starpu_data_unregister(sparse_matrix);
	starpu_data_unregister(vector_in);
	starpu_data_unregister(vector_out);

	/*
	 *	Display the result (only meaningful when every task ran)
	 */
	if (exit_value == 0)
	{
		for (row = 0; row < STARPU_MIN(size, 16); row++)
		{
			FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]);
		}
	}

	starpu_free(nzval);
	starpu_free(colind);
	starpu_free(rowptr);
	starpu_free(vector_in_ptr);
	starpu_free(vector_out_ptr);

	/*
	 *	Stop StarPU
	 */
	starpu_shutdown();

	if (exit_value == 0)
	{
		timing = end - start;
		FPRINTF(stderr, "Computation took (in ms)\n");
		FPRINTF(stdout, "%2.2f\n", timing/1000);
	}

	return exit_value;
}