int main(int argc, char **argv)
{
	int ret, rank, size;
	starpu_data_handle_t handle;
	int var;

	MPI_Init(&argc, &argv);
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(NULL, NULL, 0);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	if (size<3)
	{
		FPRINTF(stderr, "We need more than 2 processes.\n");
		starpu_mpi_shutdown();
		starpu_shutdown();
		MPI_Finalize();
		return STARPU_TEST_SKIPPED;
	}
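
	/* Node 0 gathers one integer from every other node and checks that the
	 * received value matches the sender's rank. */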

	if (rank == 0)
	{
		int n;
		for(n=1 ; n<size ; n++)
		{
			MPI_Status status;

			FPRINTF_MPI(stderr, "receiving from node %d\n", n);
			starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
			starpu_mpi_recv(handle, n, 42, MPI_COMM_WORLD, &status);
			starpu_data_acquire(handle, STARPU_R);
			STARPU_ASSERT_MSG(var == n, "Received incorrect value <%d> from node <%d>\n", var, n);
			FPRINTF_MPI(stderr, "received <%d> from node %d\n", var, n);
			starpu_data_release(handle);
			starpu_data_unregister(handle);
		}
	}
	else
	{
		FPRINTF_MPI(stderr, "sending to node %d\n", 0);
		var = rank;
		starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));
		starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD);
		starpu_data_unregister(handle);
	}

	starpu_mpi_shutdown();
	starpu_shutdown();
	MPI_Finalize();

	return 0;
}
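/* The exchange_variable(), exchange_void() and exchange_complex() helpers used
 * by the next main() are not listed here. As an illustration only, here is a
 * minimal sketch of what exchange_variable(rank, detached) could look like
 * (assumptions: ranks are paired even/odd, the tag value 42 and the
 * detached/blocking switch are illustrative choices, not the actual test code). */
static int exchange_variable(int rank, int detached)
{
	int other_rank = (rank % 2 == 0) ? rank + 1 : rank - 1;
	int var = rank;
	starpu_data_handle_t handle;

	starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var));

	if (rank % 2 == 0)
	{
		if (detached)
			starpu_mpi_isend_detached(handle, other_rank, 42, MPI_COMM_WORLD, NULL, NULL);
		else
			starpu_mpi_send(handle, other_rank, 42, MPI_COMM_WORLD);
	}
	else
	{
		if (detached)
			starpu_mpi_irecv_detached(handle, other_rank, 42, MPI_COMM_WORLD, NULL, NULL);
		else
			starpu_mpi_recv(handle, other_rank, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	}

	/* Unregistering waits for the pending (detached) operations on the handle. */
	starpu_data_unregister(handle);

	/* Receivers check that they got the sender's rank. */
	if (rank % 2 == 1 && var != other_rank)
		return 1;
	return 0;
}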
int main(int argc, char **argv)
{
	int ret=0, global_ret=0;
	int rank, size;

	MPI_Init(&argc, &argv);
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(NULL, NULL, 0);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	if (size%2 != 0)
	{
		FPRINTF(stderr, "We need a even number of processes.\n");
		starpu_mpi_shutdown();
		starpu_shutdown();
		MPI_Finalize();
		return STARPU_TEST_SKIPPED;
	}

	ret = exchange_variable(rank, 0);
	if (ret != 0) global_ret = ret;

	ret = exchange_variable(rank, 1);
	if (ret != 0) global_ret = ret;

	ret = exchange_void(rank, 0);
	if (ret != 0) global_ret = ret;

	ret = exchange_void(rank, 1);
	if (ret != 0) global_ret = ret;

	ret = exchange_complex(rank, 0);
	if (ret != 0) global_ret = ret;

	ret = exchange_complex(rank, 1);
	if (ret != 0) global_ret = ret;

	starpu_mpi_shutdown();
	starpu_shutdown();

	MPI_Finalize();

	return global_ret;
}
int main(int argc, char **argv)
{
	int ret, rank, size;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size%2 != 0)
	{
		if (rank == 0)
			FPRINTF(stderr, "We need a even number of processes.\n");

		MPI_Finalize();
		return STARPU_TEST_SKIPPED;
	}

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(NULL, NULL, 0);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	tab = malloc(SIZE*sizeof(float));

	starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float));

	int nloops = NITER;
	int loop;
	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
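
	/* Ping-pong between paired ranks: on each iteration one side posts a
	 * detached send and the other a detached receive, and completion of the
	 * communication unlocks the StarPU tag the main thread then waits on. */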

	for (loop = 0; loop < nloops; loop++)
	{
		starpu_tag_t tag = (starpu_tag_t)loop;

		if ((loop % 2) == (rank%2))
		{
			starpu_mpi_isend_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag);
		}
		else
		{
			starpu_mpi_irecv_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag);
		}

		starpu_tag_wait(tag);
	}

	starpu_data_unregister(tab_handle);
	free(tab);

	starpu_mpi_shutdown();
	starpu_shutdown();

	MPI_Finalize();

	return 0;
}
int main(int argc, char **argv)
{
	MPI_Init(NULL, NULL);

	int rank, size;

	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size != 2)
	{
		if (rank == 0)
			fprintf(stderr, "We need exactly 2 processes.\n");

		MPI_Finalize();
		return 0;
	}

	starpu_init(NULL);
	starpu_mpi_initialize();

	tab = malloc(SIZE*sizeof(float));

	starpu_vector_data_register(&tab_handle, 0, (uintptr_t)tab, SIZE, sizeof(float));

	unsigned nloops = NITER;
	unsigned loop;

	int other_rank = (rank + 1)%2;

	for (loop = 0; loop < nloops; loop++)
	{
		starpu_tag_t tag = (starpu_tag_t)loop;

		if ((loop % 2) == rank)
		{
			starpu_mpi_isend_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag);
		}
		else {
			starpu_mpi_irecv_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag);
		}

		starpu_tag_wait(tag);
	}
	
	starpu_mpi_shutdown();
	starpu_shutdown();

	MPI_Finalize();

	return 0;
}
/*main program*/
int main(int argc, char * argv[])
{
    /* Init */
    int ret;
    int mpi_rank, mpi_size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

    ret = starpu_init(NULL);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
    ret = starpu_mpi_init(NULL, NULL, 0);
    STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

    /* Element initialization: domains are connected as a ring for this test */
    int num_elements=NUM_EL;
    struct element * el_left=malloc(num_elements*sizeof(el_left[0]));
    struct element * el_right=malloc(num_elements*sizeof(el_right[0]));
    int i;
    for(i=0; i<num_elements; i++)
    {
        init_element(el_left+i,i+1,((mpi_rank-1)+mpi_size)%mpi_size);
        init_element(el_right+i,i+1,(mpi_rank+1)%mpi_size);
    }

    /* Communication loop */
    for (i=0; i<NUM_LOOPS; i++) //number of "computations loops"
    {
        int e;
        for (e=0; e<num_elements; e++) //Do something for each elements
        {
            insert_work_for_one_element(el_right+e);
            insert_work_for_one_element(el_left+e);
        }
    }
    /* End */
    starpu_task_wait_for_all();

    for(i=0; i<num_elements; i++)
    {
        free_element(el_left+i);
        free_element(el_right+i);
    }

    starpu_mpi_shutdown();
    starpu_shutdown();

    MPI_Finalize();
    FPRINTF(stderr, "No assert until end\n");
    return 0;
}
int main(int argc, char **argv)
{
	/* create a simple symmetric positive definite matrix example
	 *
	 *	Hilbert matrix: h(i,j) = 1/(i+j+1)
	 * */

	float ***bmat;
	int rank, nodes, ret;
	double timing, flops;
	int correctness;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &nodes);
	starpu_cublas_init();

	parse_args(argc, argv, nodes);

	matrix_init(&bmat, rank, nodes, 1);
	matrix_display(bmat, rank);

	dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops);

	starpu_mpi_shutdown();

	matrix_display(bmat, rank);

	dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops);

	matrix_free(&bmat, rank, nodes, 1);
	starpu_cublas_shutdown();
	starpu_shutdown();

	assert(correctness);

	if (rank == 0)
	{
		FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000);
		FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f));
	}

	return 0;
}
int main(int argc, char **argv)
{
	int ret, rank, size;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size<3)
	{
		FPRINTF(stderr, "We need more than 2 processes.\n");
		MPI_Finalize();
		return STARPU_TEST_SKIPPED;
	}

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(NULL, NULL, 0);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	if (rank == 0)
	{
		int n;
		for(n=1 ; n<size ; n++)
		{
			int i, var[2];
			MPI_Status status[3];
			starpu_data_handle_t handle[2];

			FPRINTF_MPI(stderr, "receiving from node %d\n", n);
			for(i=0 ; i<2 ; i++)
				starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i]));

			starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[0]);
			starpu_data_acquire(handle[0], STARPU_R);
			STARPU_ASSERT_MSG(var[0] == n, "Received incorrect value <%d> from node <%d>\n", var[0], n);
			FPRINTF_MPI(stderr, "received <%d> from node %d\n", var[0], n);
			starpu_data_release(handle[0]);
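
			/* The next two values use the same tag 42 and a second tag 44;
			 * MPI's non-overtaking rule for messages with the same source
			 * and tag guarantees that the second receive on tag 42 gets
			 * the second value sent (n*2). */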

			starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[1]);
			starpu_mpi_recv(handle[1], n, 44, MPI_COMM_WORLD, &status[2]);
			for(i=0 ; i<2 ; i++)
				starpu_data_acquire(handle[i], STARPU_R);
			STARPU_ASSERT_MSG(var[0] == n*2, "Received incorrect value <%d> from node <%d>\n", var[0], n);
			STARPU_ASSERT_MSG(var[1] == n*4, "Received incorrect value <%d> from node <%d>\n", var[1], n);
			FPRINTF_MPI(stderr, "received <%d> and <%d> from node %d\n", var[0], var[1], n);
			for(i=0 ; i<2 ; i++)
				starpu_data_release(handle[i]);
			for(i=0 ; i<2 ; i++)
				starpu_data_unregister(handle[i]);
		}
	}
	else
	{
		int i, var[3];
		starpu_data_handle_t handle[3];

		FPRINTF_MPI(stderr, "sending to node %d\n", 0);
		var[0] = rank;
		var[1] = var[0] * 2;
		var[2] = var[0] * 4;
		for(i=0 ; i<3 ; i++)
			starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i]));
		starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD);
		starpu_mpi_send(handle[1], 0, 42, MPI_COMM_WORLD);
		starpu_mpi_send(handle[2], 0, 44, MPI_COMM_WORLD);
		for(i=0 ; i<3 ; i++)
			starpu_data_unregister(handle[i]);
	}

	starpu_mpi_shutdown();
	starpu_shutdown();
	MPI_Finalize();

	return 0;
}
int main(int argc, char **argv)
{
	int size, n, x=789;
	int rank, other_rank;
	int ret;
	starpu_data_handle_t data[2];

        MPI_Init(&argc, &argv);
        starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
        starpu_mpi_comm_size(MPI_COMM_WORLD, &size);

        if (size % 2)
        {
		FPRINTF(stderr, "We need a even number of processes.\n");
                MPI_Finalize();
                return STARPU_TEST_SKIPPED;
        }

	other_rank = rank%2 == 0 ? rank+1 : rank-1;
	FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank);

	if (rank % 2)
	{
		MPI_Send(&rank, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD);
		FPRINTF(stderr, "[%d] sending %d\n", rank, rank);
	}
	else
	{
		MPI_Recv(&x, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		FPRINTF(stderr, "[%d] received %d\n", rank, x);
	}

        ret = starpu_init(NULL);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
        ret = starpu_mpi_init(NULL, NULL, 0);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	if (rank % 2)
	{
		starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned));
		starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned));
		starpu_mpi_data_register(data[1], 22, 0);
	}
	else
		starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(unsigned));
	starpu_mpi_data_register(data[0], 12, 0);
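
	/* Odd ranks post a synchronous send (issend, tag 22) before a blocking
	 * send (tag 12); the even side receives tag 12 first and only then posts
	 * the matching receive for tag 22, at which point the issend can complete
	 * and the wait below returns. */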

	if (rank % 2)
	{
		starpu_mpi_req req;
		starpu_mpi_issend(data[1], &req, other_rank, 22, MPI_COMM_WORLD);
		starpu_mpi_send(data[0], other_rank, 12, MPI_COMM_WORLD);
		starpu_mpi_wait(&req, MPI_STATUS_IGNORE);
	}
	else
	{
		int *xx;

		starpu_mpi_recv(data[0], other_rank, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		xx = (int *)starpu_variable_get_local_ptr(data[0]);
		FPRINTF_MPI(stderr, "received %d\n", *xx);
		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);

		starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(unsigned));
		starpu_mpi_data_register(data[1], 22, 0);
		starpu_mpi_recv(data[0],  other_rank, 22, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		xx = (int *)starpu_variable_get_local_ptr(data[0]);
		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
	}

	starpu_data_unregister(data[0]);
	starpu_data_unregister(data[1]);

	starpu_mpi_shutdown();
	starpu_shutdown();
        MPI_Finalize();
	return 0;
}
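/* The mutex, cond and callback used by do_test() for its detached receives are
 * not listed here. A minimal sketch of what they could look like (an assumption,
 * not the actual test code): the callback simply counts completed receptions
 * under the same mutex/condition pair that do_test() waits on. VAL0 and VAL1
 * are assumed to be two arbitrary test constants defined elsewhere. */
static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER;

static void callback(void *arg)
{
	int *received = (int *)arg;

	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
	(*received)++;
	STARPU_PTHREAD_COND_SIGNAL(&cond);
	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
}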
int do_test(int rank, int sdetached, int rdetached)
{
	int ret, i;
	int val[2];
	starpu_data_handle_t data[2];

	ret = starpu_init(NULL);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
        ret = starpu_mpi_init(NULL, NULL, 0);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	if (rank == 1)
	{
		val[0] = VAL0;
		val[1] = VAL1;
	}
	else
	{
		val[0] = -1;
		val[1] = -1;
	}
	starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0]));
	starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1]));
	starpu_mpi_data_register(data[0], 77, 1);
	starpu_mpi_data_register(data[1], 88, 1);

	if (rank == 1)
	{
		for(i=1 ; i>=0 ; i--)
		{
			if (sdetached)
				starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL);
			else
				starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD);
		}
	}
	else if (rank == 0)
	{
		int received = 0;

		for(i=0 ; i<2 ; i++)
			FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]);
		for(i=0 ; i<2 ; i++)
		{
			if (rdetached)
				starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received);
			else
				starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		}

		if (rdetached)
		{
			STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			while (received != 2)
			{
				FPRINTF_MPI(stderr, "Received %d messages\n", received);
				STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
			}
			STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
		}

		for(i=0 ; i<2 ; i++)
			starpu_data_acquire(data[i], STARPU_R);
		for(i=0 ; i<2 ; i++)
			FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]);
		for(i=0 ; i<2 ; i++)
			starpu_data_release(data[i]);
	}
	FPRINTF_MPI(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	starpu_data_unregister(data[0]);
	starpu_data_unregister(data[1]);

	if (rank == 0)
	{
		ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1;
	}
	starpu_mpi_shutdown();
	starpu_shutdown();
	return ret;
}
int main(int argc, char **argv)
{
	int size, x;
	int color;
	MPI_Comm newcomm;
	int rank, newrank;
	int ret;
	starpu_data_handle_t data[3];
	int value = 90;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        if (size < 4)
        {
		FPRINTF(stderr, "We need at least 4 processes.\n");
                MPI_Finalize();
                return STARPU_TEST_SKIPPED;
        }

	color = rank%2;
	MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm);
	MPI_Comm_rank(newcomm, &newrank);
	FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color);

	if (newrank == 0)
	{
		FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank);
		MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm);
	}
	else if (newrank == 1)
	{
		MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE);
		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x);
	}

        ret = starpu_init(NULL);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
        ret = starpu_mpi_init(NULL, NULL, 0);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");

	if (rank == 0)
	{
		starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int));
	}
	else
		starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int));
	starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD);

	if (newrank == 0)
	{
		starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
		starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
	}
	else
		starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int));
	starpu_mpi_data_register_comm(data[0], 12, 0, newcomm);

	if (newrank == 0)
	{
		starpu_mpi_req req[2];
		starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm);
		starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm);
		starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE);
		starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE);
	}
	else if (newrank == 1)
	{
		int *xx;

		starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[0], STARPU_RW);
		xx = (int *)starpu_variable_get_local_ptr(data[0]);
		starpu_data_release(data[0]);
		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);

		starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int));
		starpu_mpi_data_register_comm(data[1], 22, 0, newcomm);
		starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[0], STARPU_RW);
		xx = (int *)starpu_variable_get_local_ptr(data[0]);
		starpu_data_release(data[0]);
		FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx);
		STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x);
	}

	if (rank == 0)
	{
		starpu_data_acquire(data[2], STARPU_RW);
		int rvalue = *((int *)starpu_variable_get_local_ptr(data[2]));
		starpu_data_release(data[2]);
		FPRINTF_MPI(stderr, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1);
		starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD);
		starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[2], STARPU_RW);
		int *xx = (int *)starpu_variable_get_local_ptr(data[2]);
		starpu_data_release(data[2]);
		FPRINTF_MPI(stderr, "Value back is %d\n", *xx);
		STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect (should be %d)\n", *xx, rvalue + (2*(size-1)));
	}
	else
	{
		int next = (rank == size-1) ? 0 : rank+1;
		starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		starpu_data_acquire(data[2], STARPU_RW);
		int *xx = (int *)starpu_variable_get_local_ptr(data[2]);
		FPRINTF_MPI(stderr, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next);
		*xx = *xx + 2;
		starpu_data_release(data[2]);
		starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD);
	}

	if (newrank == 0 || newrank == 1)
	{
		starpu_mpi_insert_task(newcomm, &mycodelet,
				       STARPU_RW, data[0],
				       STARPU_VALUE, &x, sizeof(x),
				       STARPU_EXECUTE_ON_NODE, 1,
				       0);

		starpu_task_wait_for_all();
		starpu_data_unregister(data[0]);
		starpu_data_unregister(data[1]);
	}
	starpu_data_unregister(data[2]);

	starpu_mpi_shutdown();
	starpu_shutdown();
	MPI_Comm_free(&newcomm);
        MPI_Finalize();
	return 0;
}
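/* The data distribution helpers used by the stencil example below are not
 * listed (stencil5_cl, X, Y, niter and display are also assumed to be defined
 * elsewhere). As an illustration only, my_distrib()/my_distrib2() could be any
 * deterministic mapping from (x, y) to a rank, as long as every process
 * computes the same one; a minimal sketch, not the actual example code: */
static int my_distrib(int x, int y, int nb_nodes)
{
	/* simple cyclic mapping; the double modulo keeps the negative x or y
	 * queried at the borders (x-1 or y-1) in range */
	return (((x + y) % nb_nodes) + nb_nodes) % nb_nodes;
}

static int my_distrib2(int x, int y, int nb_nodes)
{
	/* a different mapping, used to exercise the data migration step */
	return (my_distrib(x, y, nb_nodes) + 1) % nb_nodes;
}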
int main(int argc, char **argv)
{
	int my_rank, size, x, y, loop;
	float mean=0;
	float matrix[X][Y];
	starpu_data_handle_t data_handles[X][Y];

	int ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	starpu_mpi_init(&argc, &argv, 1);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	parse_args(argc, argv);

	/* Initial data values */
	starpu_srand48((long int)time(NULL));
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			matrix[x][y] = (float)starpu_drand48();
			mean += matrix[x][y];
		}
	}
	mean /= (X*Y);

	if (display)
	{
		FPRINTF_MPI(stdout, "mean=%2.2f\n", mean);
		for(x = 0; x < X; x++)
		{
			fprintf(stdout, "[%d] ", my_rank);
			for (y = 0; y < Y; y++)
			{
				fprintf(stdout, "%2.2f ", matrix[x][y]);
			}
			fprintf(stdout, "\n");
		}
	}

	/* Initial distribution */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			int mpi_rank = my_distrib(x, y, size);
			if (mpi_rank == my_rank)
			{
				//FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y);
				starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float));
			}
			else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
				 || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))
			{
				/* I don't own that index, but will need it for my computations */
				//FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y);
				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
			}
			else
			{
				/* I know it's useless to allocate anything for this */
				data_handles[x][y] = NULL;
			}
			if (data_handles[x][y])
			{
				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
			}
		}
	}

	/* First computation with initial distribution */
	for(loop=0 ; loop<niter; loop++)
	{
		for (x = 1; x < X-1; x++)
		{
			for (y = 1; y < Y-1; y++)
			{
				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
						       0);
			}
		}
	}
	FPRINTF(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	/* Now migrate data to a new distribution */

	/* First register newly needed data */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			int mpi_rank = my_distrib2(x, y, size);
			if (!data_handles[x][y] && (mpi_rank == my_rank
				 || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size)
				 || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size)))
			{
				/* Register newly-needed data */
				starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float));
				starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank);
			}
			if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y]))
			{
				/* Migrate the data */
				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
				/* And register new rank of the matrix */
				starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank);
			}
		}
	}

	/* Second computation with new distribution */
	for(loop=0 ; loop<niter; loop++)
	{
		for (x = 1; x < X-1; x++)
		{
			for (y = 1; y < Y-1; y++)
			{
				starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y],
						       STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y],
						       STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1],
						       0);
			}
		}
	}
	FPRINTF(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();

	/* Unregister data */
	for(x = 0; x < X; x++)
	{
		for (y = 0; y < Y; y++)
		{
			if (data_handles[x][y])
			{
				int mpi_rank = my_distrib(x, y, size);
				/* Bring the data back to its original place, where the user-provided buffer is. */
				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
				/* Register original rank of the matrix (although useless) */
				starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank);
				/* And unregister it */
				starpu_data_unregister(data_handles[x][y]);
			}
		}
	}

	starpu_mpi_shutdown();
	starpu_shutdown();

	if (display)
	{
		FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean);
		for(x = 0; x < X; x++)
		{
			FPRINTF(stdout, "[%d] ", my_rank);
			for (y = 0; y < Y; y++)
			{
				FPRINTF(stdout, "%2.2f ", matrix[x][y]);
			}
			FPRINTF(stdout, "\n");
		}
	}

	return 0;
}
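/* The mycodelet used by the next example is not listed. A sketch that would be
 * consistent with the values checked at the end (11 and 12 turning into 12 and
 * 144) copies the second variable into the first and squares the second; this
 * is an assumption for illustration, not the actual test codelet. */
static void mycodelet_cpu_func(void *descr[], void *arg)
{
	(void)arg;
	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]);

	*x0 = *x1;        /* 11 -> 12 */
	*x1 = *x1 * *x1;  /* 12 -> 144 */
}

struct starpu_codelet mycodelet =
{
	.cpu_funcs = { mycodelet_cpu_func },
	.nbuffers = 2,
	.modes = { STARPU_RW, STARPU_RW }
};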
int main(int argc, char **argv)
{
	int rank, size, err;
	int x[2];
	int ret, i;
	starpu_data_handle_t data_handles[2];
	int values[2];

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (rank == 0)
	{
		x[0] = 11;
		starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x[0], sizeof(x[0]));
		starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1]));
	}
	else if (rank == 1)
	{
		x[1] = 12;
		starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0]));
		starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&x[1], sizeof(x[1]));
	}
	else
	{
		starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0]));
		starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1]));
	}

	starpu_mpi_data_register(data_handles[0], 0, 0);
	starpu_mpi_data_register(data_handles[1], 1, 1);

	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet,
				     STARPU_RW, data_handles[0], STARPU_RW, data_handles[1],
				     STARPU_EXECUTE_ON_DATA, data_handles[1],
				     0);
	assert(err == 0);
	starpu_task_wait_for_all();

	for(i=0 ; i<2 ; i++)
	{
		starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL);
		if (rank == 0)
		{
			starpu_data_acquire(data_handles[i], STARPU_R);
			values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i]));
			starpu_data_release(data_handles[i]);
		}
	}
	ret = 0;
	if (rank == 0)
	{
		FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]);
		if (values[0] != 12 || values[1] != 144)
		{
			ret = EXIT_FAILURE;
		}
	}

	starpu_data_unregister(data_handles[0]);
	starpu_data_unregister(data_handles[1]);

	starpu_mpi_shutdown();
	starpu_shutdown();

	return ret;
}
int main(int argc, char **argv)
{
	int rank, nodes;
	int ret=0;
	int compare=0;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
	starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes);

	if (nodes < 2)
	{
		fprintf(stderr, "This program needs at least 2 nodes (%d available)\n", nodes);
		ret = 77;
	}
	else
	{
		starpu_data_handle_t handle;
		starpu_data_handle_t handle2;

		double real[2] = {4.0, 2.0};
		double imaginary[2] = {7.0, 9.0};

		double real2[2] = {14.0, 12.0};
		double imaginary2[2] = {17.0, 19.0};
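
		/* Rank 1 starts from zeroed values so that the data it sends back
		 * is really what it received, not its own initial values. */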

		if (rank == 1)
		{
			real[0] = 0.0;
			real[1] = 0.0;
			imaginary[0] = 0.0;
			imaginary[1] = 0.0;
		}

		starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2);
		starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2);

		if (rank == 0)
		{
			int *compare_ptr = &compare;

			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0);
			starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL);
			starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL);

			starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0);
			starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0);
		}
		else if (rank == 1)
		{
			starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL);
			starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0);
			starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL);
		}

		starpu_task_wait_for_all();

		starpu_data_unregister(handle);
		starpu_data_unregister(handle2);
	}

	starpu_mpi_shutdown();
	starpu_shutdown();

	if (rank == 0) return !compare; else return ret;
}
int main(int argc, char **argv)
{
	int ret, rank, size, err, node;
	long x0=32;
	int x1=23;
	starpu_data_handle_t data_handlesx0;
	starpu_data_handle_t data_handlesx1;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (rank != 0 && rank != 1) goto end;

	if (rank == 0)
	{
		starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0));
		starpu_mpi_data_register(data_handlesx0, 0, rank);
		starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1));
		starpu_mpi_data_register(data_handlesx1, 1, 1);
	}
	else if (rank == 1)
	{
		starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1));
		starpu_mpi_data_register(data_handlesx1, 1, rank);
		starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0));
		starpu_mpi_data_register(data_handlesx0, 0, 0);
	}

	node = starpu_mpi_data_get_rank(data_handlesx1);
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1,
				     0);
	assert(err == 0);

	node = starpu_mpi_data_get_rank(data_handlesx0);
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1,
				     0);
	assert(err == 0);

	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
				     0);
	assert(err == 0);

	node = 1;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	/* Here the node specified with STARPU_EXECUTE_ON_NODE overrides the node
	   that the data model would otherwise select to execute the codelet */
	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	/* Here the node specified with STARPU_EXECUTE_ON_NODE overrides the node
	   that the data model would otherwise select to execute the codelet */
	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	FPRINTF_MPI(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();
	starpu_data_unregister(data_handlesx0);
	starpu_data_unregister(data_handlesx1);

end:
	starpu_mpi_shutdown();
	starpu_shutdown();

	return 0;
}
int main(int argc, char **argv)
{
	int rank;
	int world_size;

	/*
	 *	Initialization
	 */
	int thread_support;
	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) {
		fprintf(stderr,"MPI_Init_thread failed\n");
		exit(1);
	}
	if (thread_support == MPI_THREAD_FUNNELED)
		fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
	if (thread_support < MPI_THREAD_FUNNELED)
		fprintf(stderr,"Warning: MPI does not have thread support!\n");

	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &world_size);

	starpu_srand48((long int)time(NULL));

	parse_args(rank, argc, argv);

	int ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* We disable sequential consistency in this example */
	starpu_data_set_default_sequential_consistency_flag(0);

	starpu_mpi_init(NULL, NULL, 0);

	STARPU_ASSERT(p*q == world_size);

	starpu_cublas_init();

	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);

	/*
	 * 	Problem Init
	 */

	init_matrix(rank);

	fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank,
                        (int)(allocated_memory/(1024*1024)),
			(int)(allocated_memory_extra/(1024*1024)),
                        (int)((allocated_memory+allocated_memory_extra)/(1024*1024)));

	display_grid(rank, nblocks);

	TYPE *a_r = NULL;
//	STARPU_PLU(display_data_content)(a_r, size);

	TYPE *x, *y;

	if (check)
	{
		x = calloc(size, sizeof(TYPE));
		STARPU_ASSERT(x);

		y = calloc(size, sizeof(TYPE));
		STARPU_ASSERT(y);

		if (rank == 0)
		{
			unsigned ind;
			for (ind = 0; ind < size; ind++)
				x[ind] = (TYPE)starpu_drand48();
		}

		a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);

		if (rank == 0)
			STARPU_PLU(display_data_content)(a_r, size);

//		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
	}

	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);

	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);

	/*
	 * 	Report performance
	 */

	int reduce_ret;
	double min_timing = timing;
	double max_timing = timing;
	double sum_timing = timing;

	reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);

	reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);

	reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);

	if (rank == 0)
	{
		fprintf(stderr, "Computation took: %f ms\n", max_timing/1000);
		fprintf(stderr, "\tMIN : %f ms\n", min_timing/1000);
		fprintf(stderr, "\tMAX : %f ms\n", max_timing/1000);
		fprintf(stderr, "\tAVG : %f ms\n", sum_timing/(world_size*1000));

		unsigned n = size;
		double flop = (2.0f*n*n*n)/3.0f;
		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/max_timing/1000.0f));
	}

	/*
	 *	Test Result Correctness
	 */

	if (check)
	{
		/*
		 *	Compute || A - LU ||
		 */

		STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r);

#if 0
		/*
		 *	Compute || Ax - LUx ||
		 */

		unsigned ind;

		y2 = calloc(size, sizeof(TYPE));
		STARPU_ASSERT(y2);

		if (rank == 0)
		{
			for (ind = 0; ind < size; ind++)
			{
				y2[ind] = (TYPE)0.0;
			}
		}

		STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank);

		/* Compute y2 = y2 - y */
		CPU_AXPY(size, -1.0, y, 1, y2, 1);

		TYPE err = CPU_ASUM(size, y2, 1);
		int max = CPU_IAMAX(size, y2, 1);

		fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size));
		fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]);
#endif
	}

	/*
	 * 	Termination
	 */

	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);

	starpu_cublas_shutdown();
	starpu_mpi_shutdown();
	starpu_shutdown();

#if 0
	MPI_Finalize();
#endif

	return 0;
}
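/* The token, token_handle and increment_token() used by the ring example below
 * are not listed (token_handle is the global registered in main()). A minimal
 * sketch of what increment_token() could look like, as an assumption rather
 * than the actual example code: it submits a one-buffer StarPU task that
 * increments the registered token in place. */
static void increment_cpu_func(void *descr[], void *arg)
{
	(void)arg;
	unsigned *tokenptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
	(*tokenptr)++;
}

static struct starpu_codelet increment_cl =
{
	.cpu_funcs = { increment_cpu_func },
	.nbuffers = 1,
	.modes = { STARPU_RW }
};

static void increment_token(void)
{
	struct starpu_task *task = starpu_task_create();
	task->cl = &increment_cl;
	task->handles[0] = token_handle;
	int ret = starpu_task_submit(task);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
}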
int main(int argc, char **argv)
{
	int ret, rank, size;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(NULL, NULL, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size < 2)
	{
		if (rank == 0)
			FPRINTF(stderr, "We need at least 2 processes.\n");

		MPI_Finalize();
		return STARPU_TEST_SKIPPED;
	}


	starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token));

	int nloops = NITER;
	int loop;

	int last_loop = nloops - 1;
	int last_rank = size - 1;

	for (loop = 0; loop < nloops; loop++)
	{
		int tag = loop*size + rank;

		if (loop == 0 && rank == 0)
		{
			token = 0;
			FPRINTF(stdout, "Start with token value %u\n", token);
		}
		else
		{
			starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL);
		}

		increment_token();

		if (loop == last_loop && rank == last_rank)
		{
			starpu_data_acquire(token_handle, STARPU_R);
			FPRINTF(stdout, "Finished : token value %u\n", token);
			starpu_data_release(token_handle);
		}
		else
		{
			starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL);
		}
	}

	starpu_task_wait_for_all();

	starpu_data_unregister(token_handle);
	starpu_mpi_shutdown();
	starpu_shutdown();

	if (rank == last_rank)
	{
		FPRINTF(stderr, "[%d] token = %u == %u * %d ?\n", rank, token, nloops, size);
		STARPU_ASSERT(token == nloops*size);
	}

	return 0;
}
int main(int argc, char **argv)
{
	MPI_Init(NULL, NULL);

	int rank, size;

	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size < 2)
	{
		if (rank == 0)
			fprintf(stderr, "We need at least processes.\n");

		MPI_Finalize();
		return 0;
	}

	/* We only use 2 nodes for that test */
	if (rank >= 2)
	{
		MPI_Finalize();
		return 0;
	}
		
	starpu_init(NULL);
	starpu_mpi_initialize();

	/* Node 0 will allocate a big block and only register an inner part of
	 * it as the block data; node 1 will allocate a block of the small size
	 * and register it directly. Nodes 0 and 1 will then exchange the content
	 * of their blocks. */

	float *block;
	starpu_data_handle block_handle;

	if (rank == 0)
	{
		block = calloc(BIGSIZE*BIGSIZE*BIGSIZE, sizeof(float));
		assert(block);

		/* fill the inner block */
		unsigned i, j, k;
		for (k = 0; k < SIZE; k++)
		for (j = 0; j < SIZE; j++)
		for (i = 0; i < SIZE; i++)
		{
			block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f;
		}

		starpu_block_data_register(&block_handle, 0,
			(uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE,
			SIZE, SIZE, SIZE, sizeof(float));
	}
	else /* rank == 1 */
	{
		block = calloc(SIZE*SIZE*SIZE, sizeof(float));
		assert(block);

		starpu_block_data_register(&block_handle, 0,
			(uintptr_t)block, SIZE, SIZE*SIZE,
			SIZE, SIZE, SIZE, sizeof(float));
	}

	if (rank == 0)
	{
		starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD);

		MPI_Status status;
		starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status);

		/* check the content of the block */
		starpu_data_acquire(block_handle, STARPU_R);
		unsigned i, j, k;
		for (k = 0; k < SIZE; k++)
		for (j = 0; j < SIZE; j++)
		for (i = 0; i < SIZE; i++)
		{
			assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f);
		}
		starpu_data_release(block_handle);
		
	}
	else /* rank == 1 */
	{
		MPI_Status status;
		starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status);

		/* check the content of the block and modify it */
		starpu_data_acquire(block_handle, STARPU_RW);
		unsigned i, j, k;
		for (k = 0; k < SIZE; k++)
		for (j = 0; j < SIZE; j++)
		for (i = 0; i < SIZE; i++)
		{
			assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f);
			block[i + j*SIZE + k*SIZE*SIZE] = 33.0f;
		}
		starpu_data_release(block_handle);

		starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD);
	}

	fprintf(stdout, "Rank %d is done\n", rank);
	fflush(stdout);

	starpu_mpi_shutdown();
	starpu_shutdown();

	MPI_Finalize();

	return 0;
}