int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var; MPI_Init(&argc, &argv); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return STARPU_TEST_SKIPPED; } if (rank == 0) { int n; for(n=1 ; n<size ; n++) { MPI_Status status; FPRINTF_MPI(stderr, "receiving from node %d\n", n); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); starpu_mpi_recv(handle, n, 42, MPI_COMM_WORLD, &status); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT_MSG(var == n, "Received incorrect value <%d> from node <%d>\n", var, n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var, n); starpu_data_release(handle); starpu_data_unregister(handle); } } else { FPRINTF_MPI(stderr, "sending to node %d\n", 0); var = rank; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD); starpu_data_unregister(handle); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
/*
 * Runs do_test() for the four combinations of detached/non-detached
 * send and receive and returns the sum of the results.
 * The test is skipped when fewer than 2 processes are available.
 */
int main(int argc, char **argv)
{
	int size;
	int rank;
	int ret=0;
	int combo;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size < 2)
	{
		FPRINTF_MPI(stderr, "We need at least 2 processes.\n");
		MPI_Finalize();
		return STARPU_TEST_SKIPPED;
	}

	/* combo enumerates (sdetached, rdetached) as (0,0), (0,1), (1,0), (1,1) */
	for (combo = 0; combo < 4; combo++)
	{
		int sdetached = combo / 2;
		int rdetached = combo % 2;

		ret += do_test(rank, sdetached, rdetached);
	}

	MPI_Finalize();
	return ret;
}
/*
 * Reduction codelet: accumulates the second variable into the first one
 * and logs the operation.
 */
void redux_cpu_func(void *descr[], void *cl_arg)
{
	long int *accumulator = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	long int *contribution = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]);

	(void)cl_arg;

	*accumulator = *accumulator + *contribution;

	/* The previous value of the accumulator is recomputed for the trace */
	FPRINTF_MPI(stderr, "Calling redux %ld=%ld+%ld\n", *accumulator, *accumulator-*contribution, *contribution);
}
/*
 * Codelet checking that the integer held by the first buffer equals the
 * rank packed in the codelet arguments.
 */
void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	int expected;
	int *received;

	received = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	starpu_codelet_unpack_args(_args, &expected);

	FPRINTF_MPI(stderr, "Executing codelet with value %d and rank %d\n", *received, expected);
	STARPU_ASSERT_MSG(*received == expected, "Received value %d is not the expected value %d\n", *received, expected);
}
/*
 * Completion callback for detached receives.
 *
 * arg points to the caller's counter of completed requests. The counter
 * is incremented under the mutex and the condition variable is signalled
 * so that a waiter can re-check it.
 *
 * The counter is accessed as an int: the callers visible in this file
 * declare it as `int received` and it is printed with "%d".
 */
void callback(void *arg)
{
	int *received = arg;

	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
	*received = *received + 1;
	FPRINTF_MPI(stderr, "Requests %d received\n", *received);
	STARPU_PTHREAD_COND_SIGNAL(&cond);
	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
}
/*
 * Codelet checking that it runs on the MPI node whose number was packed
 * in the codelet arguments.
 */
void func_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
{
	int expected_node;
	int actual_node;

	starpu_codelet_unpack_args(_args, &expected_node);
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &actual_node);

	FPRINTF_MPI(stderr, "Expected node: %d - Actual node: %d\n", expected_node, actual_node);
	assert(expected_node == actual_node);
}
void check_variable(starpu_data_handle_t handle, int i, int rank, int *error) { int other_rank = rank%2 == 0 ? rank+1 : rank-1; int *rvalue = (int *)starpu_data_get_local_ptr(handle); if (*rvalue != i*other_rank) { FPRINTF_MPI(stderr, "Incorrect received value: %d != %d\n", *rvalue, i*other_rank); *error = 1; } }
void check_complex(starpu_data_handle_t handle, int i, int rank, int *error) { double *real = starpu_complex_get_real(handle); double *imaginary = starpu_complex_get_imaginary(handle); int other_rank = rank%2 == 0 ? rank+1 : rank-1; if ((*real != ((i*other_rank)+12)) || (*imaginary != ((i*other_rank)+45))) { FPRINTF_MPI(stderr, "Incorrect received value: %f != %d || %f != %d\n", *real, ((i*other_rank)+12), *imaginary, ((i*other_rank)+45)); *error = 1; } }
/*
 * Registers NB void data handles (tag = index, owner = rank), runs the
 * exchange with check_void as checker, then unregisters the handles.
 * Returns the value returned by exchange().
 */
int exchange_void(int rank, int detached)
{
	starpu_data_handle_t tab_handle[NB];
	int idx;
	int ret;

	FPRINTF_MPI(stderr, "Exchanging void data with detached=%d\n", detached);

	for (idx = 0; idx < NB; idx++)
	{
		starpu_void_data_register(&tab_handle[idx]);
		starpu_mpi_data_register(tab_handle[idx], idx, rank);
	}

	ret = exchange(rank, tab_handle, check_void, detached);

	for (idx = 0; idx < NB; idx++)
	{
		starpu_data_unregister(tab_handle[idx]);
	}

	return ret;
}
/*
 * Registers NB integer variables initialised to index*rank (tag = index,
 * owner = rank), runs the exchange with check_variable as checker, then
 * unregisters the handles. Returns the value returned by exchange().
 */
int exchange_variable(int rank, int detached)
{
	starpu_data_handle_t tab_handle[NB];
	int value[NB];
	int idx;
	int ret;

	FPRINTF_MPI(stderr, "Exchanging variable data with detached=%d\n", detached);

	for (idx = 0; idx < NB; idx++)
	{
		value[idx] = idx*rank;
		starpu_variable_data_register(&tab_handle[idx], STARPU_MAIN_RAM, (uintptr_t)&value[idx], sizeof(int));
		starpu_mpi_data_register(tab_handle[idx], idx, rank);
	}

	ret = exchange(rank, tab_handle, check_variable, detached);

	for (idx = 0; idx < NB; idx++)
	{
		starpu_data_unregister(tab_handle[idx]);
	}

	return ret;
}
/*
 * Registers NB complex values initialised to
 * (index*rank + 12, index*rank + 45) (tag = index, owner = rank), runs
 * the exchange with check_complex as checker, then unregisters the
 * handles. Returns the value returned by exchange().
 */
int exchange_complex(int rank, int detached)
{
	starpu_data_handle_t handle[NB];
	double real[NB];
	double imaginary[NB];
	int idx;
	int ret;

	FPRINTF_MPI(stderr, "Exchanging complex data with detached=%d\n", detached);

	for (idx = 0; idx < NB; idx++)
	{
		real[idx] = (idx*rank)+12;
		imaginary[idx] = (idx*rank)+45;
		starpu_complex_data_register(&handle[idx], STARPU_MAIN_RAM, &real[idx], &imaginary[idx], 1);
		starpu_mpi_data_register(handle[idx], idx, rank);
	}

	ret = exchange(rank, handle, check_complex, detached);

	for (idx = 0; idx < NB; idx++)
	{
		starpu_data_unregister(handle[idx]);
	}

	return ret;
}
/*
 * Display codelet: prints the first long integer of the buffer passed
 * as first parameter.
 */
void display_cpu_func(void *descr[], void *cl_arg)
{
	long int *first_elem;

	(void)cl_arg;

	first_elem = (long int *)STARPU_VECTOR_GET_PTR(descr[0]);
	FPRINTF_MPI(stderr, "Local=%ld\n", *first_elem);
}
int main(int argc, char **argv) { int ret, rank, size, err, node; long x0=32; int x1=23; starpu_data_handle_t data_handlesx0; starpu_data_handle_t data_handlesx1; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank != 0 && rank != 1) goto end; if (rank == 0) { starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, rank); starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, 1); } else if (rank == 1) { starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, rank); starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, 0); } node = starpu_mpi_data_get_rank(data_handlesx1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, 0); assert(err == 0); node = starpu_mpi_data_get_rank(data_handlesx0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1, 0); assert(err == 0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, 0); assert(err == 0); node = 1; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, 
STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r, STARPU_VALUE, &node, sizeof(node), STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data_handlesx0); starpu_data_unregister(data_handlesx1); end: starpu_mpi_shutdown(); starpu_shutdown(); return 0; }
/*
 * Initialisation codelet: sets the variable to 0, the neutral element
 * of the reduction.
 */
void init_cpu_func(void *descr[], void *cl_arg)
{
	long int *neutral;

	(void)cl_arg;

	neutral = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]);
	*neutral = 0;
	FPRINTF_MPI(stderr, "Init dot\n");
}
int main(int argc, char **argv) { int ret, rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 0) { int n; for(n=1 ; n<size ; n++) { int i, var[2]; MPI_Status status[3]; starpu_data_handle_t handle[2]; FPRINTF_MPI(stderr, "receiving from node %d\n", n); for(i=0 ; i<2 ; i++) starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[0]); starpu_data_acquire(handle[0], STARPU_R); STARPU_ASSERT_MSG(var[0] == n, "Received incorrect value <%d> from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var[0], n); starpu_data_release(handle[0]); starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[1]); starpu_mpi_recv(handle[1], n, 44, MPI_COMM_WORLD, &status[2]); for(i=0 ; i<2 ; i++) starpu_data_acquire(handle[i], STARPU_R); STARPU_ASSERT_MSG(var[0] == n*2, "Received incorrect value <%d> from node <%d>\n", var[0], n); STARPU_ASSERT_MSG(var[1] == n*4, "Received incorrect value <%d> from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> and <%d> from node %d\n", var[0], var[1], n); for(i=0 ; i<2 ; i++) starpu_data_release(handle[i]); for(i=0 ; i<2 ; i++) starpu_data_unregister(handle[i]); } } else { int i, var[3]; starpu_data_handle_t handle[3]; FPRINTF_MPI(stderr, "sending to node %d\n", 0); var[0] = rank; var[1] = var[0] * 2; var[2] = var[0] * 4; for(i=0 ; i<3 ; i++) starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD); starpu_mpi_send(handle[1], 0, 42, MPI_COMM_WORLD); starpu_mpi_send(handle[2], 0, 44, 
MPI_COMM_WORLD); for(i=0 ; i<3 ; i++) starpu_data_unregister(handle[i]); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int size, n, x=789; int rank, other_rank; int ret; starpu_data_handle_t data[2]; MPI_Init(&argc, &argv); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size % 2) { FPRINTF(stderr, "We need a even number of processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } other_rank = rank%2 == 0 ? rank+1 : rank-1; FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); if (rank % 2) { MPI_Send(&rank, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD); FPRINTF(stderr, "[%d] sending %d\n", rank, rank); } else { MPI_Recv(&x, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d] received %d\n", rank, x); } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank % 2) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); starpu_mpi_data_register(data[1], 22, 0); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data[0], 12, 0); if (rank % 2) { starpu_mpi_req req; starpu_mpi_issend(data[1], &req, other_rank, 22, MPI_COMM_WORLD); starpu_mpi_send(data[0], other_rank, 12, MPI_COMM_WORLD); starpu_mpi_wait(&req, MPI_STATUS_IGNORE); } else { int *xx; starpu_mpi_recv(data[0], other_rank, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); xx = (int *)starpu_variable_get_local_ptr(data[0]); FPRINTF_MPI(stderr, "received %d\n", *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data[1], 22, 0); starpu_mpi_recv(data[0], other_rank, 22, MPI_COMM_WORLD, MPI_STATUS_IGNORE); xx = (int *)starpu_variable_get_local_ptr(data[0]); 
STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); } starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
/*
 * Exchanges the NB handles between a pair of processes.
 *
 * Odd ranks send handles[0], then handles[NB-1], then the remaining
 * handles in increasing order, each with its index as tag, and return 0.
 *
 * Even ranks post the receive for tag 0, sleep for 2 seconds, then post
 * the receives for the other tags. With detached set, completion is
 * counted by callback() and awaited on the condition variable; func is
 * not called in that mode. Otherwise each request is waited for in turn
 * and func is called on the received handle; the value func stored
 * through its last parameter is returned.
 */
int exchange(int rank, starpu_data_handle_t *handles, check_func func, int detached)
{
	int other_rank;
	int i;
	int ret=0;
	int received = 0;
	starpu_mpi_req req[NB];

	if (rank%2 == 0)
		other_rank = rank+1;
	else
		other_rank = rank-1;

	if (rank%2)
	{
		/* Sender: first and last handles go first, then the middle ones */
		starpu_mpi_send(handles[0], other_rank, 0, MPI_COMM_WORLD);
		starpu_mpi_send(handles[NB-1], other_rank, NB-1, MPI_COMM_WORLD);
		for (i = 1; i < NB-1; i++)
		{
			starpu_mpi_send(handles[i], other_rank, i, MPI_COMM_WORLD);
		}
		return 0;
	}

	/* Receiver */
	if (!detached)
		memset(req, 0, sizeof(req));

	for (i = 0; i < NB; i++)
	{
		if (detached)
		{
			starpu_mpi_irecv_detached(handles[i], other_rank, i, MPI_COMM_WORLD, callback, &received);
		}
		else
		{
			starpu_mpi_irecv(handles[i], &req[i], other_rank, i, MPI_COMM_WORLD);
			STARPU_ASSERT(req[i] != NULL);
		}

		/* After posting only the first receive, sleep so that the
		 * data sent with the other tags (tag NB-1 is sent second)
		 * gets here before the matching receive is posted */
		if (i == 0)
			usleep(2000000);
	}

	if (detached)
	{
		STARPU_PTHREAD_MUTEX_LOCK(&mutex);
		while (received != NB)
		{
			FPRINTF_MPI(stderr, "Received %d messages\n", received);
			STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
		return ret;
	}

	for (i = 0; i < NB; i++)
	{
		starpu_mpi_wait(&req[i], MPI_STATUS_IGNORE);
		func(handles[i], i, rank, &ret);
	}
	return ret;
}
int main(int argc, char **argv) { int my_rank, size, x, y, loop; float mean=0; float matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; int ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_mpi_init(&argc, &argv, 1); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &size); parse_args(argc, argv); /* Initial data values */ starpu_srand48((long int)time(NULL)); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { matrix[x][y] = (float)starpu_drand48(); mean += matrix[x][y]; } } mean /= (X*Y); if (display) { FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); for(x = 0; x < X; x++) { fprintf(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { fprintf(stdout, "%2.2f ", matrix[x][y]); } fprintf(stdout, "\n"); } } /* Initial distribution */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == my_rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); } else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) { /* I don't own that index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); } else { /* I know it's useless to allocate anything for this */ data_handles[x][y] = NULL; } if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } } } /* First computation with initial distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, 
data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Now migrate data to a new distribution */ /* First register newly needed data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib2(x, y, size); if (!data_handles[x][y] && (mpi_rank == my_rank || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size) || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size))) { /* Register newly-needed data */ starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y])) { /* Migrate the data */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* And register new rank of the matrix */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); } } } /* Second computation with new distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Unregister data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { if (data_handles[x][y]) { int mpi_rank = my_distrib(x, y, size); /* Get back data to original place where the user-provided buffer is. 
*/ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* Register original rank of the matrix (although useless) */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); /* And unregister it */ starpu_data_unregister(data_handles[x][y]); } } } starpu_mpi_shutdown(); starpu_shutdown(); if (display) { FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean); for(x = 0; x < X; x++) { FPRINTF(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { FPRINTF(stdout, "%2.2f ", matrix[x][y]); } FPRINTF(stdout, "\n"); } } return 0; }
int do_test(int rank, int sdetached, int rdetached) { int ret, i; int val[2]; starpu_data_handle_t data[2]; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 1) { val[0] = VAL0; val[1] = VAL1; } else { val[0] = -1; val[1] = -1; } starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0])); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1])); starpu_mpi_data_register(data[0], 77, 1); starpu_mpi_data_register(data[1], 88, 1); if (rank == 1) { for(i=1 ; i>=0 ; i--) { if (sdetached) starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL); else starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD); } } else if (rank == 0) { int received = 0; for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) { if (rdetached) starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received); else starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (rdetached) { STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (received != 2) { FPRINTF_MPI(stderr, "Received %d messages\n", received); STARPU_PTHREAD_COND_WAIT(&cond, &mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } for(i=0 ; i<2 ; i++) starpu_data_acquire(data[i], STARPU_R); for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) starpu_data_release(data[i]); } FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); if (rank == 0) { ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1; } starpu_mpi_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int size, x; int color; MPI_Comm newcomm; int rank, newrank; int ret; starpu_data_handle_t data[3]; int value = 90; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 4) { FPRINTF(stderr, "We need at least 4 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } color = rank%2; MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); MPI_Comm_rank(newcomm, &newrank); FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color); if (newrank == 0) { FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank); MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm); } else if (newrank == 1) { MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x); } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 0) { starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int)); } else starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD); if (newrank == 0) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[0], 12, 0, newcomm); if (newrank == 0) { starpu_mpi_req req[2]; starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm); starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm); starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE); starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE); } else if (newrank == 1) { int *xx; starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE); starpu_data_acquire(data[0], STARPU_RW); xx = 
(int *)starpu_variable_get_local_ptr(data[0]); starpu_data_release(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE); starpu_data_acquire(data[0], STARPU_RW); xx = (int *)starpu_variable_get_local_ptr(data[0]); starpu_data_release(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); } if (rank == 0) { starpu_data_acquire(data[2], STARPU_RW); int rvalue = *((int *)starpu_variable_get_local_ptr(data[2])); starpu_data_release(data[2]); FPRINTF_MPI(stderr, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1); starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD); starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); starpu_data_acquire(data[2], STARPU_RW); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); starpu_data_release(data[2]); FPRINTF_MPI(stderr, "Value back is %d\n", *xx); STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect (should be %d)\n", *xx, rvalue + (2*(size-1))); } else { int next = (rank == size-1) ? 
0 : rank+1; starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); starpu_data_acquire(data[2], STARPU_RW); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); FPRINTF_MPI(stderr, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next); *xx = *xx + 2; starpu_data_release(data[2]); starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD); } if (newrank == 0 || newrank == 1) { starpu_mpi_insert_task(newcomm, &mycodelet, STARPU_RW, data[0], STARPU_VALUE, &x, sizeof(x), STARPU_EXECUTE_ON_NODE, 1, 0); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); } starpu_data_unregister(data[2]); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Comm_free(&newcomm); MPI_Finalize(); return 0; }