int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var; MPI_Init(&argc, &argv); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return STARPU_TEST_SKIPPED; } if (rank == 0) { int n; for(n=1 ; n<size ; n++) { MPI_Status status; FPRINTF_MPI(stderr, "receiving from node %d\n", n); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); starpu_mpi_recv(handle, n, 42, MPI_COMM_WORLD, &status); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT_MSG(var == n, "Received incorrect value <%d> from node <%d>\n", var, n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var, n); starpu_data_release(handle); starpu_data_unregister(handle); } } else { FPRINTF_MPI(stderr, "sending to node %d\n", 0); var = rank; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD); starpu_data_unregister(handle); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int ret, rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); tab = malloc(SIZE*sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { starpu_tag_t tag = (starpu_tag_t)loop; if ((loop % 2) == (rank%2)) { starpu_mpi_isend_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); } else { starpu_mpi_irecv_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); } starpu_tag_wait(tag); } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
/*main program*/ int main(int argc, char * argv[]) { /* Init */ int ret; int mpi_rank, mpi_size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); /*element initialization : domains are connected as a ring for this test*/ int num_elements=NUM_EL; struct element * el_left=malloc(num_elements*sizeof(el_left[0])); struct element * el_right=malloc(num_elements*sizeof(el_right[0])); int i; for(i=0; i<num_elements; i++) { init_element(el_left+i,i+1,((mpi_rank-1)+mpi_size)%mpi_size); init_element(el_right+i,i+1,(mpi_rank+1)%mpi_size); } /* Communication loop */ for (i=0; i<NUM_LOOPS; i++) //number of "computations loops" { int e; for (e=0; e<num_elements; e++) //Do something for each elements { insert_work_for_one_element(el_right+e); insert_work_for_one_element(el_left+e); } } /* End */ starpu_task_wait_for_all(); for(i=0; i<num_elements; i++) { free_element(el_left+i); free_element(el_right+i); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); FPRINTF(stderr, "No assert until end\n"); return 0; }
int main(int argc, char **argv) { /* create a simple definite positive symetric matrix example * * Hilbert matrix : h(i,j) = 1/(i+j+1) * */ float ***bmat; int rank, nodes, ret; double timing, flops; int correctness; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nodes); starpu_cublas_init(); parse_args(argc, argv, nodes); matrix_init(&bmat, rank, nodes, 1); matrix_display(bmat, rank); dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops); starpu_mpi_shutdown(); matrix_display(bmat, rank); dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops); matrix_free(&bmat, rank, nodes, 1); starpu_cublas_shutdown(); starpu_shutdown(); assert(correctness); if (rank == 0) { FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000); FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f)); } return 0; }
int main(int argc, char **argv) { int ret=0, global_ret=0; int rank, size; MPI_Init(&argc, &argv); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (size%2 != 0) { FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = exchange_variable(rank, 0); if (ret != 0) global_ret = ret; ret = exchange_variable(rank, 1); if (ret != 0) global_ret = ret; ret = exchange_void(rank, 0); if (ret != 0) global_ret = ret; ret = exchange_void(rank, 1); if (ret != 0) global_ret = ret; ret = exchange_complex(rank, 0); if (ret != 0) global_ret = ret; ret = exchange_complex(rank, 1); if (ret != 0) global_ret = ret; starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return global_ret; }
int main(int argc, char **argv) { int ret, rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 0) { int n; for(n=1 ; n<size ; n++) { int i, var[2]; MPI_Status status[3]; starpu_data_handle_t handle[2]; FPRINTF_MPI(stderr, "receiving from node %d\n", n); for(i=0 ; i<2 ; i++) starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[0]); starpu_data_acquire(handle[0], STARPU_R); STARPU_ASSERT_MSG(var[0] == n, "Received incorrect value <%d> from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var[0], n); starpu_data_release(handle[0]); starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[1]); starpu_mpi_recv(handle[1], n, 44, MPI_COMM_WORLD, &status[2]); for(i=0 ; i<2 ; i++) starpu_data_acquire(handle[i], STARPU_R); STARPU_ASSERT_MSG(var[0] == n*2, "Received incorrect value <%d> from node <%d>\n", var[0], n); STARPU_ASSERT_MSG(var[1] == n*4, "Received incorrect value <%d> from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> and <%d> from node %d\n", var[0], var[1], n); for(i=0 ; i<2 ; i++) starpu_data_release(handle[i]); for(i=0 ; i<2 ; i++) starpu_data_unregister(handle[i]); } } else { int i, var[3]; starpu_data_handle_t handle[3]; FPRINTF_MPI(stderr, "sending to node %d\n", 0); var[0] = rank; var[1] = var[0] * 2; var[2] = var[0] * 4; for(i=0 ; i<3 ; i++) starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD); starpu_mpi_send(handle[1], 0, 42, MPI_COMM_WORLD); starpu_mpi_send(handle[2], 0, 44, MPI_COMM_WORLD); for(i=0 ; i<3 ; i++) starpu_data_unregister(handle[i]); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int size, n, x=789; int rank, other_rank; int ret; starpu_data_handle_t data[2]; MPI_Init(&argc, &argv); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size % 2) { FPRINTF(stderr, "We need a even number of processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } other_rank = rank%2 == 0 ? rank+1 : rank-1; FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); if (rank % 2) { MPI_Send(&rank, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD); FPRINTF(stderr, "[%d] sending %d\n", rank, rank); } else { MPI_Recv(&x, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d] received %d\n", rank, x); } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank % 2) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); starpu_mpi_data_register(data[1], 22, 0); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data[0], 12, 0); if (rank % 2) { starpu_mpi_req req; starpu_mpi_issend(data[1], &req, other_rank, 22, MPI_COMM_WORLD); starpu_mpi_send(data[0], other_rank, 12, MPI_COMM_WORLD); starpu_mpi_wait(&req, MPI_STATUS_IGNORE); } else { int *xx; starpu_mpi_recv(data[0], other_rank, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); xx = (int *)starpu_variable_get_local_ptr(data[0]); FPRINTF_MPI(stderr, "received %d\n", *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data[1], 22, 0); starpu_mpi_recv(data[0], other_rank, 22, MPI_COMM_WORLD, MPI_STATUS_IGNORE); xx = (int *)starpu_variable_get_local_ptr(data[0]); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); } starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
int do_test(int rank, int sdetached, int rdetached) { int ret, i; int val[2]; starpu_data_handle_t data[2]; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 1) { val[0] = VAL0; val[1] = VAL1; } else { val[0] = -1; val[1] = -1; } starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0])); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1])); starpu_mpi_data_register(data[0], 77, 1); starpu_mpi_data_register(data[1], 88, 1); if (rank == 1) { for(i=1 ; i>=0 ; i--) { if (sdetached) starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL); else starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD); } } else if (rank == 0) { int received = 0; for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) { if (rdetached) starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received); else starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (rdetached) { STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (received != 2) { FPRINTF_MPI(stderr, "Received %d messages\n", received); STARPU_PTHREAD_COND_WAIT(&cond, &mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } for(i=0 ; i<2 ; i++) starpu_data_acquire(data[i], STARPU_R); for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) starpu_data_release(data[i]); } FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); if (rank == 0) { ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1; } starpu_mpi_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int ret, rank, size; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { int tag = loop*size + rank; if (loop == 0 && rank == 0) { token = 0; FPRINTF(stdout, "Start with token value %u\n", token); } else { starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL); } increment_token(); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF(stdout, "Finished : token value %u\n", token); starpu_data_release(token_handle); } else { starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL); } } starpu_task_wait_for_all(); starpu_data_unregister(token_handle); starpu_mpi_shutdown(); starpu_shutdown(); if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %u == %u * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } return 0; }
int main(int argc, char **argv) { int my_rank, size, x, y, loop; float mean=0; float matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; int ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_mpi_init(&argc, &argv, 1); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &size); parse_args(argc, argv); /* Initial data values */ starpu_srand48((long int)time(NULL)); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { matrix[x][y] = (float)starpu_drand48(); mean += matrix[x][y]; } } mean /= (X*Y); if (display) { FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); for(x = 0; x < X; x++) { fprintf(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { fprintf(stdout, "%2.2f ", matrix[x][y]); } fprintf(stdout, "\n"); } } /* Initial distribution */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == my_rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); } else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) { /* I don't own that index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); } else { /* I know it's useless to allocate anything for this */ data_handles[x][y] = NULL; } if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } } } /* First computation with initial distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Now migrate data to a new distribution */ /* First register newly needed data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib2(x, y, size); if (!data_handles[x][y] && (mpi_rank == my_rank || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size) || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size))) { /* Register newly-needed data */ starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y])) { /* Migrate the data */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* And register new rank of the matrix */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); } } } /* Second computation with new distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Unregister data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { if (data_handles[x][y]) { int mpi_rank = my_distrib(x, y, size); /* Get back data to original place where the user-provided buffer is. */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* Register original rank of the matrix (although useless) */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); /* And unregister it */ starpu_data_unregister(data_handles[x][y]); } } } starpu_mpi_shutdown(); starpu_shutdown(); if (display) { FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean); for(x = 0; x < X; x++) { FPRINTF(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { FPRINTF(stdout, "%2.2f ", matrix[x][y]); } FPRINTF(stdout, "\n"); } } return 0; }
int main(int argc, char **argv) { int rank, size, err; int x[2]; int ret, i; starpu_data_handle_t data_handles[2]; int values[2]; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank == 0) { x[0] = 11; starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x[0], sizeof(x[0])); starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1])); } else if (rank == 1) { x[1] = 12; starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&x[1], sizeof(x[1])); } else { starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1])); } starpu_mpi_data_register(data_handles[0], 0, 0); starpu_mpi_data_register(data_handles[1], 1, 1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], STARPU_EXECUTE_ON_DATA, data_handles[1], 0); assert(err == 0); starpu_task_wait_for_all(); for(i=0 ; i<2 ; i++) { starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL); if (rank == 0) { starpu_data_acquire(data_handles[i], STARPU_R); values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i])); starpu_data_release(data_handles[i]); } } ret = 0; if (rank == 0) { FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]); if (values[0] != 12 || values[1] != 144) { ret = EXIT_FAILURE; } } starpu_data_unregister(data_handles[0]); starpu_data_unregister(data_handles[1]); starpu_mpi_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int rank, nodes; int ret=0; int compare=0; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2) { fprintf(stderr, "This program needs at least 2 nodes (%d available)\n", nodes); ret = 77; } else { starpu_data_handle_t handle; starpu_data_handle_t handle2; double real[2] = {4.0, 2.0}; double imaginary[2] = {7.0, 9.0}; double real2[2] = {14.0, 12.0}; double imaginary2[2] = {17.0, 19.0}; if (rank == 1) { real[0] = 0.0; real[1] = 0.0; imaginary[0] = 0.0; imaginary[1] = 0.0; } starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2); if (rank == 0) { int *compare_ptr = &compare; starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0); starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL); starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL); starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0); starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); } else if (rank == 1) { starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL); starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0); starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL); } starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_data_unregister(handle2); } starpu_mpi_shutdown(); starpu_shutdown(); if (rank == 0) return !compare; else return ret; }
int main(int argc, char **argv) { int ret, rank, size, err, node; long x0=32; int x1=23; starpu_data_handle_t data_handlesx0; starpu_data_handle_t data_handlesx1; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank != 0 && rank != 1) goto end; if (rank == 0) { starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, rank); starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, 1); } else if (rank == 1) { starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, rank); starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, 0); } node = starpu_mpi_data_get_rank(data_handlesx1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, 0); assert(err == 0); node = starpu_mpi_data_get_rank(data_handlesx0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1, 0); assert(err == 0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, 0); assert(err == 0); node = 1; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r, STARPU_VALUE, &node, sizeof(node), STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data_handlesx0); starpu_data_unregister(data_handlesx1); end: starpu_mpi_shutdown(); starpu_shutdown(); return 0; }
int main(int argc, char **argv) { int rank; int world_size; /* * Initialization */ int thread_support; if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) { fprintf(stderr,"MPI_Init_thread failed\n"); exit(1); } if (thread_support == MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); if (thread_support < MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI does not have thread support!\n"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); starpu_srand48((long int)time(NULL)); parse_args(rank, argc, argv); int ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* We disable sequential consistency in this example */ starpu_data_set_default_sequential_consistency_flag(0); starpu_mpi_init(NULL, NULL, 0); STARPU_ASSERT(p*q == world_size); starpu_cublas_init(); int barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); /* * Problem Init */ init_matrix(rank); fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank, (int)(allocated_memory/(1024*1024)), (int)(allocated_memory_extra/(1024*1024)), (int)((allocated_memory+allocated_memory_extra)/(1024*1024))); display_grid(rank, nblocks); TYPE *a_r = NULL; // STARPU_PLU(display_data_content)(a_r, size); TYPE *x, *y; if (check) { x = calloc(size, sizeof(TYPE)); STARPU_ASSERT(x); y = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { unsigned ind; for (ind = 0; ind < size; ind++) x[ind] = (TYPE)starpu_drand48(); } a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); if (rank == 0) STARPU_PLU(display_data_content)(a_r, size); // STARPU_PLU(compute_ax)(size, x, y, nblocks, rank); } barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size); /* * Report performance */ int reduce_ret; double min_timing = timing; double max_timing = timing; double sum_timing = timing; reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); if (rank == 0) { fprintf(stderr, "Computation took: %f ms\n", max_timing/1000); fprintf(stderr, "\tMIN : %f ms\n", min_timing/1000); fprintf(stderr, "\tMAX : %f ms\n", max_timing/1000); fprintf(stderr, "\tAVG : %f ms\n", sum_timing/(world_size*1000)); unsigned n = size; double flop = (2.0f*n*n*n)/3.0f; fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/max_timing/1000.0f)); } /* * Test Result Correctness */ if (check) { /* * Compute || A - LU || */ STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r); #if 0 /* * Compute || Ax - LUx || */ unsigned ind; y2 = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { for (ind = 0; ind < size; ind++) { y2[ind] = (TYPE)0.0; } } STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank); /* Compute y2 = y2 - y */ CPU_AXPY(size, -1.0, y, 1, y2, 1); TYPE err = CPU_ASUM(size, y2, 1); int max = CPU_IAMAX(size, y2, 1); fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size)); fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]); #endif } /* * Termination */ barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); starpu_cublas_shutdown(); starpu_mpi_shutdown(); starpu_shutdown(); #if 0 MPI_Finalize(); #endif return 0; }
int main(int argc, char **argv) { int size, x; int color; MPI_Comm newcomm; int rank, newrank; int ret; starpu_data_handle_t data[3]; int value = 90; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 4) { FPRINTF(stderr, "We need at least 4 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } color = rank%2; MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); MPI_Comm_rank(newcomm, &newrank); FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color); if (newrank == 0) { FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank); MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm); } else if (newrank == 1) { MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x); } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 0) { starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int)); } else starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD); if (newrank == 0) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[0], 12, 0, newcomm); if (newrank == 0) { starpu_mpi_req req[2]; starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm); starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm); starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE); starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE); } else if (newrank == 1) { int *xx; starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE); starpu_data_acquire(data[0], STARPU_RW); xx = (int *)starpu_variable_get_local_ptr(data[0]); starpu_data_release(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE); starpu_data_acquire(data[0], STARPU_RW); xx = (int *)starpu_variable_get_local_ptr(data[0]); starpu_data_release(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); } if (rank == 0) { starpu_data_acquire(data[2], STARPU_RW); int rvalue = *((int *)starpu_variable_get_local_ptr(data[2])); starpu_data_release(data[2]); FPRINTF_MPI(stderr, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1); starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD); starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); starpu_data_acquire(data[2], STARPU_RW); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); starpu_data_release(data[2]); FPRINTF_MPI(stderr, "Value back is %d\n", *xx); STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect (should be %d)\n", *xx, rvalue + (2*(size-1))); } else { int next = (rank == size-1) ? 0 : rank+1; starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); starpu_data_acquire(data[2], STARPU_RW); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); FPRINTF_MPI(stderr, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next); *xx = *xx + 2; starpu_data_release(data[2]); starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD); } if (newrank == 0 || newrank == 1) { starpu_mpi_insert_task(newcomm, &mycodelet, STARPU_RW, data[0], STARPU_VALUE, &x, sizeof(x), STARPU_EXECUTE_ON_NODE, 1, 0); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); } starpu_data_unregister(data[2]); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Comm_free(&newcomm); MPI_Finalize(); return 0; }