int main(int argc, char **argv) { int my_rank, size, x, y, loop; float mean=0; float matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; int ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_mpi_init(&argc, &argv, 1); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &size); parse_args(argc, argv); /* Initial data values */ starpu_srand48((long int)time(NULL)); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { matrix[x][y] = (float)starpu_drand48(); mean += matrix[x][y]; } } mean /= (X*Y); if (display) { FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); for(x = 0; x < X; x++) { fprintf(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { fprintf(stdout, "%2.2f ", matrix[x][y]); } fprintf(stdout, "\n"); } } /* Initial distribution */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == my_rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); } else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) { /* I don't own that index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); } else { /* I know it's useless to allocate anything for this */ data_handles[x][y] = NULL; } if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } } } /* First computation with initial distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, 
data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Now migrate data to a new distribution */ /* First register newly needed data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib2(x, y, size); if (!data_handles[x][y] && (mpi_rank == my_rank || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size) || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size))) { /* Register newly-needed data */ starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y])) { /* Migrate the data */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* And register new rank of the matrix */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); } } } /* Second computation with new distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Unregister data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { if (data_handles[x][y]) { int mpi_rank = my_distrib(x, y, size); /* Get back data to original place where the user-provided buffer is. 
*/ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* Register original rank of the matrix (although useless) */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); /* And unregister it */ starpu_data_unregister(data_handles[x][y]); } } } starpu_mpi_shutdown(); starpu_shutdown(); if (display) { FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean); for(x = 0; x < X; x++) { FPRINTF(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { FPRINTF(stdout, "%2.2f ", matrix[x][y]); } FPRINTF(stdout, "\n"); } } return 0; }
/* Scatter the given data handles from the root node to their owners, using
 * detached (non-blocking) sends/receives.
 *
 * data_handles: array of 'count' handles; NULL entries are skipped.
 * root:         rank holding the initial data in 'comm'.
 * scallback/sarg: callback invoked on the root once every send completed.
 * rcallback/rarg: callback invoked on each receiver once every receive completed.
 * Either callback may be NULL, in which case no completion tracking is set up.
 *
 * Returns 0. The shared _callback_arg is freed by _callback_collective once
 * its internal counter reaches 'count' (ownership transfers to the callback).
 */
int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm,
				void (*scallback)(void *), void *sarg,
				void (*rcallback)(void *), void *rarg)
{
	int rank;
	int x;
	struct _callback_arg *callback_arg = NULL;
	void (*callback_func)(void *) = NULL;
	void (*callback)(void *);

	starpu_mpi_comm_rank(comm, &rank);

	/* The root reports send completion, everybody else receive completion. */
	callback = (rank == root) ? scallback : rcallback;
	if (callback)
	{
		callback_func = _callback_collective;
		callback_arg = malloc(sizeof(struct _callback_arg));
		/* Fix: the allocation result was previously dereferenced unchecked. */
		STARPU_ASSERT_MSG(callback_arg != NULL, "Cannot allocate memory for callback_arg");
		callback_arg->count = 0;
		callback_arg->nb = 0;
		callback_arg->callback = (rank == root) ? scallback : rcallback;
		callback_arg->arg = (rank == root) ? sarg : rarg;

		/* First pass: count how many transfers this rank takes part in,
		 * so the collective callback knows when it has seen them all. */
		for(x = 0; x < count ; x++)
		{
			if (data_handles[x])
			{
				int owner = starpu_mpi_data_get_rank(data_handles[x]);
				int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
				STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
				if ((rank == root) && (owner != root))
				{
					callback_arg->count ++;
				}
				if ((rank != root) && (owner == rank))
				{
					callback_arg->count ++;
				}
			}
		}
		if (!callback_arg->count)
		{
			/* Nothing to transfer on this rank: no callback will ever
			 * fire, so release the tracking structure here. */
			free(callback_arg);
			return 0;
		}
	}

	/* Second pass: post the actual detached transfers. */
	for(x = 0; x < count ; x++)
	{
		if (data_handles[x])
		{
			int owner = starpu_mpi_data_get_rank(data_handles[x]);
			int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");
			if ((rank == root) && (owner != root))
			{
				//fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, owner);
				starpu_mpi_isend_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg);
			}
			if ((rank != root) && (owner == rank))
			{
				//fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, root);
				starpu_mpi_irecv_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg);
			}
		}
	}
	return 0;
}
/* Two-rank test of starpu_mpi_task_insert() execution-node selection.
 *
 * Rank 0 owns x0 (a long), rank 1 owns x1 (an int); each rank also registers
 * a placeholder handle (-1 home node) for the variable it does not own.
 * A sequence of task insertions then exercises the rules deciding which rank
 * executes a codelet, with and without an explicit STARPU_EXECUTE_ON_NODE.
 * Ranks other than 0 and 1 skip straight to shutdown.
 */
int main(int argc, char **argv)
{
	int ret, rank, size, err, node;
	long x0=32;
	int x1=23;
	starpu_data_handle_t data_handlesx0;
	starpu_data_handle_t data_handlesx1;

	ret = starpu_init(NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_mpi_init(&argc, &argv, 1);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	/* Only the first two ranks take part in the test. */
	if (rank != 0 && rank != 1) goto end;

	if (rank == 0)
	{
		/* Rank 0 owns x0 (tag 0) and registers a remote placeholder for x1 (tag 1, owner 1). */
		starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0));
		starpu_mpi_data_register(data_handlesx0, 0, rank);
		starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1));
		starpu_mpi_data_register(data_handlesx1, 1, 1);
	}
	else if (rank == 1)
	{
		/* Rank 1 owns x1 (tag 1) and registers a remote placeholder for x0 (tag 0, owner 0). */
		starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1));
		starpu_mpi_data_register(data_handlesx1, 1, rank);
		starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0));
		starpu_mpi_data_register(data_handlesx0, 0, 0);
	}

	/* No explicit node: 'node' passed as a STARPU_VALUE argument records the
	 * rank the data model is expected to pick (here the W-handle's owner). */
	node = starpu_mpi_data_get_rank(data_handlesx1);
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1,
				     0);
	assert(err == 0);

	node = starpu_mpi_data_get_rank(data_handlesx0);
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1,
				     0);
	assert(err == 0);

	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
				     0);
	assert(err == 0);

	/* With two RW handles the choice is ambiguous: STARPU_EXECUTE_ON_NODE
	 * forces execution on the requested rank. */
	node = 1;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
				     STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1,
				     STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_R, data_handlesx1,
				     STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	/* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
	   going to overwrite the node even though the data model clearly specifies
	   which node is going to execute the codelet */
	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_R, data_handlesx0, STARPU_W, data_handlesx1,
				     STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	/* Here the value specified by the property STARPU_EXECUTE_ON_NODE is
	   going to overwrite the node even though the data model clearly specifies
	   which node is going to execute the codelet */
	node = 0;
	err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r,
				     STARPU_VALUE, &node, sizeof(node),
				     STARPU_W, data_handlesx0, STARPU_R, data_handlesx1,
				     STARPU_EXECUTE_ON_NODE, node,
				     0);
	assert(err == 0);

	FPRINTF_MPI(stderr, "Waiting ...\n");
	starpu_task_wait_for_all();
	starpu_data_unregister(data_handlesx0);
	starpu_data_unregister(data_handlesx1);

end:
	starpu_mpi_shutdown();
	starpu_shutdown();
	return 0;
}