void insert_work_for_one_element(struct element *el) { starpu_data_handle_t tmp_recv; starpu_data_handle_t tmp_send; starpu_vector_data_register(&tmp_recv, -1, 0, el->tag, sizeof(int)); starpu_vector_data_register(&tmp_send, -1, 0, el->tag, sizeof(int)); //Emulate the work to fill the send buffer starpu_insert_task(&fill_tmp_buffer_cl, STARPU_W,tmp_send, 0); //Send operation starpu_insert_task(&submitted_order, STARPU_RW,el->ensure_submitted_order_send, STARPU_W,tmp_send, 0); starpu_mpi_isend_detached(tmp_send,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); //Recv operation for current element starpu_insert_task(&submitted_order, STARPU_RW,el->ensure_submitted_order_recv, STARPU_W,tmp_recv, 0); starpu_mpi_irecv_detached(tmp_recv,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); //Emulate the "reading" of the recv value. starpu_insert_task(&read_ghost_value_cl, STARPU_R,tmp_recv, 0); starpu_data_unregister_submit(tmp_send); starpu_data_unregister_submit(tmp_recv); }
/* Post MPI recv */ static void create_task_save_mpi_recv(unsigned iter, unsigned z, int dir, int local_rank) { struct block_description *descr = get_block_description(z); STARPU_ASSERT(descr->mpi_node != local_rank); struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2]; int source = descr->mpi_node; STARPU_ASSERT(neighbour->mpi_node == local_rank); /* Receive our neighbour's border in our neighbour copy */ starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0]; starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1]; starpu_mpi_irecv_detached(handle0, source, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z); starpu_mpi_irecv_detached(handle1, source, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z); }
/*
 * Post a detached receive of data_handle from source (MPI tag data_tag on
 * comm) and register starpu_mpi_unlock_tag_callback as its completion
 * callback, with a heap-allocated copy of the StarPU tag as argument.
 *
 * Returns -1 without posting anything if the copy of the tag cannot be
 * allocated; otherwise returns the value of starpu_mpi_irecv_detached().
 *
 * NOTE(review): the heap copy is presumably released by
 * starpu_mpi_unlock_tag_callback - confirm against its definition.
 */
int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int data_tag, MPI_Comm comm, starpu_tag_t tag)
{
	starpu_tag_t *tagptr = malloc(sizeof *tagptr);

	/* Do not dereference a failed allocation. */
	if (tagptr == NULL)
		return -1;

	*tagptr = tag;

	return starpu_mpi_irecv_detached(data_handle, source, data_tag, comm,
					 starpu_mpi_unlock_tag_callback, tagptr);
}
/*
 * Post array_size detached receives; element i receives data_handle[i] from
 * source[i] with MPI tag data_tag[i] on comm[i].  All receives share one
 * heap-allocated argument (array size + StarPU tag) that is passed to
 * starpu_mpi_array_unlock_callback.
 *
 * With array_size == 0 no receive is posted, so nothing is allocated and
 * the function returns 0 immediately (no callback will ever run, hence the
 * tag is not unlocked by this call).
 *
 * Returns 0 on success, -1 if the shared argument cannot be allocated.
 * The return values of the individual receives are not inspected.
 *
 * NOTE(review): the shared argument is presumably released by
 * starpu_mpi_array_unlock_callback once every receive has completed -
 * confirm against its definition.
 */
int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *data_tag, MPI_Comm *comm, starpu_tag_t tag)
{
	struct arg_array *arg;
	unsigned elem;

	/* Without any receive the argument would never reach a callback and would leak. */
	if (array_size == 0)
		return 0;

	arg = malloc(sizeof *arg);
	if (arg == NULL)
		return -1;

	arg->array_size = array_size;
	arg->tag = tag;

	for (elem = 0; elem < array_size; elem++)
	{
		starpu_mpi_irecv_detached(data_handle[elem], source[elem], data_tag[elem], comm[elem],
					  starpu_mpi_array_unlock_callback, arg);
	}

	return 0;
}
/*
 * Scatter the data handles from root to their owners with detached
 * communications.
 *
 * For every non-NULL handle in data_handles[0..count-1]:
 *  - on the root, a handle owned by another rank is sent to its owner;
 *  - on a non-root rank, a handle owned by this rank is received from root.
 * The MPI tag registered on the handle is used and must be >= 0 (asserted).
 *
 * scallback/sarg are used on the root, rcallback/rarg on the other ranks.
 * When the selected callback is non-NULL, the communications are given
 * _callback_collective with a shared heap-allocated argument that records
 * the number of expected communications, the user callback and its
 * argument.  If that number is zero the argument is freed and the function
 * returns at once (the user callback is then not invoked by this function).
 *
 * Returns 0 on success, -1 if the shared argument cannot be allocated.
 */
int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
{
	int rank;
	int x;
	int is_root;
	struct _callback_arg *callback_arg = NULL;
	void (*callback_func)(void *) = NULL;
	void (*callback)(void *);

	starpu_mpi_comm_rank(comm, &rank);
	is_root = (rank == root);

	callback = is_root ? scallback : rcallback;
	if (callback)
	{
		callback_arg = malloc(sizeof *callback_arg);
		if (callback_arg == NULL)
			return -1;

		callback_func = _callback_collective;
		callback_arg->count = 0;
		callback_arg->nb = 0;
		callback_arg->callback = callback;
		callback_arg->arg = is_root ? sarg : rarg;

		/* First pass: count the communications this rank is going to post. */
		for (x = 0; x < count; x++)
		{
			if (data_handles[x])
			{
				int owner = starpu_mpi_data_get_rank(data_handles[x]);
				int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
				STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");

				if (is_root && (owner != root))
				{
					callback_arg->count ++;
				}
				if (!is_root && (owner == rank))
				{
					callback_arg->count ++;
				}
			}
		}

		/* Nothing to communicate: the shared argument will never be used. */
		if (!callback_arg->count)
		{
			free(callback_arg);
			return 0;
		}
	}

	/* Second pass: post the communications. */
	for (x = 0; x < count; x++)
	{
		if (data_handles[x])
		{
			int owner = starpu_mpi_data_get_rank(data_handles[x]);
			int data_tag = starpu_mpi_data_get_tag(data_handles[x]);
			STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle");

			if (is_root && (owner != root))
			{
				/* Root sends data[x] to its owner. */
				starpu_mpi_isend_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg);
			}
			if (!is_root && (owner == rank))
			{
				/* Owner receives data[x] from root. */
				starpu_mpi_irecv_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg);
			}
		}
	}

	return 0;
}
/*
 * Exchange NB handles between a pair of ranks (even rank <-> next odd rank).
 *
 * Odd ranks send: handles[0] first, then handles[NB-1], then handles[1..NB-2],
 * each with its index as MPI tag, and return 0.
 *
 * Even ranks receive.  The receive for tag 0 is posted first, then the
 * function sleeps for two seconds before posting the receives for tags
 * 1..NB-1, so that the remaining messages (in particular tag NB-1, which is
 * sent second) can arrive before their receives are posted.
 *  - detached != 0: detached receives are used with `callback` and the local
 *    counter `received`; the function waits on cond/mutex until the counter
 *    reaches NB and returns 0 (func is not called on this path).
 *  - detached == 0: request-based receives are used; each request is waited
 *    for in index order and func(handles[i], i, rank, &ret) is called after
 *    each wait.  The value left in ret is returned.
 */
int exchange(int rank, starpu_data_handle_t *handles, check_func func, int detached)
{
	int peer = (rank%2 == 0) ? rank+1 : rank-1;
	int ret = 0;
	int received = 0;
	int i;
	starpu_mpi_req req[NB];

	/* Sender side. */
	if (rank%2)
	{
		starpu_mpi_send(handles[0], peer, 0, MPI_COMM_WORLD);
		starpu_mpi_send(handles[NB-1], peer, NB-1, MPI_COMM_WORLD);
		for (i = 1; i < NB-1; i++)
		{
			starpu_mpi_send(handles[i], peer, i, MPI_COMM_WORLD);
		}
		return 0;
	}

	/* Receiver side. */
	if (detached)
	{
		starpu_mpi_irecv_detached(handles[0], peer, 0, MPI_COMM_WORLD, callback, &received);

		/* Let the other messages arrive before their receives are posted. */
		usleep(2000000);

		for (i = 1; i < NB; i++)
		{
			starpu_mpi_irecv_detached(handles[i], peer, i, MPI_COMM_WORLD, callback, &received);
		}

		STARPU_PTHREAD_MUTEX_LOCK(&mutex);
		while (received != NB)
		{
			FPRINTF_MPI(stderr, "Received %d messages\n", received);
			STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
	}
	else
	{
		memset(req, 0, sizeof(req));
		starpu_mpi_irecv(handles[0], &req[0], peer, 0, MPI_COMM_WORLD);
		STARPU_ASSERT(req[0] != NULL);

		/* Let the other messages arrive before their receives are posted. */
		usleep(2000000);

		for (i = 1; i < NB; i++)
		{
			starpu_mpi_irecv(handles[i], &req[i], peer, i, MPI_COMM_WORLD);
			STARPU_ASSERT(req[i] != NULL);
		}

		for (i = 0; i < NB; i++)
		{
			starpu_mpi_wait(&req[i], MPI_STATUS_IGNORE);
			func(handles[i], i, rank, &ret);
		}
	}

	return ret;
}
int do_test(int rank, int sdetached, int rdetached) { int ret, i; int val[2]; starpu_data_handle_t data[2]; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 1) { val[0] = VAL0; val[1] = VAL1; } else { val[0] = -1; val[1] = -1; } starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0])); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1])); starpu_mpi_data_register(data[0], 77, 1); starpu_mpi_data_register(data[1], 88, 1); if (rank == 1) { for(i=1 ; i>=0 ; i--) { if (sdetached) starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL); else starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD); } } else if (rank == 0) { int received = 0; for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) { if (rdetached) starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received); else starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (rdetached) { STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (received != 2) { FPRINTF_MPI(stderr, "Received %d messages\n", received); STARPU_PTHREAD_COND_WAIT(&cond, &mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } for(i=0 ; i<2 ; i++) starpu_data_acquire(data[i], STARPU_R); for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) starpu_data_release(data[i]); } FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); if (rank == 0) { ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1; } starpu_mpi_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int ret, rank, size; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { int tag = loop*size + rank; if (loop == 0 && rank == 0) { token = 0; FPRINTF(stdout, "Start with token value %u\n", token); } else { starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL); } increment_token(); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF(stdout, "Finished : token value %u\n", token); starpu_data_release(token_handle); } else { starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL); } } starpu_task_wait_for_all(); starpu_data_unregister(token_handle); starpu_mpi_shutdown(); starpu_shutdown(); if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %u == %u * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } return 0; }
int main(int argc, char **argv) { int rank, nodes; int ret=0; int compare=0; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2) { fprintf(stderr, "This program needs at least 2 nodes (%d available)\n", nodes); ret = 77; } else { starpu_data_handle_t handle; starpu_data_handle_t handle2; double real[2] = {4.0, 2.0}; double imaginary[2] = {7.0, 9.0}; double real2[2] = {14.0, 12.0}; double imaginary2[2] = {17.0, 19.0}; if (rank == 1) { real[0] = 0.0; real[1] = 0.0; imaginary[0] = 0.0; imaginary[1] = 0.0; } starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2); if (rank == 0) { int *compare_ptr = &compare; starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0); starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL); starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL); starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0); starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); } else if (rank == 1) { starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL); starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0); starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL); } starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_data_unregister(handle2); } starpu_mpi_shutdown(); starpu_shutdown(); if (rank == 0) return !compare; else return ret; }