/*
 * Release the Portals4 resources attached to a finished ibarrier
 * request and mark the MPI request complete.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if a Portals call fails.
 */
int ompi_coll_portals4_ibarrier_intra_fini(ompi_coll_portals4_request_t *request)
{
    int rc;

    /* Tear down the matching entry first, then its counting event handle. */
    rc = PtlMEUnlink(request->me_h);
    if (PTL_OK != rc) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEUnlink failed: %d\n",
                            __FILE__, __LINE__, rc);
        return OMPI_ERROR;
    }

    rc = PtlCTFree(request->ct_h);
    if (PTL_OK != rc) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTFree failed: %d\n",
                            __FILE__, __LINE__, rc);
        return OMPI_ERROR;
    }

    /* Wake anyone waiting on the request. */
    OPAL_THREAD_LOCK(&ompi_request_lock);
    ompi_request_complete(&request->super, true);
    OPAL_THREAD_UNLOCK(&ompi_request_lock);

    return OMPI_SUCCESS;
}
static void mca_pml_yalla_send_completion_cb(void *context) { mca_pml_yalla_send_request_t* sreq = context; switch (sreq->mxm.base.error) { case MXM_OK: sreq->super.ompi.req_status.MPI_ERROR = OMPI_SUCCESS; break; case MXM_ERR_CANCELED: sreq->super.ompi.req_status._cancelled = true; break; default: sreq->super.ompi.req_status.MPI_ERROR = MPI_ERR_INTERN; break; } PML_YALLA_VERBOSE(8, "send request %p completed with status %s", (void *)sreq, mxm_error_string(sreq->mxm.base.error)); ompi_request_complete(&sreq->super.ompi, true); if (sreq->super.flags & MCA_PML_YALLA_REQUEST_FLAG_FREE_CALLED) { PML_YALLA_VERBOSE(7, "release request %p because free was already called", (void *)sreq); mca_pml_yalla_request_release(&sreq->super, &ompi_pml_yalla.send_reqs); } }
/*
 * Nonblocking send entry point for the yalla PML.
 *
 * Buffered-mode sends copy the data out and are completed immediately;
 * all other modes are posted to MXM and complete in the send callback.
 */
int mca_pml_yalla_isend(void *buf, size_t count, ompi_datatype_t *datatype, int dst,
                        int tag, mca_pml_base_send_mode_t mode,
                        struct ompi_communicator_t* comm, struct ompi_request_t **request)
{
    mca_pml_yalla_send_request_t *sreq;
    mxm_error_t status;
    int rc;

    sreq = MCA_PML_YALLA_SREQ_INIT(buf, count, datatype, dst, tag, mode, comm,
                                   OMPI_REQUEST_ACTIVE);
    sreq->super.ompi.req_persistent = false;
    sreq->super.flags = 0;

    PML_YALLA_VERBOSE(8, "send request *%p=%p to %d mode %d tag %d dtype %s count %zu",
                      (void *)request, (void *)sreq, dst, mode, tag,
                      datatype->name, count);

    if (mode != MCA_PML_BASE_SEND_BUFFERED) {
        /* Hand the request to MXM; the completion callback finishes it. */
        status = mxm_req_send(&sreq->mxm);
        if (MXM_OK != status) {
            return OMPI_ERROR;
        }
        *request = &sreq->super.ompi;
        return OMPI_SUCCESS;
    }

    /* Buffered send: the data has been copied into the attached buffer,
     * so the request can be completed right away. */
    rc = mca_pml_yalla_bsend(&sreq->mxm);
    OPAL_THREAD_LOCK(&ompi_request_lock);
    sreq->super.ompi.req_status.MPI_ERROR = rc;
    ompi_request_complete(&sreq->super.ompi, true);
    OPAL_THREAD_UNLOCK(&ompi_request_lock);
    *request = &sreq->super.ompi;
    return rc;
}
/*
 * Request-based accumulate for the UCX one-sided component.
 *
 * The accumulate itself is performed by the (blocking with respect to
 * this call) ompi_osc_ucx_accumulate(); the returned request is then
 * completed immediately, so it serves only as a formal handle for the
 * MPI_Raccumulate interface.
 */
int ompi_osc_ucx_raccumulate(const void *origin_addr, int origin_count,
                             struct ompi_datatype_t *origin_dt, int target,
                             ptrdiff_t target_disp, int target_count,
                             struct ompi_datatype_t *target_dt, struct ompi_op_t *op,
                             struct ompi_win_t *win, struct ompi_request_t **request) {
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
    ompi_osc_ucx_request_t *ucx_req = NULL;
    int ret = OMPI_SUCCESS;

    /* Verify an access epoch is open for this target. */
    ret = check_sync_state(module, target, true);
    if (ret != OMPI_SUCCESS) {
        return ret;
    }

    OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req);
    if (NULL == ucx_req) {
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    ret = ompi_osc_ucx_accumulate(origin_addr, origin_count, origin_dt,
                                  target, target_disp, target_count,
                                  target_dt, op, win);
    if (ret != OMPI_SUCCESS) {
        /* NOTE(review): ucx_req appears to be leaked here and *request is
         * left unset on this error path — confirm whether a request-return
         * macro should release it before returning. */
        return ret;
    }

    /* The operation already finished above, so complete the request now. */
    ompi_request_complete(&ucx_req->super, true);

    *request = &ucx_req->super;

    return ret;
}
/*
 * Progress function for the Portals4 one-sided component.
 *
 * Drains the matching event queue.  PTL_EVENT_LINK events increment the
 * counter behind user_ptr and wake waiters on the component condition;
 * any other event carrying a user_ptr is attributed to an outstanding
 * request: delivered bytes are added to the request status and, once
 * the number of committed operations reaches ops_expected, the request
 * is completed.
 *
 * Returns the number of events processed (0 on empty queue or error).
 */
static int progress_callback(void)
{
    int ret, count = 0;
    ptl_event_t ev;
    ompi_osc_portals4_request_t *req;
    int32_t ops;

    while (true) {
        ret = PtlEQGet(mca_osc_portals4_component.matching_eq_h, &ev);
        if (PTL_OK == ret) {
            goto process;
        } else if (PTL_EQ_DROPPED == ret) {
            /* An event was dropped; still process the one we received. */
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: PtlEQGet reported dropped event",
                                __FILE__, __LINE__);
            goto process;
        } else if (PTL_EQ_EMPTY == ret) {
            /* Queue drained. */
            return 0;
        } else {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: PtlEQGet failed: %d\n",
                                __FILE__, __LINE__, ret);
            return 0;
        }

process:
        if (ev.ni_fail_type != PTL_OK) {
            /* NOTE(review): a failed event aborts progress without
             * completing the owning request — confirm this is intended. */
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: event failure: %d %d",
                                __FILE__, __LINE__, ev.type, ev.ni_fail_type);
            return 0;
        }

        count++;

        if (NULL != ev.user_ptr) {
            /* be sure that we receive the PTL_EVENT_LINK */
            if (ev.type == PTL_EVENT_LINK) {
                *(int *)ev.user_ptr = *(int *)ev.user_ptr + 1;
                opal_condition_broadcast(&mca_osc_portals4_component.cond);
                continue;
            }

            req = (ompi_osc_portals4_request_t*) ev.user_ptr;

            /* Fold the delivered byte count into the request status. */
            opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength);

            ops = opal_atomic_add_32(&req->ops_committed, 1);
            if (ops == req->ops_expected) {
                /* Last expected operation for this request arrived. */
                OPAL_THREAD_LOCK(&ompi_request_lock);
                ompi_request_complete(&req->super, true);
                OPAL_THREAD_UNLOCK(&ompi_request_lock);
            }
        }
    }

    /* Unreachable: the loop above only exits via return. */
    return count;
}
/*
 * Beware the odd semantics listed in MPI-2:8.2... See the comment in
 * the grequest destructor.
 *
 * Complete the generalized request under the global request lock, then
 * drop the reference held on it; if the user already called
 * MPI_REQUEST_FREE, this release is what actually destroys the object.
 */
int ompi_grequest_complete(ompi_request_t *req)
{
    int ret;

    OPAL_THREAD_LOCK(&ompi_request_lock);
    ret = ompi_request_complete(req, true);
    OPAL_THREAD_UNLOCK(&ompi_request_lock);

    OBJ_RELEASE(req);

    return ret;
}
/*
 * Start (or restart) persistent requests for the yalla PML
 * (MPI_Start / MPI_Startall).
 *
 * NULL and non-PML entries are skipped so a mixed request array can be
 * passed through.  Buffered sends complete immediately after the copy;
 * regular sends and receives are posted to MXM and complete via their
 * callbacks.
 */
int mca_pml_yalla_start(size_t count, ompi_request_t** requests)
{
    mca_pml_yalla_base_request_t *req;
    mxm_error_t error;
    size_t i;
    int rc;

    for (i = 0; i < count; ++i) {
        req = (mca_pml_yalla_base_request_t *)requests[i];

        if ((req == NULL) || (OMPI_REQUEST_PML != req->ompi.req_type)) {
            /* Skip irrelevant requests */
            continue;
        }

        PML_YALLA_ASSERT(req->ompi.req_state != OMPI_REQUEST_INVALID);
        /* Rearm the OMPI-level request state for a new operation. */
        PML_YALLA_RESET_OMPI_REQ(&req->ompi, OMPI_REQUEST_ACTIVE);

        if (req->flags & MCA_PML_YALLA_REQUEST_FLAG_SEND) {
            mca_pml_yalla_send_request_t *sreq;
            sreq = (mca_pml_yalla_send_request_t *)req;
            PML_YALLA_RESET_PML_REQ(req, PML_YALLA_MXM_REQBASE(sreq));

            if (req->flags & MCA_PML_YALLA_REQUEST_FLAG_BSEND) {
                PML_YALLA_VERBOSE(8, "start bsend request %p", (void *)sreq);
                /* Buffered send: data is copied out now, so the request
                 * completes right here rather than via a callback. */
                rc = mca_pml_yalla_bsend(&sreq->mxm);
                sreq->super.ompi.req_status.MPI_ERROR = rc;
                ompi_request_complete(&sreq->super.ompi, true);
                if (OMPI_SUCCESS != rc) {
                    return rc;
                }
            } else {
                PML_YALLA_VERBOSE(8, "start send request %p", (void *)sreq);
                error = mxm_req_send(&sreq->mxm);
                if (MXM_OK != error) {
                    return OMPI_ERROR;
                }
            }
        } else {
            mca_pml_yalla_recv_request_t *rreq;
            rreq = (mca_pml_yalla_recv_request_t *)req;
            PML_YALLA_RESET_PML_REQ(req, PML_YALLA_MXM_REQBASE(rreq));
            PML_YALLA_VERBOSE(8, "start recv request %p", (void *)req);
            error = mxm_req_recv(&rreq->mxm);
            if (MXM_OK != error) {
                return OMPI_ERROR;
            }
        }
    }

    return OMPI_SUCCESS;
}
/*
 * UCX completion callback for blocking receives: record the receive
 * status and flag the request complete so the waiting caller returns.
 */
static void mca_pml_ucx_blocking_recv_completion(void *request, ucs_status_t status,
                                                 ucp_tag_recv_info_t *info)
{
    ompi_request_t *ompi_req = request;

    PML_UCX_VERBOSE(8, "blocking receive request %p completed with status %s tag %"PRIx64" len %zu",
                    (void*)ompi_req, ucs_status_string(status),
                    info->sender_tag, info->length);

    mca_pml_ucx_set_recv_status(&ompi_req->req_status, status, info);

    /* The request must not have been completed yet. */
    PML_UCX_ASSERT(!(REQUEST_COMPLETE(ompi_req)));
    ompi_request_complete(ompi_req, true);
}
/*
 * Progress loop for the ROMIO io module: test every pending ROMIO
 * request, complete the finished ones, and reap requests whose user
 * already called MPI_Request_free.
 *
 * Returns the number of requests completed during this pass.
 */
static int progress()
{
    opal_list_item_t *item, *next;
    int ret, flag, count;
    ROMIO_PREFIX(MPIO_Request) romio_rq;
    mca_io_base_request_t *ioreq;

    /* Troll through all pending requests and try to progress them.
       If a request finishes, remove it from the list. */

    count = 0;
    OPAL_THREAD_LOCK (&mca_io_romio_mutex);
    for (item = opal_list_get_first(&mca_io_romio_pending_requests);
         item != opal_list_get_end(&mca_io_romio_pending_requests);
         item = next) {
        next = opal_list_get_next(item);

        ioreq = (mca_io_base_request_t*) item;
        romio_rq = ((mca_io_romio_request_t *) item)->romio_rq;
        ret = ROMIO_PREFIX(MPIO_Test)(&romio_rq, &flag,
                                      &(((ompi_request_t *) item)->req_status));
        if ((0 != ret) || (0 != flag)) {
            ioreq->super.req_status.MPI_ERROR = ret;
            ++count;
            /* we're done, so remove us from the pending list */
            opal_list_remove_item(&mca_io_romio_pending_requests, item);
            /* mark as complete (and make sure to wake up any waiters */
            /* NOTE(review): single-argument ompi_request_complete() —
             * other components in this tree pass a second boolean;
             * confirm this matches the prototype in this code vintage. */
            ompi_request_complete((ompi_request_t*) item);
            mca_io_base_request_progress_del();
            /* if the request has been freed already, the user isn't
             * going to call test or wait on us, so we need to do it
             * here */
            if (ioreq->free_called) {
                ret = ompi_request_free((ompi_request_t**) &ioreq);
                if (OMPI_SUCCESS != ret) {
                    OPAL_THREAD_UNLOCK(&mca_io_romio_mutex);
                    return count;
                }
            }
        }
    }
    OPAL_THREAD_UNLOCK (&mca_io_romio_mutex);

    /* Return how many requests completed */
    return count;
}
static void mca_pml_yalla_recv_completion_cb(void *context) { mca_pml_yalla_recv_request_t* rreq = context; PML_YALLA_SET_RECV_STATUS(&rreq->mxm, rreq->mxm.completion.actual_len, &rreq->super.ompi.req_status); PML_YALLA_VERBOSE(8, "receive request %p completed with status %s source %d rtag %d(%d/0x%x) len %zu", (void *)rreq, mxm_error_string(rreq->mxm.base.error), rreq->mxm.completion.sender_imm, rreq->mxm.completion.sender_tag, rreq->mxm.tag, rreq->mxm.tag_mask, rreq->mxm.completion.actual_len); ompi_request_complete(&rreq->super.ompi, true); if (rreq->super.flags & MCA_PML_YALLA_REQUEST_FLAG_FREE_CALLED) { PML_YALLA_VERBOSE(7, "release request %p because free was already called", (void *)rreq); mca_pml_yalla_request_release(&rreq->super, &ompi_pml_yalla.recv_reqs); } }
/* Generic completion hook: mark the OMPI request behind `handle` complete. */
static void coll_handle_complete(void *handle)
{
    ompi_request_complete((ompi_request_t *)handle, true);
}
/*
 * Nonblocking read for the common ompio layer.
 *
 * Allocates an ompio request and either (a) builds a single-cycle I/O
 * array and hands it to the fbtl's nonblocking ipreadv when supported,
 * or (b) falls back to the blocking read and completes the request
 * immediately.  Zero-count reads and reads against an empty file view
 * complete at once with zero bytes.
 */
int mca_common_ompio_file_iread (ompio_file_t *fh,
                                 void *buf,
                                 int count,
                                 struct ompi_datatype_t *datatype,
                                 ompi_request_t **request)
{
    int ret = OMPI_SUCCESS;
    mca_ompio_request_t *ompio_req=NULL;
    size_t spc=0;

    /* Reject reads on files opened write-only. */
    if (fh->f_amode & MPI_MODE_WRONLY){
//      opal_output(10, "Improper use of FILE Mode, Using WRONLY for Read!\n");
        ret = MPI_ERR_ACCESS;
        return ret;
    }

    mca_common_ompio_request_alloc ( &ompio_req, MCA_OMPIO_REQUEST_READ);

    if ( 0 == count ) {
        /* Nothing to read: complete the request immediately. */
        ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
        ompio_req->req_ompi.req_status._ucount = 0;
        ompi_request_complete (&ompio_req->req_ompi, false);
        *request = (ompi_request_t *) ompio_req;
        return OMPI_SUCCESS;
    }

    if ( NULL != fh->f_fbtl->fbtl_ipreadv ) {
        // This fbtl has support for non-blocking operations
        size_t total_bytes_read = 0;       /* total bytes that have been read*/
        uint32_t iov_count = 0;
        struct iovec *decoded_iov = NULL;
        size_t max_data = 0;
        int i = 0; /* index into the decoded iovec of the buffer */
        int j = 0; /* index into the file vie iovec */

#if OPAL_CUDA_SUPPORT
        int is_gpu, is_managed;
        mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
        if ( is_gpu && !is_managed ) {
            /* Unmanaged device memory: stage the read through a host
             * bounce buffer kept on the request. */
            char *tbuf=NULL;

            OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&ompio_req->req_convertor,max_data,decoded_iov,iov_count);

            ompio_req->req_tbuf = tbuf;
            ompio_req->req_size = max_data;
        }
        else {
            mca_common_ompio_decode_datatype (fh, datatype, count, buf,
                                              &max_data, &decoded_iov, &iov_count);
        }
#else
        mca_common_ompio_decode_datatype (fh, datatype, count, buf,
                                          &max_data, &decoded_iov, &iov_count);
#endif
        if ( 0 < max_data && 0 == fh->f_iov_count ) {
            /* Data was requested but the file view is empty: complete
             * immediately with zero bytes read. */
            ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
            ompio_req->req_ompi.req_status._ucount = 0;
            ompi_request_complete (&ompio_req->req_ompi, false);
            *request = (ompi_request_t *) ompio_req;
            return OMPI_SUCCESS;
        }

        // Non-blocking operations have to occur in a single cycle
        j = fh->f_index_in_file_view;

        mca_common_ompio_build_io_array ( fh,
                                          0,        // index
                                          1,        // no. of cycles
                                          max_data, // setting bytes per cycle to match data
                                          max_data,
                                          iov_count,
                                          decoded_iov,
                                          &i,
                                          &j,
                                          &total_bytes_read,
                                          &spc,
                                          &fh->f_io_array,
                                          &fh->f_num_of_io_entries);

        if (fh->f_num_of_io_entries) {
            fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req);
        }

        mca_common_ompio_register_progress ();

        /* The fbtl has taken over; release our view of the I/O array. */
        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
        if (NULL != decoded_iov) {
            free (decoded_iov);
            decoded_iov = NULL;
        }
    }
    else {
        // This fbtl does not support non-blocking operations
        ompi_status_public_t status;
        ret = mca_common_ompio_file_read (fh, buf, count, datatype, &status);

        ompio_req->req_ompi.req_status.MPI_ERROR = ret;
        ompio_req->req_ompi.req_status._ucount = status._ucount;
        ompi_request_complete (&ompio_req->req_ompi, false);
    }

    *request = (ompi_request_t *) ompio_req;
    return ret;
}
/*
 * Nonblocking read for the ompio component (older, pre-common layer).
 *
 * Allocates an ompio request and either hands a single-cycle I/O array
 * to the fbtl's nonblocking ipreadv, or falls back to the blocking read
 * and completes the request immediately.  In every path the caller's
 * *request handle is set to the new ompio request.
 */
int ompio_io_ompio_file_iread (mca_io_ompio_file_t *fh,
                               void *buf,
                               int count,
                               struct ompi_datatype_t *datatype,
                               ompi_request_t **request)
{
    int ret = OMPI_SUCCESS;
    mca_ompio_request_t *ompio_req=NULL;

    ompio_req = OBJ_NEW(mca_ompio_request_t);
    ompio_req->req_type = MCA_OMPIO_REQUEST_READ;
    ompio_req->req_ompi.req_state = OMPI_REQUEST_ACTIVE;

    if ( 0 == count ) {
        /* Set the status fields BEFORE completing the request, so a
         * waiter woken by ompi_request_complete() never observes a
         * half-initialized status. */
        ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
        ompio_req->req_ompi.req_status._ucount = 0;
        ompi_request_complete (&ompio_req->req_ompi, 0);
        /* BUG FIX: *request was never assigned on this early-return
         * path, leaving the caller with an uninitialized handle. */
        *request = (ompi_request_t *) ompio_req;
        return OMPI_SUCCESS;
    }

    if ( NULL != fh->f_fbtl->fbtl_ipreadv ) {
        // This fbtl has support for non-blocking operations
        size_t total_bytes_read = 0;       /* total bytes that have been read*/
        uint32_t iov_count = 0;
        struct iovec *decoded_iov = NULL;
        size_t max_data = 0;
        int i = 0; /* index into the decoded iovec of the buffer */
        int j = 0; /* index into the file vie iovec */

        ompi_io_ompio_decode_datatype (fh, datatype, count, buf, &max_data,
                                       &decoded_iov, &iov_count);

        // Non-blocking operations have to occur in a single cycle
        j = fh->f_index_in_file_view;

        mca_io_ompio_build_io_array ( fh,
                                      0,        // index
                                      1,        // no. of cycles
                                      max_data, // setting bytes per cycle to match data
                                      max_data,
                                      iov_count,
                                      decoded_iov,
                                      &i,
                                      &j,
                                      &total_bytes_read);

        if (fh->f_num_of_io_entries) {
            fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req);
        }

        if ( false == mca_io_ompio_progress_is_registered ) {
            // Lazy initialization of progress function to minimize impact
            // on other ompi functionality in case its not used.
            opal_progress_register (mca_io_ompio_component_progress);
            mca_io_ompio_progress_is_registered=true;
        }

        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
        if (NULL != decoded_iov) {
            free (decoded_iov);
            decoded_iov = NULL;
        }
    }
    else {
        // This fbtl does not support non-blocking operations
        ompi_status_public_t status;
        ret = ompio_io_ompio_file_read (fh, buf, count, datatype, &status);

        /* Fill in the status before completing the request (see above). */
        ompio_req->req_ompi.req_status.MPI_ERROR = ret;
        ompio_req->req_ompi.req_status._ucount = status._ucount;
        ompi_request_complete (&ompio_req->req_ompi, 0);
    }

    *request = (ompi_request_t *) ompio_req;
    return ret;
}
/*
 * Start persistent UCX PML requests (MPI_Start / MPI_Startall).
 *
 * For each persistent request a temporary UCP request is posted.  Three
 * outcomes are handled: NULL (immediate send completion), a pending or
 * already-complete UCP request (completion is chained to the persistent
 * request), or a UCS error pointer (the whole call fails).
 */
int mca_pml_ucx_start(size_t count, ompi_request_t** requests)
{
    mca_pml_ucx_persistent_request_t *preq;
    ompi_request_t *tmp_req;
    size_t i;

    for (i = 0; i < count; ++i) {
        preq = (mca_pml_ucx_persistent_request_t *)requests[i];

        if ((preq == NULL) || (OMPI_REQUEST_PML != preq->ompi.req_type)) {
            /* Skip irrelevant requests */
            continue;
        }

        PML_UCX_ASSERT(preq->ompi.req_state != OMPI_REQUEST_INVALID);
        preq->ompi.req_state = OMPI_REQUEST_ACTIVE;
        mca_pml_ucx_request_reset(&preq->ompi);

        if (preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) {
            /* TODO special care to sync/buffered send */
            PML_UCX_VERBOSE(8, "start send request %p", (void*)preq);
            tmp_req = (ompi_request_t*)ucp_tag_send_nb(preq->send.ep, preq->buffer,
                                                       preq->count, preq->datatype,
                                                       preq->tag,
                                                       mca_pml_ucx_psend_completion);
        } else {
            PML_UCX_VERBOSE(8, "start recv request %p", (void*)preq);
            tmp_req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker,
                                                       preq->buffer, preq->count,
                                                       preq->datatype, preq->tag,
                                                       preq->recv.tag_mask,
                                                       mca_pml_ucx_precv_completion);
        }

        if (tmp_req == NULL) {
            /* Only send can complete immediately */
            PML_UCX_ASSERT(preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND);
            PML_UCX_VERBOSE(8, "send completed immediately, completing persistent request %p",
                            (void*)preq);
            mca_pml_ucx_set_send_status(&preq->ompi.req_status, UCS_OK);
            ompi_request_complete(&preq->ompi, true);
        } else if (!UCS_PTR_IS_ERR(tmp_req)) {
            if (REQUEST_COMPLETE(tmp_req)) {
                /* tmp_req is already completed */
                PML_UCX_VERBOSE(8, "completing persistent request %p", (void*)preq);
                mca_pml_ucx_persistent_request_complete(preq, tmp_req);
            } else {
                /* tmp_req would be completed by callback and trigger completion
                 * of preq */
                PML_UCX_VERBOSE(8, "temporary request %p will complete persistent request %p",
                                (void*)tmp_req, (void*)preq);
                tmp_req->req_complete_cb_data = preq;
                preq->tmp_req = tmp_req;
            }
        } else {
            /* UCS error pointer: abort the whole start operation. */
            PML_UCX_ERROR("ucx %s failed: %s",
                          (preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) ? "send" : "recv",
                          ucs_status_string(UCS_PTR_STATUS(tmp_req)));
            return OMPI_ERROR;
        }
    }

    return OMPI_SUCCESS;
}
/*
 * Nonblocking write for the common ompio layer.
 *
 * Mirrors mca_common_ompio_file_iread: allocates an ompio request and
 * either drives the fbtl's nonblocking ipwritev in a single cycle, or
 * falls back to the blocking write and completes the request
 * immediately.  Zero-count writes and writes against an empty file
 * view complete at once.
 */
int mca_common_ompio_file_iwrite (ompio_file_t *fh,
                                  const void *buf,
                                  int count,
                                  struct ompi_datatype_t *datatype,
                                  ompi_request_t **request)
{
    int ret = OMPI_SUCCESS;
    mca_ompio_request_t *ompio_req=NULL;
    size_t spc=0;

    /* Reject writes on files opened read-only. */
    if (fh->f_amode & MPI_MODE_RDONLY){
//      opal_output(10, "Improper use of FILE Mode, Using RDONLY for write!\n");
        ret = MPI_ERR_READ_ONLY;
        return ret;
    }

    mca_common_ompio_request_alloc ( &ompio_req, MCA_OMPIO_REQUEST_WRITE);

    if ( 0 == count ) {
        /* Nothing to write: complete the request immediately. */
        ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
        ompio_req->req_ompi.req_status._ucount = 0;
        ompi_request_complete (&ompio_req->req_ompi, false);
        *request = (ompi_request_t *) ompio_req;
        return OMPI_SUCCESS;
    }

    if ( NULL != fh->f_fbtl->fbtl_ipwritev ) {
        /* This fbtl has support for non-blocking operations */
        uint32_t iov_count = 0;
        struct iovec *decoded_iov = NULL;
        size_t max_data = 0;
        size_t total_bytes_written =0;
        int i = 0; /* index into the decoded iovec of the buffer */
        int j = 0; /* index into the file vie iovec */

#if OPAL_CUDA_SUPPORT
        int is_gpu, is_managed;
        mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
        if ( is_gpu && !is_managed ) {
            /* Unmanaged device memory: pack through a host bounce buffer
             * kept on the request for the duration of the write. */
            size_t pos=0;
            char *tbuf=NULL;
            opal_convertor_t convertor;

            OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

            opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
            opal_convertor_cleanup (&convertor);

            ompio_req->req_tbuf = tbuf;
            ompio_req->req_size = max_data;
        }
        else {
            mca_common_ompio_decode_datatype (fh, datatype, count, buf,
                                              &max_data, &decoded_iov, &iov_count);
        }
#else
        mca_common_ompio_decode_datatype (fh, datatype, count, buf,
                                          &max_data, &decoded_iov, &iov_count);
#endif
        if ( 0 < max_data && 0 == fh->f_iov_count ) {
            /* Data was provided but the file view is empty: complete
             * immediately with zero bytes written. */
            ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
            ompio_req->req_ompi.req_status._ucount = 0;
            ompi_request_complete (&ompio_req->req_ompi, false);
            *request = (ompi_request_t *) ompio_req;
            return OMPI_SUCCESS;
        }

        j = fh->f_index_in_file_view;

        /* Non blocking operations have to occur in a single cycle */
        mca_common_ompio_build_io_array ( fh,
                                          0,        // index of current cycle iteration
                                          1,        // number of cycles
                                          max_data, // setting bytes_per_cycle to max_data
                                          max_data,
                                          iov_count,
                                          decoded_iov,
                                          &i,
                                          &j,
                                          &total_bytes_written,
                                          &spc);

        if (fh->f_num_of_io_entries) {
            fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
        }

        mca_common_ompio_register_progress ();

        /* The fbtl has taken over; release our view of the I/O array. */
        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
        if (NULL != decoded_iov) {
            free (decoded_iov);
            decoded_iov = NULL;
        }
    }
    else {
        // This fbtl does not support non-blocking write operations
        ompi_status_public_t status;
        ret = mca_common_ompio_file_write(fh,buf,count,datatype, &status);

        ompio_req->req_ompi.req_status.MPI_ERROR = ret;
        ompio_req->req_ompi.req_status._ucount = status._ucount;
        ompi_request_complete (&ompio_req->req_ompi, false);
    }

    *request = (ompi_request_t *) ompio_req;
    return ret;
}
/*
 * Bottom half of the Portals4 linear scatter: runs after the data
 * movement finished.  Cleans up the scatter/sync handles, unpacks the
 * received data into the user buffer when a converter is needed, frees
 * the staging buffer, and completes the collective request.
 *
 * Returns OMPI_SUCCESS, or the failing cleanup code (with the request
 * status set accordingly) on error.
 */
static int ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
                                                          ompi_coll_portals4_request_t *request)
{
    int ret, line;

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_bottom enter rank %d",
                 request->u.scatter.my_rank));

    ret = cleanup_scatter_handles(request);
    if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

    ret = cleanup_sync_handles(request);
    if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

    if (NULL != request->u.scatter.unpack_dst_buf) {
        uint32_t iov_count = 1;
        struct iovec iov;
        size_t max_data;

        ompi_coll_portals4_create_recv_converter (&request->u.scatter.recv_converter,
                                                  request->u.scatter.unpack_dst_buf,
                                                  ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
                                                  request->u.scatter.unpack_dst_count,
                                                  request->u.scatter.unpack_dst_dtype);

        iov.iov_len = request->u.scatter.packed_size;
        if (request->u.scatter.my_rank == request->u.scatter.root_rank) {
            /* unpack my data from the location in scatter_buf where is was packed */
            uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * request->u.scatter.my_rank;
            iov.iov_base = (IOVBASE_TYPE *)((char *)request->u.scatter.scatter_buf + offset);
        } else {
            /* Non-root ranks receive at the start of the staging buffer. */
            iov.iov_base = (IOVBASE_TYPE *)request->u.scatter.scatter_buf;
        }
        opal_convertor_unpack(&request->u.scatter.recv_converter, &iov, &iov_count, &max_data);

        OBJ_DESTRUCT(&request->u.scatter.recv_converter);
    }

    if (request->u.scatter.free_after)
        free(request->u.scatter.scatter_buf);

    /* Record success before waking any waiters. */
    request->super.req_status.MPI_ERROR = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&ompi_request_lock);
    ompi_request_complete(&request->super, true);
    OPAL_THREAD_UNLOCK(&ompi_request_lock);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_bottom exit rank %d",
                 request->u.scatter.my_rank));

    return OMPI_SUCCESS;

err_hdlr:
    request->super.req_status.MPI_ERROR = ret;

    if (request->u.scatter.free_after)
        free(request->u.scatter.scatter_buf);

    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}
/*
 * Nonblocking write for the ompio component (older, pre-common layer).
 *
 * Allocates an ompio request and either hands a single-cycle I/O array
 * to the fbtl's nonblocking ipwritev, or falls back to the blocking
 * write and completes the request immediately.  In every path the
 * caller's *request handle is set to the new ompio request.
 */
int ompio_io_ompio_file_iwrite (mca_io_ompio_file_t *fh,
                                void *buf,
                                int count,
                                struct ompi_datatype_t *datatype,
                                ompi_request_t **request)
{
    int ret = OMPI_SUCCESS;
    mca_ompio_request_t *ompio_req=NULL;

    ompio_req = OBJ_NEW(mca_ompio_request_t);
    ompio_req->req_type = MCA_OMPIO_REQUEST_WRITE;

    if ( 0 == count ) {
        /* Set the status fields BEFORE completing the request, so a
         * waiter woken by ompi_request_complete() never observes a
         * half-initialized status. */
        ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
        ompio_req->req_ompi.req_status._ucount = 0;
        ompi_request_complete (&ompio_req->req_ompi, 0);
        /* BUG FIX: *request was never assigned on this early-return
         * path, leaving the caller with an uninitialized handle. */
        *request = (ompi_request_t *) ompio_req;
        return OMPI_SUCCESS;
    }

    if ( NULL != fh->f_fbtl->fbtl_ipwritev ) {
        /* This fbtl has support for non-blocking operations */
        uint32_t iov_count = 0;
        struct iovec *decoded_iov = NULL;
        size_t max_data = 0;
        size_t total_bytes_written =0;
        int i = 0; /* index into the decoded iovec of the buffer */
        int j = 0; /* index into the file vie iovec */

        ompi_io_ompio_decode_datatype (fh, datatype, count, buf, &max_data,
                                       &decoded_iov, &iov_count);

        j = fh->f_index_in_file_view;

        /* Non blocking operations have to occur in a single cycle */
        mca_io_ompio_build_io_array ( fh,
                                      0,        // index of current cycle iteration
                                      1,        // number of cycles
                                      max_data, // setting bytes_per_cycle to max_data
                                      max_data,
                                      iov_count,
                                      decoded_iov,
                                      &i,
                                      &j,
                                      &total_bytes_written);

        if (fh->f_num_of_io_entries) {
            /* BUG FIX: pass the newly allocated ompio request to the fbtl
             * instead of the caller's (still unassigned) output pointer,
             * mirroring ompio_io_ompio_file_iread(). */
            fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
        }

        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
        if (NULL != decoded_iov) {
            free (decoded_iov);
            decoded_iov = NULL;
        }
    }
    else {
        // This fbtl does not support non-blocking write operations
        ompi_status_public_t status;
        ret = ompio_io_ompio_file_write(fh,buf,count,datatype, &status);

        /* Fill in the status before completing the request (see above). */
        ompio_req->req_ompi.req_status.MPI_ERROR = ret;
        ompio_req->req_ompi.req_status._ucount = status._ucount;
        ompi_request_complete (&ompio_req->req_ompi, 0);
    }

    *request = (ompi_request_t *) ompio_req;
    return ret;
}
/*
 * Request-based get for the UCX one-sided component.
 *
 * Issues the get via ompi_osc_ucx_get(), then fences the worker and
 * posts a fetch-add against the target's request-flag word; the
 * completion callback of that atomic completes the user-visible
 * request (presumably serving as a remote-completion marker for the
 * preceding get — confirm against the component design).
 */
int ompi_osc_ucx_rget(void *origin_addr, int origin_count,
                      struct ompi_datatype_t *origin_dt, int target,
                      ptrdiff_t target_disp, int target_count,
                      struct ompi_datatype_t *target_dt,
                      struct ompi_win_t *win, struct ompi_request_t **request) {
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
    uint64_t remote_addr = (module->state_info_array[target]).addr + OSC_UCX_STATE_REQ_FLAG_OFFSET;
    ucp_rkey_h rkey;
    ompi_osc_ucx_request_t *ucx_req = NULL;
    ompi_osc_ucx_internal_request_t *internal_req = NULL;
    ucs_status_t status;
    int ret = OMPI_SUCCESS;

    /* Verify an access epoch is open for this target. */
    ret = check_sync_state(module, target, true);
    if (ret != OMPI_SUCCESS) {
        return ret;
    }

    if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) {
        status = get_dynamic_win_info(remote_addr, module, ep, target);
        if (status != UCS_OK) {
            return OMPI_ERROR;
        }
    }

    rkey = (module->win_info_array[target]).rkey;

    OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req);
    if (NULL == ucx_req) {
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    ret = ompi_osc_ucx_get(origin_addr, origin_count, origin_dt, target,
                           target_disp, target_count, target_dt, win);
    if (ret != OMPI_SUCCESS) {
        /* NOTE(review): ucx_req appears to be leaked and *request left
         * unset on this and the following error paths — confirm whether
         * a request-return macro should release it. */
        return ret;
    }

    /* Order the atomic below after the get above. */
    status = ucp_worker_fence(mca_osc_ucx_component.ucp_worker);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_worker_fence failed: %d\n",
                            __FILE__, __LINE__, status);
        return OMPI_ERROR;
    }

    internal_req = ucp_atomic_fetch_nb(ep, UCP_ATOMIC_FETCH_OP_FADD, 0,
                                       &(module->req_result), sizeof(uint64_t),
                                       remote_addr, rkey, req_completion);

    if (UCS_PTR_IS_PTR(internal_req)) {
        /* Pending: req_completion will complete ucx_req later. */
        internal_req->external_req = ucx_req;
        mca_osc_ucx_component.num_incomplete_req_ops++;
    } else {
        /* NOTE(review): this branch also covers UCS error statuses
         * (UCS_PTR_IS_PTR is false for errors), which are treated as
         * immediate success — confirm an UCS_PTR_IS_ERR check is not
         * required here. */
        ompi_request_complete(&ucx_req->super, true);
    }

    *request = &ucx_req->super;

    return incr_and_check_ops_num(module, target, ep);
}
/*
 * Nonblocking reduce_scatter: a binomial reduction to rank 0 over the
 * full buffer, followed by rank 0 sending each rank its recvcounts[r]
 * slice (rank 0 keeps slice 0 via a schedule-local copy).
 *
 * The single-process case is short-circuited by completing the request
 * immediately after an optional local copy.
 *
 * NOTE(review): the error paths below return without freeing `schedule`
 * (and, before its allocation, without destroying the handle) — confirm
 * whether NBC-level cleanup covers this.
 */
int ompi_coll_libnbc_ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts,
                                     MPI_Datatype datatype, MPI_Op op,
                                     struct ompi_communicator_t *comm,
                                     ompi_request_t ** request,
                                     struct mca_coll_base_module_2_1_0_t *module) {
    int peer, rank, maxr, p, r, res, count, offset, firstred;
    MPI_Aint ext;
    char *redbuf, *sbuf, inplace;
    NBC_Schedule *schedule;
    NBC_Handle *handle;
    ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
    ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

    NBC_IN_PLACE(sendbuf, recvbuf, inplace);

    res = NBC_Init_handle(comm, coll_req, libnbc_module);
    if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
    res = MPI_Comm_size(comm, &p);
    if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }

    if(p==1) {
        if(!inplace) {
            /* single node not in_place: copy data to recvbuf */
            res = NBC_Copy(sendbuf, recvcounts[0], datatype, recvbuf, recvcounts[0], datatype, comm);
            if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
        }
        /* manually complete the request */
        (*request)->req_status.MPI_ERROR = OMPI_SUCCESS;
        OPAL_THREAD_LOCK(&ompi_request_lock);
        ompi_request_complete(*request, true);
        OPAL_THREAD_UNLOCK(&ompi_request_lock);
        return NBC_OK;
    }

    handle = (*coll_req);

    res = MPI_Comm_rank(comm, &rank);
    if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
    res = MPI_Type_extent(datatype, &ext);
    if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

    schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
    if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }

    res = NBC_Sched_create(schedule);
    if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

    /* Number of binomial-reduction rounds and total element count. */
    maxr = (int)ceil((log((double)p)/LOG2));

    count = 0;
    for(r=0;r<p;r++) count += recvcounts[r];

    /* tmpbuf holds [0, count) for incoming data and [count, 2*count)
     * (= redbuf) for the running reduction result. */
    handle->tmpbuf = malloc(ext*count*2);
    if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; }

    redbuf = ((char*)handle->tmpbuf)+(ext*count);

    firstred = 1;
    for(r=1; r<=maxr; r++) {
        if((rank % (1<<r)) == 0) {
            /* we have to receive this round */
            peer = rank + (1<<(r-1));
            if(peer<p) {
                res = NBC_Sched_recv(0, true, count, datatype, peer, schedule);
                if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
                /* we have to wait until we have the data */
                res = NBC_Sched_barrier(schedule);
                if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
                if(firstred) {
                    /* take reduce data from the sendbuf in the first round -> save copy */
                    res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule);
                    firstred = 0;
                } else {
                    /* perform the reduce in my local buffer */
                    res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule);
                }
                if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; }
                /* this cannot be done until handle->tmpbuf is unused :-( */
                res = NBC_Sched_barrier(schedule);
                if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
            }
        } else {
            /* we have to send this round */
            peer = rank - (1<<(r-1));
            if(firstred) {
                /* we have to send the senbuf */
                res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule);
            } else {
                /* we send an already reduced value from redbuf */
                res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule);
            }
            if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
            /* leave the game */
            break;
        }
    }

    res = NBC_Sched_barrier(schedule);
    if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }

    /* rank 0 is root and sends - all others receive */
    if(rank != 0) {
        res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule);
        if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }

    if(rank == 0) {
        offset = 0;
        for(r=1;r<p;r++) {
            offset += recvcounts[r-1];
            sbuf = ((char *)redbuf) + (offset*ext);
            /* root sends the right buffer to the right receiver */
            res = NBC_Sched_send(sbuf-(unsigned long)handle->tmpbuf, true, recvcounts[r], datatype, r, schedule);
            if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
        }
        /* Rank 0 keeps slice 0: copy it from redbuf into recvbuf. */
        res = NBC_Sched_copy(redbuf-(unsigned long)handle->tmpbuf, true, recvcounts[0], datatype, recvbuf, false, recvcounts[0], datatype, schedule);
        if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; }
    }

    /*NBC_PRINT_SCHED(*schedule);*/

    res = NBC_Sched_commit(schedule);
    if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

    res = NBC_Start(handle, schedule);
    if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }

    /* tmpbuf is freed with the handle */
    return NBC_OK;
}