/*
 * MPI_Accumulate implementation for the osc/rdma component.
 *
 * Validates the synchronization state of the window, upgrades a
 * fence-mode window into an explicit access/expose epoch, short-cuts
 * zero-count operations, and allocates+initializes the send request
 * that will carry the accumulate to the target.
 *
 * NOTE(review): this definition is truncated in the visible chunk —
 * the remainder of the function (sendreq error handling / dispatch
 * and the closing brace) is not shown here.
 */
int ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
                                    struct ompi_datatype_t *origin_dt,
                                    int target, int target_disp, int target_count,
                                    struct ompi_datatype_t *target_dt,
                                    struct ompi_op_t *op, ompi_win_t *win)
{
    int ret;
    ompi_osc_rdma_sendreq_t *sendreq;
    ompi_osc_rdma_module_t *module = GET_MODULE(win);

    /* In a post/start (PSCW) access epoch, targets outside the start
       group are erroneous: fail with the RMA synchronization error. */
    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
        (!module->m_sc_remote_active_ranks[target])) {
        return MPI_ERR_RMA_SYNC;
    }

    if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
        /* well, we're definitely in an access epoch now */
        ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
                          OMPI_WIN_EXPOSE_EPOCH);
    }

    /* shortcut 0 count case */
    if (0 == origin_count || 0 == target_count) {
        return OMPI_SUCCESS;
    }

    /* create sendreq */
    ret = ompi_osc_rdma_sendreq_alloc_init(OMPI_OSC_RDMA_ACC, origin_addr,
                                           origin_count, origin_dt,
                                           target, target_disp, target_count,
                                           target_dt, module, &sendreq);

    /* Mark the origin buffer inaccessible for memchecker tools while
       the request owns it (no-op unless memchecker support is built). */
    MEMCHECKER(
        memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
                                  &sendreq->req_origin_convertor);
    );
/*
 * MPI_Win_fence implementation for the osc/pt2pt component.
 *
 * @param assert  MPI assertion bits (MPI_MODE_NOPRECEDE /
 *                MPI_MODE_NOSUCCEED) supplied by the user.
 * @param win     window whose epoch is being closed/opened.
 * @return OMPI_SUCCESS, MPI_ERR_RMA_SYNC if the NOPRECEDE assertion
 *         was violated, or the error from the collective / send path.
 *
 * With NOPRECEDE, verifies no operations are pending and returns.
 * Otherwise: snapshots the pending send requests, runs a
 * reduce_scatter so every rank learns how many incoming requests to
 * expect, starts all outgoing requests (re-queueing those that hit a
 * temporary resource shortage), then blocks on the condition variable
 * until all incoming and outgoing counts drain to zero.
 */
int ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
{
    unsigned int incoming_reqs;
    int ret = OMPI_SUCCESS, i;
    ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
    int num_outgoing = 0;

    if (0 != (assert & MPI_MODE_NOPRECEDE)) {
        /* check that the user didn't lie to us - since NOPRECEDED
           must be specified by all processes if it is specified by
           any process, if we see this it is safe to assume that there
           are no pending operations anywhere needed to close out this
           epoch.  No need to lock, since it's a lookup and any
           pending modification of the pending_sendreqs during this
           time is an erroneous program. */
        if (0 != opal_list_get_size(&(module->p2p_pending_sendreqs))) {
            return MPI_ERR_RMA_SYNC;
        }
    } else {
        opal_list_item_t *item;

        /* "atomically" copy all the data we're going to be modifying
           into the copy... */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        ompi_osc_pt2pt_flip_sendreqs(module);
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));

        num_outgoing = opal_list_get_size(&(module->p2p_copy_pending_sendreqs));

        /* find out how much data everyone is going to send us. */
        ret = module->p2p_comm->
            c_coll.coll_reduce_scatter(module->p2p_copy_num_pending_sendreqs,
                                       &incoming_reqs,
                                       module->p2p_fence_coll_counts,
                                       MPI_UNSIGNED,
                                       MPI_SUM,
                                       module->p2p_comm,
                                       module->p2p_comm->c_coll.coll_reduce_scatter_module);
        if (OMPI_SUCCESS != ret) {
            /* put the stupid data back for the user.  This is not
               cheap, but the user lost his data if we don't.  Re-join
               the copied list onto the pending list and restore the
               per-peer counters before reporting the failure. */
            OPAL_THREAD_LOCK(&(module->p2p_lock));
            opal_list_join(&module->p2p_pending_sendreqs,
                           opal_list_get_end(&module->p2p_pending_sendreqs),
                           &module->p2p_copy_pending_sendreqs);

            for (i = 0 ; i < ompi_comm_size(module->p2p_comm) ; ++i) {
                module->p2p_num_pending_sendreqs[i] +=
                    module->p2p_copy_num_pending_sendreqs[i];
            }

            OPAL_THREAD_UNLOCK(&(module->p2p_lock));
            return ret;
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                             "fence: waiting on %d in and %d out",
                             module->p2p_num_pending_in,
                             module->p2p_num_pending_out));

        /* try to start all the requests.  We've copied everything we
           need out of pending_sendreqs, so don't need the lock here.
           A request that fails with TEMP_OUT_OF_RESOURCE is appended
           back onto the same list so it is retried by this loop. */
        while (NULL !=
               (item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
            ompi_osc_pt2pt_sendreq_t *req = (ompi_osc_pt2pt_sendreq_t*) item;

            ret = ompi_osc_pt2pt_sendreq_send(module, req);

            if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
                opal_output_verbose(5, ompi_osc_base_output,
                                    "complete: failure in starting sendreq (%d). Will try later.",
                                    ret);
                opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
            } else if (OMPI_SUCCESS != ret) {
                /* hard failure: bail out of the fence entirely */
                return ret;
            }
        }

        OPAL_THREAD_LOCK(&module->p2p_lock);
        /* possible we've already received a couple in messages, so
           add however many we're going to wait for */
        module->p2p_num_pending_in += incoming_reqs;
        module->p2p_num_pending_out += num_outgoing;

        /* now we know how many things we're waiting for - wait for
           them...  The receive callback decrements these counters and
           signals p2p_cond when they reach zero. */
        while (module->p2p_num_pending_in > 0 ||
               0 != module->p2p_num_pending_out) {
            opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
        }
        OPAL_THREAD_UNLOCK(&module->p2p_lock);
    }

    /* all transfers are done - back to the real world we go */
    if (0 == (assert & MPI_MODE_NOSUCCEED)) {
        ompi_win_set_mode(win, OMPI_WIN_FENCE);
    } else {
        ompi_win_set_mode(win, 0);
    }

    return OMPI_SUCCESS;
}
/*
 * Dispatch callback invoked when a control-message receive completes.
 *
 * Decodes the one-sided protocol header at the front of the receive
 * buffer, dispatches to the appropriate handler (put/acc/get/reply
 * data movement, or post/complete/lock/unlock synchronization
 * bookkeeping), then frees the completed request and re-posts the
 * receive so the next fragment can arrive.
 *
 * @param request  the completed (or cancelled) receive request; the
 *                 osc buffer is recovered from req_complete_cb_data.
 * @return OMPI_ERR_NOT_AVAILABLE if the request was cancelled,
 *         otherwise the result of re-posting the receive.
 *
 * Fix vs. previous revision: the unknown-type diagnostic read
 * "packet for with unknown type" (garbled); message corrected.
 */
static int component_fragment_cb(ompi_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_buffer_t *buffer;
    ompi_osc_pt2pt_module_t *module;

    if (request->req_status._cancelled) {
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "pt2pt request was canceled");
        return OMPI_ERR_NOT_AVAILABLE;
    }

    buffer = (ompi_osc_pt2pt_buffer_t*) request->req_complete_cb_data;
    module = (ompi_osc_pt2pt_module_t*) buffer->data;

    /* every fragment must at least carry a base header */
    assert(request->req_status._ucount >= (int) sizeof(ompi_osc_pt2pt_base_header_t));

    /* handle message */
    switch (((ompi_osc_pt2pt_base_header_t*) buffer->payload)->hdr_type) {
    case OMPI_OSC_PT2PT_HDR_PUT:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            /* first incoming op under fence mode implicitly opens the
               access/expose epoch on this window */
            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win,
                                      OMPI_WIN_FENCE |
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            ret = ompi_osc_pt2pt_sendreq_recv_put(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_ACC:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win,
                                      OMPI_WIN_FENCE |
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* receive into temporary buffer */
            ret = ompi_osc_pt2pt_sendreq_recv_accum(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_GET:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_datatype_t *datatype;
            ompi_osc_pt2pt_replyreq_t *replyreq;
            ompi_proc_t *proc;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win,
                                      OMPI_WIN_FENCE |
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* create or get a pointer to our datatype */
            proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
            datatype = ompi_osc_base_datatype_create(proc, &payload);

            if (NULL == datatype) {
                opal_output(ompi_osc_base_framework.framework_output,
                            "Error recreating datatype.  Aborting.");
                ompi_mpi_abort(module->p2p_comm, 1, false);
            }

            /* create replyreq sendreq */
            ret = ompi_osc_pt2pt_replyreq_alloc_init(module,
                                                     header->hdr_origin,
                                                     header->hdr_origin_sendreq,
                                                     header->hdr_target_disp,
                                                     header->hdr_target_count,
                                                     datatype,
                                                     &replyreq);

            /* send replyreq */
            ompi_osc_pt2pt_replyreq_send(module, replyreq);

            /* sendreq does the right retain, so we can release safely */
            OBJ_RELEASE(datatype);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_REPLY:
        {
            ompi_osc_pt2pt_reply_header_t *header =
                (ompi_osc_pt2pt_reply_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_osc_pt2pt_sendreq_t *sendreq;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_REPLY_HDR_NTOH(*header);
            }
#endif

            /* get original sendreq pointer.  NOTE(review): this also
               re-points `module` at the sendreq's module, which is
               what the irecv re-post below will use. */
            sendreq = (ompi_osc_pt2pt_sendreq_t*) header->hdr_origin_sendreq.pval;
            module = sendreq->req_module;

            /* receive data */
            ompi_osc_pt2pt_replyreq_recv(module, sendreq, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_POST:
        {
            int32_t count;
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_post_msgs -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_COMPLETE:
        {
            ompi_osc_pt2pt_control_header_t *header =
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            /* we've heard from one more place, and have value reqs to
               process */
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_complete_msgs -= 1);
            count += (module->p2p_num_pending_in += header->hdr_value[0]);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_LOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header =
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            if (header->hdr_value[1] > 0) {
                /* lock request from a peer */
                ompi_osc_pt2pt_passive_lock(module, header->hdr_value[0],
                                            header->hdr_value[1]);
            } else {
                /* lock ack: count goes up from zero, so `count != 0`
                   always broadcasts.  NOTE(review): asymmetric with
                   the `count == 0` tests elsewhere — presumably
                   intentional (waiter checks the ack count itself),
                   but worth confirming. */
                OPAL_THREAD_LOCK(&module->p2p_lock);
                count = (module->p2p_lock_received_ack += 1);
                OPAL_THREAD_UNLOCK(&module->p2p_lock);
                if (count != 0) opal_condition_broadcast(&module->p2p_cond);
            }
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header =
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            ompi_osc_pt2pt_passive_unlock(module, header->hdr_value[0],
                                          header->hdr_value[1]);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REPLY:
        {
            int32_t count;
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_pending_out -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    default:
        /* fixed diagnostic: was "packet for with unknown type" */
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "received one-sided packet with unknown type");
        break;
    }

    /* release the completed request and re-post the receive so the
       next incoming fragment can be matched */
    ompi_request_free(&request);
    ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
                                         mca_osc_pt2pt_component.p2p_c_eager_size,
                                         MPI_BYTE,
                                         MPI_ANY_SOURCE,
                                         CONTROL_MSG_TAG,
                                         module->p2p_comm,
                                         &buffer->request,
                                         component_fragment_cb,
                                         buffer);

    return ret;
}