/**
 * Issue one PtlGet that reads a remote message into local memory.
 *
 * @param start          local destination address; handed to PtlGet as the
 *                       local offset within ompi_mtl_portals4.send_md_h
 * @param length         number of bytes to read
 * @param target         process the Get is addressed to
 * @param match_bits     match bits passed to PtlGet
 * @param remote_offset  remote offset passed to PtlGet
 * @param request        receive request, passed to PtlGet as the user pointer
 *
 * @return OMPI_SUCCESS when PtlGet returned PTL_OK,
 *         OMPI_ERR_OUT_OF_RESOURCE otherwise.
 */
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    int ret;

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    /* Reserve a send slot.  When the counter goes negative no slot was
       available: undo the decrement, run progress and try again. */
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    ret = PtlGet(ompi_mtl_portals4.send_md_h,
                 (ptl_size_t) start,
                 length,
                 target,
                 ompi_mtl_portals4.read_idx,
                 match_bits,
                 remote_offset,
                 request);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
        /* The Get was never started, so give back the slot reserved above;
           otherwise send_slots stays one short for good.
           NOTE(review): assumes the slot is normally returned when the
           Get's completion event is processed -- confirm. */
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
/**
 * Blocking send: start the send on a stack-allocated request and run
 * progress until the request is flagged complete.
 *
 * @param mtl        MTL module, forwarded to ompi_mtl_portals4_send_start
 * @param comm       communicator, forwarded to ompi_mtl_portals4_send_start
 * @param dest       destination, forwarded to ompi_mtl_portals4_send_start
 * @param tag        message tag, forwarded to ompi_mtl_portals4_send_start
 * @param convertor  convertor, forwarded to ompi_mtl_portals4_send_start
 * @param mode       send mode, forwarded to ompi_mtl_portals4_send_start
 *
 * @return the error from ompi_mtl_portals4_send_start if starting failed,
 *         otherwise the request's retval once it is complete.
 */
int
ompi_mtl_portals4_send(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t* comm,
                       int dest,
                       int tag,
                       struct opal_convertor_t *convertor,
                       mca_pml_base_send_mode_t mode)
{
    int ret;
    ompi_mtl_portals4_send_request_t ptl_request;

    ptl_request.complete = false;
    ptl_request.retval = OMPI_SUCCESS;
    ptl_request.super.super.type = portals4_req_send;
    ptl_request.super.super.event_callback = ompi_mtl_portals4_send_callback;
    /* The request lives on the stack and the error path below frees
       buffer_ptr.  Give it a defined value so that a failure occurring
       before the field is assigned cannot free an indeterminate pointer. */
    ptl_request.super.buffer_ptr = NULL;

    ret = ompi_mtl_portals4_send_start(mtl, comm, dest, tag,
                                       convertor, mode, &ptl_request.super);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        /* free(NULL) is a no-op, so no guard is needed */
        free(ptl_request.super.buffer_ptr);
        return ret;
    }

    /* spin on progress until the request's complete flag is set */
    while (false == ptl_request.complete) {
        ompi_mtl_portals4_progress();
    }

    return ptl_request.retval;
}
/**
 * Bind a memory descriptor over the destination buffer and issue one
 * PtlGet that reads the remote message into it.
 *
 * @param start          local destination buffer
 * @param length         number of bytes to read
 * @param target         process the Get is addressed to
 * @param match_bits     match bits passed to PtlGet
 * @param remote_offset  remote offset passed to PtlGet
 * @param request        receive request; its md_h receives the bound MD and
 *                       the request is passed to PtlGet as the user pointer
 *
 * @return OMPI_SUCCESS when the Get was issued, OMPI_ERR_OUT_OF_RESOURCE if
 *         PtlMDBind or PtlGet failed.  On PtlGet failure the MD bound here
 *         is released again.
 */
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    ptl_md_t md;
    int ret;

    /* Describe the destination buffer; the MD reports to the send event
       queue and uses no counting event. */
    md.start = start;
    md.length = length;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &request->md_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    /* Reserve a send slot.  When the counter goes negative no slot was
       available: undo the decrement, run progress and try again. */
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    /* local offset is 0 because the MD starts exactly at the buffer */
    ret = PtlGet(request->md_h,
                 0,
                 md.length,
                 target,
                 ompi_mtl_portals4.read_idx,
                 match_bits,
                 remote_offset,
                 request);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
        /* The Get was never started, so give back the slot reserved above;
           otherwise send_slots stays one short for good.
           NOTE(review): assumes the slot is normally returned when the
           Get's completion event is processed -- confirm. */
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif
        PtlMDRelease(request->md_h);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
/**
 * Try to cancel an outstanding request.
 *
 * Requests of type portals4_req_isend are left untouched.  For
 * portals4_req_recv the match entry is unlinked if it is still valid; when
 * the unlink succeeds the request is marked cancelled and its completion
 * callback is invoked.
 *
 * @param mtl          MTL module (not used here)
 * @param mtl_request  request to cancel
 * @param flag         not used here
 *
 * @return OMPI_SUCCESS for isend and recv requests (whether or not anything
 *         was actually cancelled), OMPI_ERROR for any other request type.
 */
int
ompi_mtl_portals4_cancel(struct mca_mtl_base_module_t* mtl,
                         mca_mtl_request_t *mtl_request,
                         int flag)
{
    ompi_mtl_portals4_base_request_t *req =
        (ompi_mtl_portals4_base_request_t*) mtl_request;
    ompi_mtl_portals4_recv_request_t *recv_req;
    int unlink_rc;

    if (portals4_req_isend == req->type) {
        /* can't cancel sends yet */
        return OMPI_SUCCESS;
    }
    if (portals4_req_recv != req->type) {
        return OMPI_ERROR;
    }

    recv_req = (ompi_mtl_portals4_recv_request_t*) req;

    /* Only receives that have not been matched yet can be cancelled;
       matched ones are guaranteed to complete by themselves.  An me_h that
       is already INVALID means matching happened and the communication end
       event was seen.  A failing MEUnlink means either something went wrong
       or the ME is in use, i.e. no cancel.  Drain the queue first so that a
       pending receive completion event is not missed. */
    ompi_mtl_portals4_progress();

    if (PTL_INVALID_HANDLE == recv_req->me_h) {
        return OMPI_SUCCESS;
    }

    unlink_rc = PtlMEUnlink(recv_req->me_h);
    if (OPAL_UNLIKELY(PTL_OK == unlink_rc)) {
        recv_req->super.super.ompi_req->req_status._cancelled = true;
        recv_req->super.super.completion_callback(&recv_req->super.super);
    }

    return OMPI_SUCCESS;
}
int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl) { opal_progress_unregister(ompi_mtl_portals4_progress); while (0 != ompi_mtl_portals4_progress()) { } ompi_mtl_portals4_recv_short_fini(&ompi_mtl_portals4); PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); PtlMDRelease(ompi_mtl_portals4.zero_md_h); PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.send_idx); PtlEQFree(ompi_mtl_portals4.eq_h); PtlNIFini(ompi_mtl_portals4.ni_h); PtlFini(); return OMPI_SUCCESS; }
int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl) { opal_progress_unregister(ompi_mtl_portals4_progress); while (0 != ompi_mtl_portals4_progress()) { } #if OMPI_MTL_PORTALS4_FLOW_CONTROL ompi_mtl_portals4_flowctl_fini(); #endif ompi_mtl_portals4_recv_short_fini(); PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); PtlMDRelease(ompi_mtl_portals4.zero_md_h); #if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE { int i; int num_mds = ompi_mtl_portals4_get_num_mds(); for (i = 0 ; i < num_mds ; ++i) { PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]); } free(ompi_mtl_portals4.send_md_hs); } #else PtlMDRelease(ompi_mtl_portals4.send_md_h); #endif PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); PtlEQFree(ompi_mtl_portals4.send_eq_h); PtlEQFree(ompi_mtl_portals4.recv_eq_h); PtlNIFini(ompi_mtl_portals4.ni_h); PtlFini(); return OMPI_SUCCESS; }
/**
 * Read a remote message with one PtlGet per fragment of at most
 * ompi_mtl_portals4.max_msg_size_mtl bytes.
 *
 * The request's pending_reply counter is raised by the number of fragments
 * before any Get is issued.  Each fragment descriptor is taken from
 * ompi_mtl_portals4.fl_rndv_get_frag and passed to PtlGet as the user
 * pointer.
 *
 * @param start          local destination address of the whole message
 * @param length         total number of bytes to read
 * @param target         process the Gets are addressed to
 * @param match_bits     match bits passed to every PtlGet
 * @param remote_offset  remote offset of the first fragment
 * @param request        receive request the fragments belong to
 *
 * @return OMPI_SUCCESS when every fragment was issued,
 *         OMPI_ERR_OUT_OF_RESOURCE if a fragment descriptor could not be
 *         obtained or a PtlGet failed.
 */
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    int ret, i;
    ptl_size_t rest = length;   /* bytes not yet covered by a fragment */
    int32_t frag_count;

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    /* Reserve a send slot.  When the counter goes negative no slot was
       available: undo the decrement, run progress and try again. */
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    /* number of fragments: length / max_msg_size_mtl, rounded up */
    frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
    OPAL_THREAD_ADD32(&(request->pending_reply), frag_count);

    /* NOTE(review): both failure returns inside the loop leave
       pending_reply raised by the full frag_count and (with flow control) a
       send slot reserved, Gets issued by earlier iterations stay
       outstanding, and after a failed PtlGet the fragment descriptor is not
       handed back to the free list.  Nothing here undoes that -- confirm
       the caller copes. */
    for (i = 0 ; i < frag_count ; i++) {
        opal_free_list_item_t *tmp;
        ompi_mtl_portals4_rndv_get_frag_t* frag;

        tmp = opal_free_list_get (&ompi_mtl_portals4.fl_rndv_get_frag);
        if (NULL == tmp) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        frag = (ompi_mtl_portals4_rndv_get_frag_t*) tmp;

        frag->request = request;
#if OPAL_ENABLE_DEBUG
        frag->frag_num = i;
#endif
        /* fragment i covers bytes [i * max, i * max + frag_length) on both
           the local and the remote side */
        frag->frag_start = (char*)start + i * ompi_mtl_portals4.max_msg_size_mtl;
        frag->frag_length = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? ompi_mtl_portals4.max_msg_size_mtl : rest;
        frag->frag_target = target;
        frag->frag_match_bits = match_bits;
        frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl;

        frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress;
        frag->frag_abs_timeout_usec = 0;

        OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send",
                             i + 1, frag_count, frag->frag_length));

        ret = PtlGet(ompi_mtl_portals4.send_md_h,
                     (ptl_size_t) frag->frag_start,
                     frag->frag_length,
                     frag->frag_target,
                     ompi_mtl_portals4.read_idx,
                     frag->frag_match_bits,
                     frag->frag_remote_offset,
                     frag);
        if (OPAL_UNLIKELY(PTL_OK != ret)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PtlGet failed: %d",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        rest -= frag->frag_length;
    }

    return OMPI_SUCCESS;
}
/**
 * Post a non-blocking receive.
 *
 * Works out the Portals process to match against, builds match/ignore
 * bits, obtains the receive buffer from the convertor, initialises the
 * receive request and appends a use-once match entry to the priority list
 * of the receive portal table entry.  For buffers longer than
 * ompi_mtl_portals4.short_limit the call then runs progress until the
 * request's req_started flag is set.
 *
 * @param mtl          MTL module, used for the endpoint lookup
 * @param comm         communicator the receive is posted on
 * @param src          source rank, or MPI_ANY_SOURCE
 * @param tag          message tag
 * @param convertor    convertor describing the receive buffer
 * @param mtl_request  request storage, used as a Portals4 receive request
 *
 * @return OMPI_SUCCESS on success, the error from
 *         ompi_mtl_datatype_recv_buf, or the value of
 *         ompi_mtl_portals4_get_error() for a failed PtlMEAppend.
 */
int
ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
                        struct ompi_communicator_t *comm,
                        int src,
                        int tag,
                        struct opal_convertor_t *convertor,
                        mca_mtl_request_t *mtl_request)
{
    ompi_mtl_portals4_recv_request_t *recv_req =
        (ompi_mtl_portals4_recv_request_t*) mtl_request;
    ptl_match_bits_t match_bits, ignore_bits;
    ptl_process_t peer;
    ptl_me_t me;
    void *buf;
    size_t buf_len;
    bool free_after;
    int ret;

    /* Which process may match: a specific peer or a wildcard, in logical or
       physical addressing. */
    if (MPI_ANY_SOURCE != src) {
        if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
            peer.rank = src;
        } else {
            ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
            ptl_process_t *endpoint =
                (ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc);

            peer = *endpoint;
        }
    } else if (ompi_mtl_portals4.use_logical) {
        peer.rank = PTL_RANK_ANY;
    } else {
        peer.phys.nid = PTL_NID_ANY;
        peer.phys.pid = PTL_PID_ANY;
    }

    MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
                               src, tag);

    ret = ompi_mtl_datatype_recv_buf(convertor, &buf, &buf_len, &free_after);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    /* Fill in the request before the match entry goes live. */
    recv_req->super.type = portals4_req_recv;
    recv_req->super.event_callback = ompi_mtl_portals4_recv_progress;
#if OPAL_ENABLE_DEBUG
    recv_req->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
    recv_req->hdr_data = 0;
#endif
    /* remember the buffer only when it has to be freed later */
    if (free_after) {
        recv_req->buffer_ptr = buf;
    } else {
        recv_req->buffer_ptr = NULL;
    }
    recv_req->convertor = convertor;
    recv_req->delivery_ptr = buf;
    recv_req->delivery_len = buf_len;
    recv_req->req_started = false;
    recv_req->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
    recv_req->pending_reply = 0;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n",
                         recv_req->opcount,
                         peer.phys.nid, peer.phys.pid,
                         (int64_t)buf_len, match_bits, ignore_bits, (unsigned long) recv_req));

    /* Use-once match entry over the receive buffer. */
    me.start = buf;
    me.length = buf_len;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = ompi_mtl_portals4.uid;
    me.match_id = peer;
    me.match_bits = match_bits;
    me.ignore_bits = ignore_bits;
    me.options = PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE;
    if (buf_len <= ompi_mtl_portals4.short_limit) {
        /* short receives do not request a link event */
        me.options |= PTL_ME_EVENT_LINK_DISABLE;
    }

    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.recv_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      recv_req,
                      &recv_req->me_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        if (NULL != recv_req->buffer_ptr) {
            free(recv_req->buffer_ptr);
        }
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d",
                            __FILE__, __LINE__, ret);
        return ompi_mtl_portals4_get_error(ret);
    }

    if (buf_len <= ompi_mtl_portals4.short_limit) {
        return OMPI_SUCCESS;
    }

    /* Long message: spin until we have either a comm event or a link event,
       guaranteeing progress for long unexpected messages. */
    while (true != recv_req->req_started) {
        ompi_mtl_portals4_progress();
    }

    return OMPI_SUCCESS;
}
static int start_recover(void) { int ret; int64_t epoch_counter; ompi_mtl_portals4.flowctl.flowctl_active = true; epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Entering flowctl_start_recover %ld", epoch_counter); /* re-arm trigger/alarm for next time */ ret = setup_alarm(epoch_counter); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d setup_alarm failed: %d\n", __FILE__, __LINE__, ret); return ret; } /* setup barrier tree for getting us out of flow control */ ret = setup_barrier(epoch_counter); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d setup_barrier failed: %d\n", __FILE__, __LINE__, ret); return ret; } /* drain all pending sends */ while (ompi_mtl_portals4.flowctl.send_slots != ompi_mtl_portals4.flowctl.max_send_slots) { opal_progress(); } /* drain event queue */ while (0 != ompi_mtl_portals4_progress()) { ; } /* check short block active count */ ret = ompi_mtl_portals4_recv_short_link(1); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: recv_short_link failed: %d", __FILE__, __LINE__, ret); } /* reorder the pending sends by operation count */ ret = opal_list_sort(&ompi_mtl_portals4.flowctl.pending_sends, seqnum_compare); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d opal_list_sort failed: %d\n", __FILE__, __LINE__, ret); return ret; } /* drain event queue again, just to make sure */ while (0 != ompi_mtl_portals4_progress()) { ; } /* send barrier entry message */ ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, 0, PTL_NO_ACK_REQ, ompi_mtl_portals4.flowctl.me, ompi_mtl_portals4.flowctl_idx, MTL_PORTALS4_FLOWCTL_FANIN, 0, NULL, 0); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPut failed: %d\n", 
__FILE__, __LINE__, ret); goto error; } /* recovery complete when fan-out event arrives, async event, so we're done now */ ret = OMPI_SUCCESS; error: OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Exiting flowctl_start_recover %ld", epoch_counter)); return ret; }