static int append_overflow(int i) { int mpi_errno = MPI_SUCCESS; int ret; ptl_me_t me; ptl_process_t id_any; MPIDI_STATE_DECL(MPID_STATE_APPEND_OVERFLOW); MPIDI_FUNC_ENTER(MPID_STATE_APPEND_OVERFLOW); MPIU_Assert(i >= 0 && i < NUM_OVERFLOW_ME); id_any.phys.pid = PTL_PID_ANY; id_any.phys.nid = PTL_NID_ANY; me.start = overflow_buf[i]; me.length = OVERFLOW_LENGTH; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_MANAGE_LOCAL | PTL_ME_NO_TRUNCATE | PTL_ME_MAY_ALIGN | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE ); me.match_id = id_any; me.match_bits = 0; me.ignore_bits = ~((ptl_match_bits_t)0); me.min_free = PTL_LARGE_THRESHOLD; /* if there is no space to append the entry, process outstanding events and try again */ ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i, &overflow_me_handle[i]); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_APPEND_OVERFLOW); return mpi_errno; fn_fail: goto fn_exit; }
int MPID_nem_ptl_poll(int is_blocking_poll) { int mpi_errno = MPI_SUCCESS; ptl_event_t event; int ret; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL); /* MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_POLL); */ while (1) { int ctl_event = FALSE; /* Check the rptls EQ first. It should never return an event. */ ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_rpt_eq, &event); MPIU_Assert(ret == PTL_EQ_EMPTY); /* check EQs for events */ ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); if (ret == PTL_EQ_EMPTY) { ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_get_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); if (ret == PTL_EQ_EMPTY) { ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_control_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); if (ret == PTL_EQ_EMPTY) { ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_origin_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); } else { ctl_event = TRUE; } /* all EQs are empty */ if (ret == PTL_EQ_EMPTY) break; } } MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqget", "**ptleqget %s", MPID_nem_ptl_strerror(ret)); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "Received event %s pt_idx=%d ni_fail=%s list=%s user_ptr=%p hdr_data=%#lx mlength=%lu rlength=%lu", MPID_nem_ptl_strevent(&event), event.pt_index, MPID_nem_ptl_strnifail(event.ni_fail_type), MPID_nem_ptl_strlist(event.ptl_list), event.user_ptr, event.hdr_data, event.mlength, event.rlength)); MPIR_ERR_CHKANDJUMP2(event.ni_fail_type != PTL_NI_OK && event.ni_fail_type != PTL_NI_NO_MATCH, mpi_errno, MPI_ERR_OTHER, "**ptlni_fail", "**ptlni_fail %s %s", MPID_nem_ptl_strevent(&event), MPID_nem_ptl_strnifail(event.ni_fail_type)); /* special case for events on the control portal */ if (ctl_event) { mpi_errno = 
MPID_nem_ptl_nm_ctl_event_handler(&event); if (mpi_errno) MPIR_ERR_POP(mpi_errno); continue; } switch (event.type) { case PTL_EVENT_PUT: if (event.ptl_list == PTL_OVERFLOW_LIST) break; case PTL_EVENT_PUT_OVERFLOW: case PTL_EVENT_GET: case PTL_EVENT_SEND: case PTL_EVENT_REPLY: case PTL_EVENT_SEARCH: { MPID_Request * const req = event.user_ptr; MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "req = %p", req); MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "REQ_PTL(req)->event_handler = %p", REQ_PTL(req)->event_handler); if (REQ_PTL(req)->event_handler) { mpi_errno = REQ_PTL(req)->event_handler(&event); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } break; } case PTL_EVENT_AUTO_FREE: mpi_errno = append_overflow((size_t)event.user_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); break; case PTL_EVENT_AUTO_UNLINK: overflow_me_handle[(size_t)event.user_ptr] = PTL_INVALID_HANDLE; break; case PTL_EVENT_LINK: /* ignore */ break; case PTL_EVENT_ACK: default: MPL_error_printf("Received unexpected event type: %d %s", event.type, MPID_nem_ptl_strevent(&event)); MPIR_ERR_INTERNALANDJUMP(mpi_errno, "Unexpected event type"); } } fn_exit: /* MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_POLL); */ return mpi_errno; fn_fail: goto fn_exit; }
int MPID_nem_ptl_poll_finalize(void) { int mpi_errno = MPI_SUCCESS; int i; int ret; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE); for (i = 0; i < NUM_OVERFLOW_ME; ++i) { if (overflow_me_handle[i] != PTL_INVALID_HANDLE) { ret = PtlMEUnlink(overflow_me_handle[i]); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeunlink", "**ptlmeunlink %s", MPID_nem_ptl_strerror(ret)); } MPL_free(overflow_buf[i]); } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE); return mpi_errno; fn_fail: goto fn_exit; }
/*
 * MPID_nem_ptl_pkt_cancel_send_req_handler - handle a "cancel send" request.
 *
 * Searches the main portal, with PTL_SEARCH_DELETE, for a message matching
 * the tag/context/rank carried in the request packet, using a temporary
 * MPROBE-kind request whose event handler is handle_mprobe.  It polls until
 * that request completes, then sends a CANCEL_SEND_RESP packet back on vc
 * whose ack field is the search request's "found" flag.
 *
 * vc      connection the request arrived on; the response is sent on it
 * pkt     the received packet, read as MPIDI_nem_ptl_pkt_cancel_send_req_t
 * buflen  not used by this function
 * rreqp   not used by this function
 *
 * Returns MPI_SUCCESS or an MPI error code.
 *
 * NOTE(review): on the error paths after the request is created, search_req
 * is not released; confirm whether that is acceptable for callers.
 */
int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int ret, mpi_errno = MPI_SUCCESS;
    MPIDI_nem_ptl_pkt_cancel_send_req_t *req_pkt = (MPIDI_nem_ptl_pkt_cancel_send_req_t *)pkt;
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_ptl_pkt_cancel_send_resp_t, resp_pkt);
    MPID_Request *search_req = NULL;
    /* initialized so the release test at the end never reads an indeterminate
     * pointer if the send below returns without storing a request */
    MPID_Request *resp_req = NULL;
    ptl_me_t me;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
      "received cancel send req pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
                      req_pkt->sender_req_id, req_pkt->match.parts.rank,
                      req_pkt->match.parts.tag, req_pkt->match.parts.context_id));

    /* create a dummy request and search for the message */
    search_req = MPID_Request_create();
    /* the allocation must be checked BEFORE the netmod area is initialized:
     * the initialization works on the request's fields */
    MPIR_ERR_CHKANDJUMP1(!search_req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPID_nem_ptl_init_req(search_req);
    MPIU_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */
    search_req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(req_pkt->match.parts.tag, req_pkt->match.parts.context_id, req_pkt->match.parts.rank);

    me.match_id = vc_ptl->id;
    me.ignore_bits = NPTL_MATCH_IGNORE;

    /* FIXME: this should use a custom handler that throws the data away inline */
    REQ_PTL(search_req)->event_handler = handle_mprobe;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, search_req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, search_req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(search_req));

    /* send response; ack tells the sender whether the message was found */
    resp_pkt->type = MPIDI_NEM_PKT_NETMOD;
    resp_pkt->subtype = MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP;
    resp_pkt->ack = REQ_PTL(search_req)->found;
    resp_pkt->sender_req_id = req_pkt->sender_req_id;

    /* NOTE(review): the result of this call is not examined here -- confirm
     * whether a send failure should be propagated */
    MPID_nem_ptl_iStartContigMsg(vc, resp_pkt, sizeof(*resp_pkt), NULL,
                                 0, &resp_req);

    /* if the message was found, free the temporary buffer used to copy the data */
    if (REQ_PTL(search_req)->found)
        MPIU_Free(search_req->dev.tmpbuf);

    MPID_Request_release(search_req);
    if (resp_req != NULL)
        MPID_Request_release(resp_req);

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_ptl_pkt_cancel_send_resp_handler - handle the reply to a
 * "cancel send" request.
 *
 * Looks up the send request named by the packet's sender_req_id and records
 * the outcome in its status cancel bit.  When the cancel was acknowledged,
 * every get ME still recorded on the request is unlinked (completing the
 * request once per ME) and the get_me_p array is freed.  In both cases the
 * request is then completed once more and *rreqp is set to NULL.
 *
 * vc and buflen are not used by this function.
 *
 * Returns MPI_SUCCESS or an MPI error code; on an error path *rreqp is left
 * untouched.
 */
int MPID_nem_ptl_pkt_cancel_send_resp_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *send_req;
    MPIDI_nem_ptl_pkt_cancel_send_resp_t *resp_pkt = (MPIDI_nem_ptl_pkt_cancel_send_resp_t *)pkt;
    int get_idx, ptl_rc;

    MPID_Request_get_ptr(resp_pkt->sender_req_id, send_req);

    if (!resp_pkt->ack) {
        MPIR_STATUS_SET_CANCEL_BIT(send_req->status, FALSE);
        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"unable to cancel message");
    } else {
        MPIR_STATUS_SET_CANCEL_BIT(send_req->status, TRUE);

        /* remove/free any remaining get MEs and handles */
        get_idx = 0;
        while (get_idx < REQ_PTL(send_req)->num_gets) {
            ptl_rc = PtlMEUnlink(REQ_PTL(send_req)->get_me_p[get_idx]);
            MPIR_ERR_CHKANDJUMP1(ptl_rc, mpi_errno, MPI_ERR_OTHER, "**ptlmeunlink",
                                 "**ptlmeunlink %s", MPID_nem_ptl_strerror(ptl_rc));

            mpi_errno = MPID_Request_complete(send_req);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            get_idx++;
        }

        if (REQ_PTL(send_req)->get_me_p) {
            MPIU_Free(REQ_PTL(send_req)->get_me_p);
        }

        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"message cancelled");
    }

    mpi_errno = MPID_Request_complete(send_req);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

    *rreqp = NULL;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
static int handler_recv_complete(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const rreq = e->user_ptr; int ret; int i; MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_COMPLETE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_COMPLETE); MPIU_Assert(e->type == PTL_EVENT_REPLY || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); if (REQ_PTL(rreq)->md != PTL_INVALID_HANDLE) { ret = PtlMDRelease(REQ_PTL(rreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdrelease", "**ptlmdrelease %s", MPID_nem_ptl_strerror(ret)); } for (i = 0; i < MPID_NEM_PTL_NUM_CHUNK_BUFFERS; ++i) if (REQ_PTL(rreq)->chunk_buffer[i]) MPIU_Free(REQ_PTL(rreq)->chunk_buffer[i]); mpi_errno = MPID_Request_complete(rreq); if (mpi_errno) { MPIR_ERR_POP(mpi_errno); } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_COMPLETE); return mpi_errno; fn_fail: goto fn_exit; }
/*
 * MPID_nem_ptl_improbe - matched probe against the Portals matching portal.
 *
 * Creates an MPROBE-kind request with handle_mprobe as its event handler,
 * submits a PTL_SEARCH_DELETE search for (tag, context, source) on
 * MPIDI_nem_ptl_pt and polls until the request completes.
 *
 * vc              connection to the source; its Portals id is initialized on
 *                 demand when source is not MPI_ANY_SOURCE
 * source          rank to match, or MPI_ANY_SOURCE to match any process
 * tag             tag to match, or MPI_ANY_TAG
 * comm            communicator; context_id + context_offset forms the context
 * context_offset  offset added to comm->context_id
 * flag            out: the request's "found" value after the search
 * message         out: set to the request only when *flag is nonzero
 * status          out: filled from the request only when *flag is nonzero
 *
 * When a message is found, the request keeps a reference on comm and is
 * handed to the caller; otherwise the request is released here.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm, int context_offset, int *flag, MPID_Request **message, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    ptl_process_t id_any;
    ptl_me_t me;
    MPID_Request *req;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    id_any.phys.nid = PTL_NID_ANY;
    id_any.phys.pid = PTL_PID_ANY;

    /* create a request */
    req = MPID_Request_create();
    /* the allocation must be checked BEFORE the netmod area is initialized:
     * the initialization works on the request's fields */
    MPIR_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPID_nem_ptl_init_req(req);
    MPIU_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
    REQ_PTL(req)->event_handler = handle_mprobe;
    req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, source);

    if (source == MPI_ANY_SOURCE)
        me.match_id = id_any;
    else {
        /* the peer's Portals id is looked up lazily */
        if (!vc_ptl->id_initialized) {
            mpi_errno = MPID_nem_ptl_init_id(vc);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
        me.match_id = vc_ptl->id;
    }

    if (tag == MPI_ANY_TAG)
        me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG;
    else
        me.ignore_bits = NPTL_MATCH_IGNORE;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(req));

    *flag = REQ_PTL(req)->found;
    if (*flag) {
        /* hand the request to the caller as the matched message */
        req->comm = comm;
        MPIR_Comm_add_ref(comm);
        MPIR_Request_extract_status(req, status);
        *message = req;
    }
    else {
        MPID_Request_release(req);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
static int get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p) { int mpi_errno = MPI_SUCCESS; int str_errno = MPL_STR_SUCCESS; int ret; ptl_process_t my_ptl_id; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GET_BUSINESS_CARD); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GET_BUSINESS_CARD); ret = PtlGetId(MPIDI_nem_ptl_ni, &my_ptl_id); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlgetid", "**ptlgetid %s", MPID_nem_ptl_strerror(ret)); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "Allocated NI and PT id=(%#x,%#x) pt=%#x", my_ptl_id.phys.nid, my_ptl_id.phys.pid, MPIDI_nem_ptl_pt)); str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, NID_KEY, (char *)&my_ptl_id.phys.nid, sizeof(my_ptl_id.phys.nid)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, PID_KEY, (char *)&my_ptl_id.phys.pid, sizeof(my_ptl_id.phys.pid)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, PTI_KEY, (char *)&MPIDI_nem_ptl_pt, sizeof(MPIDI_nem_ptl_pt)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, PTIG_KEY, (char *)&MPIDI_nem_ptl_get_pt, sizeof(MPIDI_nem_ptl_get_pt)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, PTIC_KEY, (char *)&MPIDI_nem_ptl_control_pt, sizeof(MPIDI_nem_ptl_control_pt)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == 
MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, PTIR_KEY, (char *)&MPIDI_nem_ptl_rpt_pt, sizeof(MPIDI_nem_ptl_rpt_pt)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, PTIRG_KEY, (char *)&MPIDI_nem_ptl_get_rpt_pt, sizeof(MPIDI_nem_ptl_get_rpt_pt)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, PTIRC_KEY, (char *)&MPIDI_nem_ptl_control_rpt_pt, sizeof(MPIDI_nem_ptl_control_rpt_pt)); if (str_errno) { MPIR_ERR_CHKANDJUMP(str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len"); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard"); } fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GET_BUSINESS_CARD); return mpi_errno; fn_fail: goto fn_exit; }
static int handler_recv_dequeue_large(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const rreq = e->user_ptr; MPIDI_VC_t *vc; MPID_nem_ptl_vc_area *vc_ptl; int ret; int dt_contig; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr; MPI_Aint dt_true_lb; MPI_Aint last; MPIU_CHKPMEM_DECL(1); MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc); vc_ptl = VC_PTL(vc); dequeue_req(e); MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); /* unpack data from unexpected buffer first */ if (e->type == PTL_EVENT_PUT_OVERFLOW) { if (dt_contig) { MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength); } else { last = e->mlength; MPID_Segment_unpack(rreq->dev.segment_ptr, 0, &last, e->start); MPIU_Assert(last == e->mlength); rreq->dev.segment_first = e->mlength; } } if (!(e->hdr_data & NPTL_LARGE)) { /* all data has already been received; we're done */ mpi_errno = handler_recv_complete(e); if (mpi_errno) MPIR_ERR_POP(mpi_errno); goto fn_exit; } MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD); /* we need to GET the rest of the data from the sender's buffer */ if (dt_contig) { big_get((char *)rreq->dev.user_buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq); goto fn_exit; } /* noncontig recv buffer */ last = rreq->dev.segment_size; rreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.iov, &rreq->dev.iov_count); if (last == rreq->dev.segment_size && rreq->dev.segment_size <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) { /* Rest of message fits in one IOV */ ptl_md_t md; md.start = rreq->dev.iov; md.length = rreq->dev.iov_count; md.options = PTL_IOVEC; 
md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(rreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(rreq)->event_handler = handler_recv_complete; ret = MPID_nem_ptl_rptl_get(REQ_PTL(rreq)->md, 0, rreq->dev.segment_size - rreq->dev.segment_first, vc_ptl->id, vc_ptl->ptg, e->match_bits, 0, rreq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s", MPID_nem_ptl_strerror(ret)); goto fn_exit; } /* message won't fit in a single IOV, allocate buffer and unpack when received */ /* FIXME: For now, allocate a single large buffer to hold entire message */ MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz - PTL_LARGE_THRESHOLD, mpi_errno, "chunk_buffer"); big_get(REQ_PTL(rreq)->chunk_buffer[0], data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq); fn_exit: MPIU_CHKPMEM_COMMIT(); fn_exit2: MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); return mpi_errno; fn_fail: MPIU_CHKPMEM_REAP(); goto fn_exit2; }
static int ptl_finalize(void) { int mpi_errno = MPI_SUCCESS; int ret; ptl_handle_eq_t eqs[5]; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PTL_FINALIZE); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PTL_FINALIZE); /* shut down other modules */ mpi_errno = MPID_nem_ptl_nm_finalize(); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPID_nem_ptl_poll_finalize(); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* shut down portals */ eqs[0] = MPIDI_nem_ptl_eq; eqs[1] = MPIDI_nem_ptl_get_eq; eqs[2] = MPIDI_nem_ptl_control_eq; eqs[3] = MPIDI_nem_ptl_origin_eq; eqs[4] = MPIDI_nem_ptl_rpt_eq; ret = MPID_nem_ptl_rptl_drain_eq(5, eqs); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = MPID_nem_ptl_rptl_ptfini(MPIDI_nem_ptl_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = PtlPTFree(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = MPID_nem_ptl_rptl_ptfini(MPIDI_nem_ptl_get_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = PtlPTFree(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = MPID_nem_ptl_rptl_ptfini(MPIDI_nem_ptl_control_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = PtlPTFree(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = PtlPTFree(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = PtlPTFree(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_rpt_pt); 
MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = PtlPTFree(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptfree", "**ptlptfree %s", MPID_nem_ptl_strerror(ret)); ret = PtlNIFini(MPIDI_nem_ptl_ni); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlnifini", "**ptlnifini %s", MPID_nem_ptl_strerror(ret)); PtlFini(); fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PTL_FINALIZE); return mpi_errno; fn_fail: goto fn_exit; }
/*
 * ptl_init - bring up the Portals netmod.
 *
 * Sequence: sanity-check structure sizes/layouts, register the any-source
 * hooks, initialize Portals, initialize the network interface (twice: once
 * to read the default limits, once with raised limits), allocate the event
 * queues and portal table entries, bind a global memory descriptor, hand the
 * portals to rportals, publish the business card and initialize the poll and
 * nm sub-modules.
 *
 * pg_p          not used by this function
 * pg_rank       passed to get_business_card
 * bc_val_p      in/out: business card cursor, passed to get_business_card
 * val_max_sz_p  in/out: business card space counter, passed likewise
 *
 * Returns MPI_SUCCESS or an MPI error code; the first failing step aborts
 * the sequence and nothing acquired earlier is released here.
 */
static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    ptl_md_t md;
    ptl_ni_limits_t desired;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PTL_INIT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PTL_INIT);

    /* first make sure that our private fields in the vc and req fit into the area provided */
    MPIR_Assert(sizeof(MPID_nem_ptl_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
    MPIR_Assert(sizeof(MPID_nem_ptl_req_area) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN);

    /* Make sure our IOV is the same as portals4's IOV: same total size, same
     * offset for the base and length members, same size for the length member */
    MPIR_Assert(sizeof(ptl_iovec_t) == sizeof(MPL_IOV));
    MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_base)) == ((void*)&(((MPL_IOV*)0)->MPL_IOV_BUF)));
    MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_len)) == ((void*)&(((MPL_IOV*)0)->MPL_IOV_LEN)));
    MPIR_Assert(sizeof(((ptl_iovec_t*)0)->iov_len) == sizeof(((MPL_IOV*)0)->MPL_IOV_LEN));

    /* register the any-source posted/matched callbacks and the improbe hook */
    mpi_errno = MPIDI_CH3I_Register_anysource_notification(MPID_nem_ptl_anysource_posted, MPID_nem_ptl_anysource_matched);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    MPIDI_Anysource_improbe_fn = MPID_nem_ptl_anysource_improbe;

    /* init portals */
    ret = PtlInit();
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlinit", "**ptlinit %s", MPID_nem_ptl_strerror(ret));

    /* do an interface pre-init to get the default limits struct */
    ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
                    PTL_PID_ANY, NULL, &desired, &MPIDI_nem_ptl_ni);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));

    /* finalize the interface so we can re-init with our desired maximums */
    ret = PtlNIFini(MPIDI_nem_ptl_ni);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlnifini", "**ptlnifini %s", MPID_nem_ptl_strerror(ret));

    /* set higher limits if they are determined to be too low; a limit is left
     * alone when the corresponding PTL_LIM_* environment variable is set */
    if (desired.max_unexpected_headers < UNEXPECTED_HDR_COUNT && getenv("PTL_LIM_MAX_UNEXPECTED_HEADERS") == NULL)
        desired.max_unexpected_headers = UNEXPECTED_HDR_COUNT;
    if (desired.max_list_size < LIST_SIZE && getenv("PTL_LIM_MAX_LIST_SIZE") == NULL)
        desired.max_list_size = LIST_SIZE;
    if (desired.max_entries < ENTRY_COUNT && getenv("PTL_LIM_MAX_ENTRIES") == NULL)
        desired.max_entries = ENTRY_COUNT;

    /* do the real init; the limits actually granted land in MPIDI_nem_ptl_ni_limits */
    ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
                    PTL_PID_ANY, &desired, &MPIDI_nem_ptl_ni_limits, &MPIDI_nem_ptl_ni);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));

    /* allocate EQs: one each for the matching, get and control portals, plus
     * one shared by the three rportals portals */
    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_get_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_control_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_rpt_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate a separate EQ for origin events. with this, we can implement
     * rate-limit operations to prevent a locally triggered flow control
     * event */
    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_origin_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate portal for matching messages */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate portal for large messages where receiver does a get */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_get_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_get_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate portal for MPICH control messages */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_control_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_control_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate the rportals portal that is paired with the matching portal
     * in the rptl_ptinit call further down */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate the rportals portal that is paired with the get portal */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_get_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate the rportals portal that is paired with the control portal */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_control_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* create an MD that covers all of memory; its events go to the origin EQ */
    md.start = 0;
    md.length = (ptl_size_t)-1;
    md.options = 0x0;
    md.eq_handle = MPIDI_nem_ptl_origin_eq;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &MPIDI_nem_ptl_global_md);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

    /* currently, rportals only works with a single NI and EQ */
    ret = MPID_nem_ptl_rptl_init(MPIDI_Process.my_pg->size, ORIGIN_EVENTS, get_target_info);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));

    /* allow rportal to manage the primary portal and retransmit if needed */
    ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_pt, MPIDI_nem_ptl_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allow rportal to manage the get and control portals as well, each
     * paired with its own rportals portal.
     * NOTE(review): the previous comment here said PTL_PT_ANY is passed as a
     * dummy portal for the get portal; the call below passes
     * MPIDI_nem_ptl_get_rpt_pt instead -- confirm which is intended. */
    ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_get_pt, MPIDI_nem_ptl_get_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_control_pt, MPIDI_nem_ptl_control_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* create business card */
    mpi_errno = get_business_card(pg_rank, bc_val_p, val_max_sz_p);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* init other modules */
    mpi_errno = MPID_nem_ptl_poll_init();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_nem_ptl_nm_init();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PTL_INIT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
static int handler_send(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const sreq = e->user_ptr; int i, ret; MPIDI_STATE_DECL(MPID_STATE_HANDLER_SEND); MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_SEND); MPIU_Assert(e->type == PTL_EVENT_SEND || e->type == PTL_EVENT_GET); /* if we are done, release all netmod resources */ if (MPID_cc_get(sreq->cc) == 1) { if (REQ_PTL(sreq)->md != PTL_INVALID_HANDLE) { ret = PtlMDRelease(REQ_PTL(sreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdrelease", "**ptlmdrelease %s", MPID_nem_ptl_strerror(ret)); } for (i = 0; i < MPID_NEM_PTL_NUM_CHUNK_BUFFERS; ++i) if (REQ_PTL(sreq)->chunk_buffer[i]) MPIU_Free(REQ_PTL(sreq)->chunk_buffer[i]); if (REQ_PTL(sreq)->get_me_p) MPIU_Free(REQ_PTL(sreq)->get_me_p); } mpi_errno = MPID_Request_complete(sreq); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_SEND); return mpi_errno; fn_fail: goto fn_exit; }
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request) { int mpi_errno = MPI_SUCCESS; MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc); int ret; MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype *dt_ptr; MPID_Request *sreq = NULL; ptl_me_t me; int initial_iov_count, remaining_iov_count; ptl_md_t md; MPI_Aint last; MPIU_CHKPMEM_DECL(2); MPIDI_STATE_DECL(MPID_STATE_SEND_MSG); MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG); MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm); sreq->dev.match.parts.rank = dest; sreq->dev.match.parts.tag = tag; sreq->dev.match.parts.context_id = comm->context_id + context_offset; sreq->ch.vc = vc; if (!vc_ptl->id_initialized) { mpi_errno = MPID_nem_ptl_init_id(vc); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz)); if (data_sz <= PTL_LARGE_THRESHOLD) { /* Small message. 
Send all data eagerly */ if (dt_contig) { void *start = (char *)buf + dt_true_lb; MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message"); REQ_PTL(sreq)->event_handler = handler_send; MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler)); if (start == NULL) ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); else ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz)); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid); MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt); MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler); goto fn_exit; } /* noncontig data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message"); sreq->dev.segment_ptr = MPID_Segment_alloc(); MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; last = sreq->dev.segment_size; sreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count); 
if (last == sreq->dev.segment_size) { /* IOV is able to describe entire message */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " entire message fits in IOV"); md.start = sreq->dev.iov; md.length = sreq->dev.iov_count; md.options = PTL_IOVEC; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz)); goto fn_exit; } /* IOV is not long enough to describe entire message */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " IOV too long: using bounce buffer"); MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; last = data_sz; MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]); MPIU_Assert(last == sreq->dev.segment_size); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 
NPTL_HEADER(ssend_flag, data_sz)); goto fn_exit; } /* Large message. Send first chunk of data and let receiver get the rest */ if (dt_contig) { /* create ME for buffer so receiver can issue a GET for the data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message"); big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); goto fn_exit; } /* Large noncontig data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message"); sreq->dev.segment_ptr = MPID_Segment_alloc(); MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; last = PTL_LARGE_THRESHOLD; sreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count); initial_iov_count = sreq->dev.iov_count; sreq->dev.segment_first = last; if (last == PTL_LARGE_THRESHOLD) { /* first chunk of message fits into IOV */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " first chunk fits in IOV"); if (initial_iov_count < MPL_IOV_LIMIT) { /* There may be space for the rest of the message in this IOV */ sreq->dev.iov_count = MPL_IOV_LIMIT - 
sreq->dev.iov_count; last = sreq->dev.segment_size; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count); remaining_iov_count = sreq->dev.iov_count; if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) { /* Entire message fit in one IOV */ int was_incomplete; MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " rest of message fits in one IOV"); /* Create ME for remaining data */ me.start = &sreq->dev.iov[initial_iov_count]; me.length = remaining_iov_count; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC ); me.match_id = vc_ptl->id; me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank); me.ignore_bits = 0; me.min_free = 0; MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p"); ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->get_me_p[0]); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq); /* increment the cc for the get operation */ MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete); MPIU_Assert(was_incomplete); /* Create MD for first chunk */ md.start = sreq->dev.iov; md.length = initial_iov_count; md.options = PTL_IOVEC; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, 
comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); goto fn_exit; }
me.length = 0; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE ); me.min_free = 0; me.match_bits = NPTL_MATCH(req_pkt->match.parts.tag, req_pkt->match.parts.context_id, req_pkt->match.parts.rank); me.match_id = vc_ptl->id; me.ignore_bits = NPTL_MATCH_IGNORE; /* FIXME: this should use a custom handler that throws the data away inline */ REQ_PTL(search_req)->event_handler = handle_mprobe; /* submit a search request */ ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, search_req); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, search_req); /* wait for search request to complete */ do { mpi_errno = MPID_nem_ptl_poll(FALSE); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } while (!MPIR_Request_is_complete(search_req)); /* send response */ resp_pkt->type = MPIDI_NEM_PKT_NETMOD; resp_pkt->subtype = MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP; resp_pkt->ack = REQ_PTL(search_req)->found; resp_pkt->sender_req_id = req_pkt->sender_req_id; MPID_nem_ptl_iStartContigMsg(vc, resp_pkt, sizeof(*resp_pkt), NULL,