/*
 * big_get - fetch a remote buffer with one or more GET operations.
 *
 * buf          local destination; its address is used as the offset into
 *              MPIDI_nem_ptl_global_md
 * left_to_get  number of bytes still to fetch
 * vc           connection to the peer that exposes the data
 * match_bits   match bits passed to every GET
 * rreq         receive request; its event handler is replaced with
 *              handler_recv_big_get and its num_gets counter is bumped
 *              once per GET issued
 *
 * The transfer is cut into pieces no larger than
 * MPIDI_nem_ptl_ni_limits.max_msg_size.  A failing GET trips MPIU_Assert;
 * nothing is returned to the caller.
 */
static void big_get(void *buf, ptl_size_t left_to_get, MPIDI_VC_t *vc, ptl_match_bits_t match_bits, MPID_Request *rreq)
{
    int ret;
    MPID_nem_ptl_vc_area *vc_ptl = VC_PTL(vc);
    ptl_size_t addr = (ptl_size_t)buf;
    ptl_size_t chunk;

    /* every event produced by the GETs below goes through this handler */
    REQ_PTL(rreq)->event_handler = handler_recv_big_get;

    for (; left_to_get > 0; REQ_PTL(rreq)->num_gets++) {
        /* never ask for more than the portals interface allows in one message */
        chunk = (left_to_get > MPIDI_nem_ptl_ni_limits.max_msg_size)
            ? MPIDI_nem_ptl_ni_limits.max_msg_size
            : left_to_get;

        ret = MPID_nem_ptl_rptl_get(MPIDI_nem_ptl_global_md, addr, chunk, vc_ptl->id, vc_ptl->ptg,
                                    match_bits, 0, rreq);
        DBG_MSG_GET("global", chunk, vc->pg_rank, match_bits);
        MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, " buf=%p", (char *)addr);
        MPIU_Assert(ret == 0);

        /* advance past the piece that was just requested */
        addr += chunk;
        left_to_get -= chunk;
    }
}
/*
 * vc_terminate - tear down a virtual connection.
 *
 * Three situations are distinguished:
 *   - vc->state is not CLOSED: treated as a fault; an MPIX_ERR_PROC_FAILED
 *     code is built in req_errno and MPID_nem_ptl_vc_terminated() is called
 *     right away.  (req_errno is not consumed anywhere else in this
 *     function.)
 *   - vc->state is CLOSED and no sends are queued: MPID_nem_ptl_vc_terminated()
 *     is called right away.
 *   - vc->state is CLOSED with sends still queued: nothing is done here; the
 *     send_queued function is expected to call vc_terminated once the last
 *     queued send has gone out.
 *
 * Returns MPI_SUCCESS or the error reported by MPID_nem_ptl_vc_terminated().
 */
int vc_terminate(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    int req_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_VC_TERMINATE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_VC_TERMINATE);

    if (vc->state == MPIDI_VC_STATE_CLOSED && vc_ptl->num_queued_sends != 0) {
        /* orderly close with sends pending: termination is deferred to the
         * send_queued function, which runs it after the last queued send */
        goto fn_exit;
    }

    if (vc->state != MPIDI_VC_STATE_CLOSED) {
        /* the VC is going away because of a fault: record a process-failure
         * error code before terminating the connection immediately */
        MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
    }

    mpi_errno = MPID_nem_ptl_vc_terminated(vc);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_VC_TERMINATE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_ptl_init_id - fill in the portals addressing fields of a VC.
 *
 * Fetches the peer's connection info string through vc->pg->getConnInfo(),
 * decodes it with MPID_nem_ptl_get_id_from_bc() into the id / pt / ptg /
 * ptc / ptr / ptrg / ptrc fields of the VC's portals area, marks the id as
 * initialized and moves the VC to the ACTIVE state.
 *
 * The scratch buffer is sized by PMI_KVS_Get_value_length_max() and is
 * released on every exit path by MPIR_CHKLMEM_FREEALL().
 *
 * Returns MPI_SUCCESS, or an MPI error code if any step fails (in which
 * case id_initialized and the VC state are left untouched).
 */
int MPID_nem_ptl_init_id(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    char *conn_info;
    int pmi_errno;
    int kvs_val_max;
    MPIR_CHKLMEM_DECL(1);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_INIT_ID);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_INIT_ID);

    /* size the scratch buffer from the PMI-reported maximum value length */
    pmi_errno = PMI_KVS_Get_value_length_max(&kvs_val_max);
    MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
    MPIR_CHKLMEM_MALLOC(conn_info, char *, kvs_val_max, mpi_errno, "bc");

    /* obtain the peer's connection info string */
    mpi_errno = vc->pg->getConnInfo(vc->pg_rank, conn_info, kvs_val_max, vc->pg);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* decode it straight into the VC's portals area */
    mpi_errno = MPID_nem_ptl_get_id_from_bc(conn_info, &vc_ptl->id, &vc_ptl->pt, &vc_ptl->ptg,
                                            &vc_ptl->ptc, &vc_ptl->ptr, &vc_ptl->ptrg,
                                            &vc_ptl->ptrc);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

    vc_ptl->id_initialized = TRUE;
    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);

 fn_exit:
    MPIR_CHKLMEM_FREEALL();
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_INIT_ID);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * big_meappend - expose a send buffer through one or more match entries so
 * the receiver can GET it.
 *
 * buf           start of the data to expose
 * left_to_send  number of bytes to expose
 * vc            connection to the peer allowed to match the entries
 * match_bits    match bits stored in every entry (no ignore bits)
 * sreq          send request; it is passed as the user pointer of every
 *               entry, receives the array of ME handles in get_me_p, and
 *               has its completion counter incremented once per entry
 *
 * The buffer is described in pieces no larger than
 * MPIDI_nem_ptl_ni_limits.max_msg_size, each appended to the priority list
 * of MPIDI_nem_ptl_get_pt as a use-once entry.
 *
 * The function returns nothing, so failures (handle-array allocation,
 * PtlMEAppend) are caught with MPIU_Assert.
 */
static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl_match_bits_t match_bits, MPID_Request *sreq)
{
    int i, ret, was_incomplete;
    MPID_nem_ptl_vc_area *vc_ptl;
    ptl_me_t me;

    vc_ptl = VC_PTL(vc);

    me.start = buf;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                   PTL_ME_EVENT_UNLINK_DISABLE );
    me.match_id = vc_ptl->id;
    me.match_bits = match_bits;
    me.ignore_bits = 0;
    me.min_free = 0;

    /* allocate enough handles to cover all get operations */
    REQ_PTL(sreq)->get_me_p = MPIU_Malloc(sizeof(ptl_handle_me_t) *
                                          ((left_to_send / MPIDI_nem_ptl_ni_limits.max_msg_size) + 1));
    /* the handle array is written through below; stop here rather than
     * hand PtlMEAppend an address derived from a NULL pointer */
    MPIU_Assert(REQ_PTL(sreq)->get_me_p != NULL);

    /* queue up as many entries as necessary to describe the entire message */
    for (i = 0; left_to_send > 0; i++) {
        /* send up to the maximum allowed by the portals interface */
        if (left_to_send > MPIDI_nem_ptl_ni_limits.max_msg_size)
            me.length = MPIDI_nem_ptl_ni_limits.max_msg_size;
        else
            me.length = left_to_send;

        ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                          &REQ_PTL(sreq)->get_me_p[i]);
        DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
        MPIU_Assert(ret == 0);

        /* increment the cc for each get operation */
        MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
        MPIU_Assert(was_incomplete);

        /* account for what has been exposed so far */
        me.start = (char *)me.start + me.length;
        left_to_send -= me.length;
    }
}
/*
 * get_target_info - translate a local portal-table index into the matching
 * indices on a remote rank.
 *
 * rank               rank in MPIDI_Process.my_pg whose VC is looked up
 * id                 out: portals process id of that rank
 * local_data_pt      one of MPIDI_nem_ptl_pt, MPIDI_nem_ptl_get_pt or
 *                    MPIDI_nem_ptl_control_pt (asserted)
 * target_data_pt     out: the peer's counterpart of local_data_pt
 * target_control_pt  out: the peer's associated control index, or
 *                    PTL_PT_ANY for the get portal
 *
 * The VC's id is initialized on demand via MPID_nem_ptl_init_id().
 * Returns MPI_SUCCESS or the error from that initialization; on error the
 * output arguments are not written.
 */
static int get_target_info(int rank, ptl_process_t *id, ptl_pt_index_t local_data_pt, ptl_pt_index_t *target_data_pt, ptl_pt_index_t *target_control_pt)
{
    int mpi_errno = MPI_SUCCESS;
    struct MPIDI_VC *vc;
    MPID_nem_ptl_vc_area *vc_ptl;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_GET_TARGET_INFO);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_GET_TARGET_INFO);

    MPIDI_PG_Get_vc(MPIDI_Process.my_pg, rank, &vc);
    vc_ptl = VC_PTL(vc);

    /* lazily resolve the peer's addressing information */
    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    *id = vc_ptl->id;

    MPIR_Assert(local_data_pt == MPIDI_nem_ptl_pt ||
                local_data_pt == MPIDI_nem_ptl_get_pt ||
                local_data_pt == MPIDI_nem_ptl_control_pt);

    /* the indices are run-time values, so this stays an if-chain */
    if (local_data_pt == MPIDI_nem_ptl_pt) {
        *target_data_pt = vc_ptl->pt;
        *target_control_pt = vc_ptl->ptr;
    }
    else if (local_data_pt == MPIDI_nem_ptl_get_pt) {
        /* the get portal has no control counterpart */
        *target_data_pt = vc_ptl->ptg;
        *target_control_pt = PTL_PT_ANY;
    }
    else if (local_data_pt == MPIDI_nem_ptl_control_pt) {
        *target_data_pt = vc_ptl->ptc;
        *target_control_pt = vc_ptl->ptrc;
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * vc_init - install the portals netmod callbacks on a virtual connection.
 *
 * Sets the send entry points, the LMT callbacks, the communication ops
 * table and the packet-handler table (registering the cancel-send request
 * and response handlers), resets the per-VC portals state, and finally
 * resolves the peer's portals id with MPID_nem_ptl_init_id().
 *
 * Returns whatever MPID_nem_ptl_init_id() returns.
 */
static int vc_init(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *const vc_ch = &vc->ch;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_VC_INIT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_VC_INIT);

    /* send paths and communication ops */
    vc->sendNoncontig_fn = MPID_nem_ptl_SendNoncontig;
    vc->comm_ops = &comm_ops;
    vc_ch->iStartContigMsg = MPID_nem_ptl_iStartContigMsg;
    vc_ch->iSendContig = MPID_nem_ptl_iSendContig;

    /* netmod packet handlers: cancel-send request and response */
    MPID_nem_ptl_pkt_handlers[MPIDI_NEM_PTL_PKT_CANCEL_SEND_REQ] = MPID_nem_ptl_pkt_cancel_send_req_handler;
    MPID_nem_ptl_pkt_handlers[MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP] = MPID_nem_ptl_pkt_cancel_send_resp_handler;
    vc_ch->pkt_handler = MPID_nem_ptl_pkt_handlers;
    vc_ch->num_pkt_handlers = 2;

    /* large-message-transfer callbacks */
    vc_ch->lmt_initiate_lmt = MPID_nem_ptl_lmt_initiate_lmt;
    vc_ch->lmt_start_recv = MPID_nem_ptl_lmt_start_recv;
    vc_ch->lmt_start_send = MPID_nem_ptl_lmt_start_send;
    vc_ch->lmt_handle_cookie = MPID_nem_ptl_lmt_handle_cookie;
    vc_ch->lmt_done_send = MPID_nem_ptl_lmt_done_send;
    vc_ch->lmt_done_recv = MPID_nem_ptl_lmt_done_recv;

    /* list links start out detached */
    vc_ch->prev = NULL;
    vc_ch->next = NULL;

    /* per-VC portals bookkeeping */
    vc_ptl->num_queued_sends = 0;
    vc_ptl->id_initialized = FALSE;

    mpi_errno = MPID_nem_ptl_init_id(vc);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_VC_INIT);
    return mpi_errno;
}
/*
 * MPID_nem_ptl_pkt_cancel_send_req_handler - handle a "cancel send" request
 * packet from a peer.
 *
 * vc      connection the packet arrived on; the response goes back on it
 * pkt     the packet, interpreted as MPIDI_nem_ptl_pkt_cancel_send_req_t
 * buflen  not read or written by this handler
 * rreqp   not read or written by this handler
 *
 * A temporary MPROBE-kind request is used to run a PTL_SEARCH_DELETE for a
 * message matching the tag / context id / rank carried in the packet.  The
 * handler polls until that search completes, then sends a
 * MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP packet whose ack field tells the
 * sender whether the message was found.  If it was found, the temporary
 * buffer attached to the search request is freed.
 *
 * Returns MPI_SUCCESS or an MPI error code (request allocation, search
 * submission, polling, or sending the response).
 */
int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int ret, mpi_errno = MPI_SUCCESS;
    MPIDI_nem_ptl_pkt_cancel_send_req_t *req_pkt = (MPIDI_nem_ptl_pkt_cancel_send_req_t *)pkt;
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_ptl_pkt_cancel_send_resp_t, resp_pkt);
    MPID_Request *search_req;
    /* start as NULL so the release check below is well defined even when
     * the send call fails before storing a request */
    MPID_Request *resp_req = NULL;
    ptl_me_t me;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
      "received cancel send req pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
                      req_pkt->sender_req_id, req_pkt->match.parts.rank,
                      req_pkt->match.parts.tag, req_pkt->match.parts.context_id));

    /* create a dummy request and search for the message */
    /* create a request; verify the allocation BEFORE touching it */
    search_req = MPID_Request_create();
    MPIR_ERR_CHKANDJUMP1(!search_req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPID_nem_ptl_init_req(search_req);
    MPIU_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */
    search_req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(req_pkt->match.parts.tag, req_pkt->match.parts.context_id, req_pkt->match.parts.rank);

    me.match_id = vc_ptl->id;
    me.ignore_bits = NPTL_MATCH_IGNORE;

    /* FIXME: this should use a custom handler that throws the data away inline */
    REQ_PTL(search_req)->event_handler = handle_mprobe;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, search_req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, search_req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(search_req));

    /* send response */
    resp_pkt->type = MPIDI_NEM_PKT_NETMOD;
    resp_pkt->subtype = MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP;
    resp_pkt->ack = REQ_PTL(search_req)->found;
    resp_pkt->sender_req_id = req_pkt->sender_req_id;

    mpi_errno = MPID_nem_ptl_iStartContigMsg(vc, resp_pkt, sizeof(*resp_pkt), NULL, 0, &resp_req);

    /* release local resources first so a failed send does not leak them */

    /* if the message was found, free the temporary buffer used to copy the data */
    if (REQ_PTL(search_req)->found)
        MPIU_Free(search_req->dev.tmpbuf);

    MPID_Request_release(search_req);
    if (resp_req != NULL)
        MPID_Request_release(resp_req);

    /* only now report a failure to send the response */
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_ptl_improbe - matched probe: look for an already-arrived message
 * and, if one is found, remove it from the list and hand it back.
 *
 * vc              connection to the source; its portals id is used as the
 *                 match id unless source is MPI_ANY_SOURCE
 * source          source rank, or MPI_ANY_SOURCE
 * tag             tag, or MPI_ANY_TAG
 * comm            communicator supplying the context id
 * context_offset  added to comm->context_id for matching
 * flag            out: REQ_PTL(req)->found after the search completes
 * message         out: the matched request (written only when *flag is set)
 * status          out: filled from the request (only when *flag is set)
 *
 * A PTL_SEARCH_DELETE is submitted on MPIDI_nem_ptl_pt and the function
 * polls until the search request completes.  On a match the request takes
 * a reference on comm and is returned through *message; otherwise the
 * request is released.
 *
 * Returns MPI_SUCCESS or an MPI error code; on error *flag, *message and
 * *status are not written.
 *
 * NOTE(review): the debug line guards against vc == NULL, yet VC_PTL(vc) is
 * evaluated unconditionally at declaration - confirm VC_PTL is safe for a
 * NULL vc.
 */
int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm, int context_offset, int *flag, MPID_Request **message, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    ptl_process_t id_any;
    ptl_me_t me;
    MPID_Request *req;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    id_any.phys.nid = PTL_NID_ANY;
    id_any.phys.pid = PTL_PID_ANY;

    /* create a request; verify the allocation BEFORE touching it */
    req = MPID_Request_create();
    MPIR_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPID_nem_ptl_init_req(req);
    MPIU_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
    REQ_PTL(req)->event_handler = handle_mprobe;
    req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, source);

    if (source == MPI_ANY_SOURCE)
        me.match_id = id_any;
    else {
        /* resolve the peer's portals id on first use */
        if (!vc_ptl->id_initialized) {
            mpi_errno = MPID_nem_ptl_init_id(vc);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
        me.match_id = vc_ptl->id;
    }

    if (tag == MPI_ANY_TAG)
        me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG;
    else
        me.ignore_bits = NPTL_MATCH_IGNORE;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(req));

    *flag = REQ_PTL(req)->found;
    if (*flag) {
        /* hand the matched request to the caller; it keeps a reference on comm */
        req->comm = comm;
        MPIR_Comm_add_ref(comm);
        MPIR_Request_extract_status(req, status);
        *message = req;
    }
    else {
        MPID_Request_release(req);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPID_Request *rreq) { int mpi_errno = MPI_SUCCESS; MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc); ptl_me_t me; int dt_contig; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr; MPI_Aint dt_true_lb; MPI_Aint last; ptl_process_t id_any; int ret; MPIU_CHKPMEM_DECL(1); MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RECV_POSTED); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RECV_POSTED); id_any.phys.nid = PTL_NID_ANY; id_any.phys.pid = PTL_PID_ANY; MPID_nem_ptl_init_req(rreq); me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_USE_ONCE ); if (vc == NULL) { /* MPI_ANY_SOURCE receive */ me.match_id = id_any; } else { if (!vc_ptl->id_initialized) { mpi_errno = MPID_nem_ptl_init_id(vc); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } me.match_id = vc_ptl->id; } MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "tag=%#x ctx=%#x rank=%#x", rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id, rreq->dev.match.parts.rank)); me.match_bits = NPTL_MATCH(rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id, rreq->dev.match.parts.rank); if (rreq->dev.match.parts.tag == MPI_ANY_TAG) me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG; else me.ignore_bits = NPTL_MATCH_IGNORE; me.min_free = 0; MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz)); if (data_sz <= PTL_LARGE_THRESHOLD) { if (dt_contig) { /* small contig message */ void *start = (char *)rreq->dev.user_buf + dt_true_lb; MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message"); if (start == NULL) me.start = &dummy; else me.start = start; me.length = data_sz; REQ_PTL(rreq)->event_handler = handler_recv_dequeue_complete; } else { /* 
small noncontig */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message"); rreq->dev.segment_ptr = MPID_Segment_alloc(); MPIR_ERR_CHKANDJUMP1(rreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, rreq->dev.segment_ptr, 0); rreq->dev.segment_first = 0; rreq->dev.segment_size = data_sz; last = rreq->dev.segment_size; rreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.iov, &rreq->dev.iov_count); if (last == rreq->dev.segment_size) { /* entire message fits in IOV */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " entire message fits in IOV"); me.start = rreq->dev.iov; me.length = rreq->dev.iov_count; me.options |= PTL_IOVEC; REQ_PTL(rreq)->event_handler = handler_recv_dequeue_complete; } else { /* IOV is not long enough to describe entire message: recv into buffer and unpack later */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " IOV too long: using bounce buffer"); MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer"); me.start = REQ_PTL(rreq)->chunk_buffer[0]; me.length = data_sz; REQ_PTL(rreq)->event_handler = handler_recv_dequeue_unpack_complete; } } } else { /* Large message: Create an ME for the first chunk of data, then do a GET for the rest */ if (dt_contig) {
static int handler_recv_dequeue_large(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const rreq = e->user_ptr; MPIDI_VC_t *vc; MPID_nem_ptl_vc_area *vc_ptl; int ret; int dt_contig; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr; MPI_Aint dt_true_lb; MPI_Aint last; MPIU_CHKPMEM_DECL(1); MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc); vc_ptl = VC_PTL(vc); dequeue_req(e); MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); /* unpack data from unexpected buffer first */ if (e->type == PTL_EVENT_PUT_OVERFLOW) { if (dt_contig) { MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength); } else { last = e->mlength; MPID_Segment_unpack(rreq->dev.segment_ptr, 0, &last, e->start); MPIU_Assert(last == e->mlength); rreq->dev.segment_first = e->mlength; } } if (!(e->hdr_data & NPTL_LARGE)) { /* all data has already been received; we're done */ mpi_errno = handler_recv_complete(e); if (mpi_errno) MPIR_ERR_POP(mpi_errno); goto fn_exit; } MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD); /* we need to GET the rest of the data from the sender's buffer */ if (dt_contig) { big_get((char *)rreq->dev.user_buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq); goto fn_exit; } /* noncontig recv buffer */ last = rreq->dev.segment_size; rreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.iov, &rreq->dev.iov_count); if (last == rreq->dev.segment_size && rreq->dev.segment_size <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) { /* Rest of message fits in one IOV */ ptl_md_t md; md.start = rreq->dev.iov; md.length = rreq->dev.iov_count; md.options = PTL_IOVEC; 
md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(rreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(rreq)->event_handler = handler_recv_complete; ret = MPID_nem_ptl_rptl_get(REQ_PTL(rreq)->md, 0, rreq->dev.segment_size - rreq->dev.segment_first, vc_ptl->id, vc_ptl->ptg, e->match_bits, 0, rreq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s", MPID_nem_ptl_strerror(ret)); goto fn_exit; } /* message won't fit in a single IOV, allocate buffer and unpack when received */ /* FIXME: For now, allocate a single large buffer to hold entire message */ MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz - PTL_LARGE_THRESHOLD, mpi_errno, "chunk_buffer"); big_get(REQ_PTL(rreq)->chunk_buffer[0], data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq); fn_exit: MPIU_CHKPMEM_COMMIT(); fn_exit2: MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); return mpi_errno; fn_fail: MPIU_CHKPMEM_REAP(); goto fn_exit2; }
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request) { int mpi_errno = MPI_SUCCESS; MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc); int ret; MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype *dt_ptr; MPID_Request *sreq = NULL; ptl_me_t me; int initial_iov_count, remaining_iov_count; ptl_md_t md; MPI_Aint last; MPIU_CHKPMEM_DECL(2); MPIDI_STATE_DECL(MPID_STATE_SEND_MSG); MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG); MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm); sreq->dev.match.parts.rank = dest; sreq->dev.match.parts.tag = tag; sreq->dev.match.parts.context_id = comm->context_id + context_offset; sreq->ch.vc = vc; if (!vc_ptl->id_initialized) { mpi_errno = MPID_nem_ptl_init_id(vc); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz)); if (data_sz <= PTL_LARGE_THRESHOLD) { /* Small message. 
Send all data eagerly */ if (dt_contig) { void *start = (char *)buf + dt_true_lb; MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message"); REQ_PTL(sreq)->event_handler = handler_send; MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler)); if (start == NULL) ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); else ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz)); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid); MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt); MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler); goto fn_exit; } /* noncontig data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message"); sreq->dev.segment_ptr = MPID_Segment_alloc(); MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; last = sreq->dev.segment_size; sreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count); 
if (last == sreq->dev.segment_size) { /* IOV is able to describe entire message */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " entire message fits in IOV"); md.start = sreq->dev.iov; md.length = sreq->dev.iov_count; md.options = PTL_IOVEC; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz)); goto fn_exit; } /* IOV is not long enough to describe entire message */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " IOV too long: using bounce buffer"); MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; last = data_sz; MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]); MPIU_Assert(last == sreq->dev.segment_size); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 
NPTL_HEADER(ssend_flag, data_sz)); goto fn_exit; } /* Large message. Send first chunk of data and let receiver get the rest */ if (dt_contig) { /* create ME for buffer so receiver can issue a GET for the data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message"); big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); goto fn_exit; } /* Large noncontig data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message"); sreq->dev.segment_ptr = MPID_Segment_alloc(); MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; last = PTL_LARGE_THRESHOLD; sreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count); initial_iov_count = sreq->dev.iov_count; sreq->dev.segment_first = last; if (last == PTL_LARGE_THRESHOLD) { /* first chunk of message fits into IOV */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " first chunk fits in IOV"); if (initial_iov_count < MPL_IOV_LIMIT) { /* There may be space for the rest of the message in this IOV */ sreq->dev.iov_count = MPL_IOV_LIMIT - 
sreq->dev.iov_count; last = sreq->dev.segment_size; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count); remaining_iov_count = sreq->dev.iov_count; if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) { /* Entire message fit in one IOV */ int was_incomplete; MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " rest of message fits in one IOV"); /* Create ME for remaining data */ me.start = &sreq->dev.iov[initial_iov_count]; me.length = remaining_iov_count; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC ); me.match_id = vc_ptl->id; me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank); me.ignore_bits = 0; me.min_free = 0; MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p"); ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->get_me_p[0]); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq); /* increment the cc for the get operation */ MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete); MPIU_Assert(was_incomplete); /* Create MD for first chunk */ md.start = sreq->dev.iov; md.length = initial_iov_count; md.options = PTL_IOVEC; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, 
comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); goto fn_exit; }
} #undef FUNCNAME #define FUNCNAME MPID_nem_ptl_pkt_cancel_send_req_handler #undef FCNAME #define FCNAME MPL_QUOTE(FUNCNAME) int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, void *data ATTRIBUTE((unused)), intptr_t *buflen, MPIR_Request **rreqp) { int ret, mpi_errno = MPI_SUCCESS; MPIDI_nem_ptl_pkt_cancel_send_req_t *req_pkt = (MPIDI_nem_ptl_pkt_cancel_send_req_t *)pkt; MPID_PKT_DECL_CAST(upkt, MPIDI_nem_ptl_pkt_cancel_send_resp_t, resp_pkt); MPIR_Request *search_req, *resp_req; ptl_me_t me; MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "received cancel send req pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d", req_pkt->sender_req_id, req_pkt->match.parts.rank, req_pkt->match.parts.tag, req_pkt->match.parts.context_id)); /* create a dummy request and search for the message */ /* create a request */ search_req = MPIR_Request_create(MPIR_REQUEST_KIND__MPROBE); MPID_nem_ptl_init_req(search_req); MPIR_ERR_CHKANDJUMP1(!search_req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Request_create"); MPIR_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */ /* create a dummy ME to use for searching the list */ me.start = NULL;