/* initialize short copy blocks */
void ompi_mtl_portals_short_setup()
{
    int ret;
    int i;

    if ((ompi_mtl_portals.ptl_num_copy_blocks > 0) &&
        (ompi_mtl_portals.ptl_copy_block_len > 0)) {

        ompi_mtl_portals.ptl_short_md.length =
            ompi_mtl_portals.ptl_num_copy_blocks * ompi_mtl_portals.ptl_copy_block_len;

        ompi_mtl_portals.ptl_short_md.start = malloc(ompi_mtl_portals.ptl_short_md.length);
        if (NULL == ompi_mtl_portals.ptl_short_md.start) {
            ompi_mtl_portals.ptl_num_copy_blocks = 0;
            return;
        }

        ompi_mtl_portals.ptl_short_md.threshold = PTL_MD_THRESH_INF;
        ompi_mtl_portals.ptl_short_md.max_size = 0;
        ompi_mtl_portals.ptl_short_md.options = PTL_MD_EVENT_START_DISABLE;
        ompi_mtl_portals.ptl_short_md.user_ptr = &ptl_short_request;
        ompi_mtl_portals.ptl_short_md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h,
                        ompi_mtl_portals.ptl_short_md,
                        PTL_RETAIN,
                        &ompi_mtl_portals.ptl_short_md_h);
        if (PTL_OK != ret) {
            free(ompi_mtl_portals.ptl_short_md.start);
            ompi_mtl_portals.ptl_num_copy_blocks = 0;
            return;
        }

        ptl_short_request.event_callback = ompi_mtl_portals_short_callback;

        ompi_mtl_portals.ptl_copy_block_free_list =
            malloc(ompi_mtl_portals.ptl_num_copy_blocks * sizeof(int));
        if (NULL == ompi_mtl_portals.ptl_copy_block_free_list) {
            free(ompi_mtl_portals.ptl_short_md.start);
            ompi_mtl_portals.ptl_num_copy_blocks = 0;
            return;
        }

        for (i = 0; i < ompi_mtl_portals.ptl_num_copy_blocks; i++) {
            ompi_mtl_portals.ptl_copy_block_free_list[i] = i;
        }
        ompi_mtl_portals.ptl_copy_block_first_free = 0;
    }
}
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    ptl_md_t md;
    int ret;

    /* FIX ME: This needs to be on the send eq... */
    md.start = start;
    md.length = length;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, &request->md_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    ret = PtlGet(request->md_h, 0, md.length, target,
                 ompi_mtl_portals4.read_idx, match_bits, remote_offset, request);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        PtlMDRelease(request->md_h);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
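/*
 * Illustrative sketch (not part of the original sources): the origin-side
 * bind/get/release pattern used by read_msg() above, shown with a blocking
 * PtlEQWait() instead of an event callback.  It assumes the caller already
 * holds an initialized NI handle, an event queue that is private to this
 * operation, and the target's process id, portal index, and match bits;
 * error handling is reduced to returning the Portals status code.
 */
static int example_get_blocking(ptl_handle_ni_t ni_h, ptl_handle_eq_t eq_h,
                                ptl_process_t target, ptl_pt_index_t pt_idx,
                                ptl_match_bits_t match_bits,
                                void *buf, ptl_size_t len)
{
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_event_t ev;
    int ret;

    /* Describe the local buffer and attach it to the event queue. */
    md.start = buf;
    md.length = len;
    md.options = 0;
    md.eq_handle = eq_h;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(ni_h, &md, &md_h);
    if (PTL_OK != ret) return ret;

    /* Issue the get; completion is reported as a PTL_EVENT_REPLY on eq_h. */
    ret = PtlGet(md_h, 0, len, target, pt_idx, match_bits, 0, NULL);
    if (PTL_OK != ret) {
        PtlMDRelease(md_h);
        return ret;
    }

    /* Wait for the reply, then release the descriptor. */
    do {
        ret = PtlEQWait(eq_h, &ev);
    } while (PTL_OK == ret && ev.type != PTL_EVENT_REPLY);

    PtlMDRelease(md_h);
    return ret;
}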
int MPID_nem_ptl_rptl_ptinit(ptl_handle_ni_t ni_handle, ptl_handle_eq_t eq_handle,
                             ptl_pt_index_t data_pt, ptl_pt_index_t control_pt)
{
    int ret = PTL_OK;
    struct rptl *rptl;
    int mpi_errno = MPI_SUCCESS;
    int i;
    ptl_md_t md;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);

    /* setup the parts of rptls that can be done before world size or
     * target information */
    MPIU_CHKPMEM_MALLOC(rptl, struct rptl *, sizeof(struct rptl), mpi_errno, "rptl");
    MPL_DL_APPEND(rptl_info.rptl_list, rptl);

    rptl->local_state = RPTL_LOCAL_STATE_ACTIVE;
    rptl->pause_ack_counter = 0;

    rptl->data.ob_max_count = 0;
    rptl->data.ob_curr_count = 0;

    rptl->data.pt = data_pt;
    rptl->control.pt = control_pt;

    rptl->ni = ni_handle;
    rptl->eq = eq_handle;

    md.start = 0;
    md.length = (ptl_size_t) (-1);
    md.options = 0x0;
    md.eq_handle = rptl->eq;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(rptl->ni, &md, &rptl->md);
    RPTLU_ERR_POP(ret, "Error binding new global MD\n");

    /* post world_size number of empty buffers on the control portal */
    if (rptl->control.pt != PTL_PT_ANY) {
        MPIU_CHKPMEM_MALLOC(rptl->control.me, ptl_handle_me_t *,
                            2 * rptl_info.world_size * sizeof(ptl_handle_me_t), mpi_errno,
                            "rptl target info");
        for (i = 0; i < 2 * rptl_info.world_size; i++) {
            ret = rptli_post_control_buffer(rptl->ni, rptl->control.pt, &rptl->control.me[i]);
            RPTLU_ERR_POP(ret, "Error in rptli_post_control_buffer\n");
        }
        rptl->control.me_idx = 0;
    }
static int handler_recv_dequeue_large(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const rreq = e->user_ptr;
    MPIDI_VC_t *vc;
    MPID_nem_ptl_vc_area *vc_ptl;
    int ret;
    int dt_contig;
    MPIDI_msg_sz_t data_sz;
    MPID_Datatype *dt_ptr;
    MPI_Aint dt_true_lb;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);

    MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);

    MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc);
    vc_ptl = VC_PTL(vc);

    dequeue_req(e);

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    /* unpack data from unexpected buffer first */
    if (e->type == PTL_EVENT_PUT_OVERFLOW) {
        if (dt_contig) {
            MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength);
        } else {
            last = e->mlength;
            MPID_Segment_unpack(rreq->dev.segment_ptr, 0, &last, e->start);
            MPIU_Assert(last == e->mlength);
            rreq->dev.segment_first = e->mlength;
        }
    }

    if (!(e->hdr_data & NPTL_LARGE)) {
        /* all data has already been received; we're done */
        mpi_errno = handler_recv_complete(e);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        goto fn_exit;
    }

    MPIU_Assert(e->mlength == PTL_LARGE_THRESHOLD);

    /* we need to GET the rest of the data from the sender's buffer */
    if (dt_contig) {
        big_get((char *)rreq->dev.user_buf + dt_true_lb + PTL_LARGE_THRESHOLD,
                data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq);
        goto fn_exit;
    }

    /* noncontig recv buffer */
    last = rreq->dev.segment_size;
    rreq->dev.iov_count = MPL_IOV_LIMIT;
    MPID_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last,
                             rreq->dev.iov, &rreq->dev.iov_count);

    if (last == rreq->dev.segment_size &&
        rreq->dev.segment_size <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
        /* Rest of message fits in one IOV */
        ptl_md_t md;

        md.start = rreq->dev.iov;
        md.length = rreq->dev.iov_count;
        md.options = PTL_IOVEC;
        md.eq_handle = MPIDI_nem_ptl_origin_eq;
        md.ct_handle = PTL_CT_NONE;
        ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(rreq)->md);
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind",
                             "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

        REQ_PTL(rreq)->event_handler = handler_recv_complete;
        ret = MPID_nem_ptl_rptl_get(REQ_PTL(rreq)->md, 0,
                                    rreq->dev.segment_size - rreq->dev.segment_first,
                                    vc_ptl->id, vc_ptl->ptg, e->match_bits, 0, rreq);
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget",
                             "**ptlget %s", MPID_nem_ptl_strerror(ret));
        goto fn_exit;
    }

    /* message won't fit in a single IOV, allocate buffer and unpack when received */
    /* FIXME: For now, allocate a single large buffer to hold entire message */
    MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz - PTL_LARGE_THRESHOLD,
                        mpi_errno, "chunk_buffer");
    big_get(REQ_PTL(rreq)->chunk_buffer[0], data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq);

  fn_exit:
    MPIU_CHKPMEM_COMMIT();
  fn_exit2:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
    return mpi_errno;
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit2;
}
static int btl_portals4_init_interface(void)
{
    mca_btl_portals4_module_t *portals4_btl;
    unsigned int ret, interface;
    ptl_md_t md;
    ptl_me_t me;

    // The initialisation of EQ, PT and ME must be done after the SetMap !
    for (interface = 0; interface < mca_btl_portals4_component.num_btls; interface++) {
        portals4_btl = mca_btl_portals4_component.btls[interface];

        /* create event queue */
        ret = PtlEQAlloc(portals4_btl->portals_ni_h,
                         mca_btl_portals4_component.recv_queue_size,
                         &portals4_btl->recv_eq_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: PtlEQAlloc failed for NI %d: %d",
                                __FILE__, __LINE__, interface, ret);
            goto error;
        }
        mca_btl_portals4_component.eqs_h[interface] = portals4_btl->recv_eq_h;
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                             "PtlEQAlloc (recv_eq=%d) OK for NI %d\n",
                             portals4_btl->recv_eq_h, interface));

        /* Create recv_idx portal table entry */
        ret = PtlPTAlloc(portals4_btl->portals_ni_h,
                         PTL_PT_ONLY_TRUNCATE,
                         portals4_btl->recv_eq_h,
                         REQ_BTL_TABLE_ID,
                         &portals4_btl->recv_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: PtlPTAlloc failed for NI %d: %d",
                                __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                             "PtlPTAlloc (recv_idx) OK for NI %d recv_idx=%d",
                             interface, portals4_btl->recv_idx));

        if (portals4_btl->recv_idx != REQ_BTL_TABLE_ID) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: PtlPTAlloc did not allocate the requested PT: %d",
                                __FILE__, __LINE__, portals4_btl->recv_idx);
            goto error;
        }

        /* bind zero-length md for sending acks */
        md.start     = NULL;
        md.length    = 0;
        md.options   = 0;
        md.eq_handle = PTL_EQ_NONE;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(portals4_btl->portals_ni_h, &md, &portals4_btl->zero_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: PtlMDBind failed for NI %d: %d",
                                __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                             "PtlMDBind (zero-length md=%d) OK for NI %d",
                             portals4_btl->zero_md_h, interface));

        /* Bind MD across all memory */
        md.start     = 0;
        md.length    = PTL_SIZE_MAX;
        md.options   = 0;
        md.eq_handle = portals4_btl->recv_eq_h;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(portals4_btl->portals_ni_h, &md, &portals4_btl->send_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: PtlMDBind failed for NI %d: %d\n",
                                __FILE__, __LINE__, interface, ret);
            goto error;
        }

        /* Handle long overflows */
        me.start = NULL;
        me.length = 0;
        me.ct_handle = PTL_CT_NONE;
        me.min_free = 0;
        me.uid = PTL_UID_ANY;
        me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_LINK_DISABLE |
            PTL_ME_EVENT_COMM_DISABLE |
            PTL_ME_EVENT_UNLINK_DISABLE;
        if (mca_btl_portals4_component.use_logical) {
            me.match_id.rank = PTL_RANK_ANY;
        } else {
            me.match_id.phys.nid = PTL_NID_ANY;
            me.match_id.phys.pid = PTL_PID_ANY;
        }
        me.match_bits = BTL_PORTALS4_LONG_MSG;
        me.ignore_bits = BTL_PORTALS4_CONTEXT_MASK |
            BTL_PORTALS4_SOURCE_MASK |
            BTL_PORTALS4_TAG_MASK;
        ret = PtlMEAppend(portals4_btl->portals_ni_h,
                          portals4_btl->recv_idx,
                          &me,
                          PTL_OVERFLOW_LIST,
                          NULL,
                          &portals4_btl->long_overflow_me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: PtlMEAppend failed for NI %d: %d",
                                __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                             "PtlMEAppend (overflow list) OK for NI %d", interface));
    }

    ret = mca_btl_portals4_recv_enable(portals4_btl);
    if (PTL_OK != ret) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: Initialization of recv buffer failed: %d",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    return OPAL_SUCCESS;

 error:
    opal_output_verbose(1, opal_btl_base_framework.framework_output,
                        "Error in btl_portals4_init_interface");
    for (interface = 0; interface < mca_btl_portals4_component.num_btls; interface++) {
        portals4_btl = mca_btl_portals4_component.btls[interface];
        if (NULL != portals4_btl) mca_btl_portals4_free_module(portals4_btl);
    }
    mca_btl_portals4_component.num_btls = 0;
    if (NULL != mca_btl_portals4_component.btls) free(mca_btl_portals4_component.btls);
    if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
    mca_btl_portals4_component.btls = NULL;
    mca_btl_portals4_component.eqs_h = NULL;
    return OPAL_ERROR;
}
static int
ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event,
                          struct ompi_convertor_t *convertor,
                          ompi_mtl_portals_request_t *ptl_request)
{
    int ret;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    size_t buflen;

    if (PTL_IS_SHORT_MSG(recv_event->ev.match_bits)) {
        /* the buffer is sitting in the short message queue */
        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;
        ompi_mtl_portals_recv_short_block_t *block = recv_event->ev.md.user_ptr;

        iov.iov_base = (((char *)recv_event->ev.md.start) + recv_event->ev.offset);
        iov.iov_len = recv_event->ev.mlength;
        max_data = iov.iov_len;

        /* see if this message filled the receive block */
        if (recv_event->ev.md.length - (recv_event->ev.offset + recv_event->ev.mlength) <
            recv_event->ev.md.max_size) {
            block->full = true;
        }

        /* pull out the data */
        if (iov.iov_len > 0) {
            ompi_convertor_unpack(convertor, &iov, &iov_count, &max_data);
        }

        /* if synchronous, return an ack */
        if (PTL_IS_SYNC_MSG(recv_event->ev)) {
            md.length = 0;
            md.start = (((char *)recv_event->ev.md.start) + recv_event->ev.offset);
            md.threshold = 1; /* send */
            md.options = PTL_MD_EVENT_START_DISABLE;
            md.user_ptr = NULL;
            md.eq_handle = ompi_mtl_portals.ptl_eq_h;

            ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_UNLINK, &md_h);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr), " Error returned from PtlMDBind. Error code - %d \n", ret);
                abort();
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                                 "acking recv: 0x%016llx\n",
                                 recv_event->ev.match_bits));

            ret = PtlPut(md_h,
                         PTL_NO_ACK_REQ,
                         recv_event->ev.initiator,
                         OMPI_MTL_PORTALS_ACK_TABLE_ID,
                         0,
                         recv_event->ev.hdr_data,
                         0,
                         0);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr), " Error returned from PtlPut. Error code - %d \n", ret);
                abort();
            }
        }

        /* finished with our buffer space */
        ompi_mtl_portals_return_block_part(&ompi_mtl_portals, block);

        ompi_convertor_get_packed_size(convertor, &buflen);

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG =
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR =
            (recv_event->ev.rlength > buflen) ? MPI_ERR_TRUNCATE : MPI_SUCCESS;
        ptl_request->super.ompi_req->req_status._count =
            recv_event->ev.mlength;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "recv complete: 0x%016llx\n",
                             recv_event->ev.match_bits));

        ptl_request->super.completion_callback(&ptl_request->super);

    } else {
        ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                         &ptl_request->free_after);
        if (OMPI_SUCCESS != ret) {
            opal_output(fileno(stderr), " Error returned from ompi_mtl_datatype_recv_buf. Error code - %d \n", ret);
            abort();
        }
        md.length = (recv_event->ev.rlength > buflen) ? buflen : recv_event->ev.rlength;
        md.threshold = 2; /* send and get */
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        /* retain because it's unclear how many events we'll get here.
           Some implementations give just the REPLY, others give SEND
           and REPLY */
        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_RETAIN, &md_h);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr), " Error returned from PtlMDBind. Error code - %d \n", ret);
            abort();
        }

        ptl_request->event_callback = ompi_mtl_portals_recv_progress;

        ret = PtlGet(md_h,
                     recv_event->ev.initiator,
                     OMPI_MTL_PORTALS_READ_TABLE_ID,
                     0,
                     recv_event->ev.hdr_data,
                     0);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr), " Error returned from PtlGet. Error code - %d \n", ret);
            abort();
        }

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG =
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR =
            (recv_event->ev.rlength > buflen) ? MPI_ERR_TRUNCATE : MPI_SUCCESS;
    }

    return OMPI_SUCCESS;
}
int mca_btl_portals4_get(struct mca_btl_base_module_t *btl_base,
                         struct mca_btl_base_endpoint_t *btl_peer,
                         void *local_address,
                         uint64_t remote_address,
                         struct mca_btl_base_registration_handle_t *local_handle,
                         struct mca_btl_base_registration_handle_t *remote_handle,
                         size_t size,
                         int flags,
                         int order,
                         mca_btl_base_rdma_completion_fn_t cbfunc,
                         void *cbcontext,
                         void *cbdata)
{
    mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
    mca_btl_portals4_frag_t *frag = NULL;
    ptl_md_t md;
    int ret;

    /* reserve space in the event queue for rdma operations immediately */
    while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
           portals4_btl->portals_max_outstanding_ops) {
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                             "Call to mca_btl_portals4_component_progress (1)\n"));
        mca_btl_portals4_component_progress();
    }

    OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
    if (NULL == frag) {
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "mca_btl_portals4_get: Incrementing portals_outstanding_ops=%d frag=%p",
                         portals4_btl->portals_outstanding_ops, (void *)frag));

    frag->rdma_cb.func = cbfunc;
    frag->rdma_cb.context = cbcontext;
    frag->rdma_cb.data = cbdata;
    frag->rdma_cb.local_handle = local_handle;

    frag->endpoint = btl_peer;
    frag->hdr.tag = MCA_BTL_TAG_MAX;

    /* Bind the memory */
    md.start = (void *)local_address;
    md.length = size;
    md.options = 0;
    md.eq_handle = portals4_btl->recv_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(portals4_btl->portals_ni_h, &md, &frag->md_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        return OPAL_ERROR;
    }

    frag->match_bits = remote_handle->key;
    frag->length = md.length;
    frag->peer_proc = btl_peer->ptl_proc;

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
                         md.start, md.length,
                         btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid,
                         frag->match_bits));

    ret = PtlGet(frag->md_h,
                 0,
                 md.length,
                 btl_peer->ptl_proc,
                 portals4_btl->recv_idx,
                 frag->match_bits, /* match bits */
                 0,
                 frag);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        PtlMDRelease(frag->md_h);
        frag->md_h = PTL_INVALID_HANDLE;
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
                         md.start, md.length,
                         btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid,
                         frag->match_bits));

    return OPAL_SUCCESS;
}
int main(int argc, char *argv[])
{
    ptl_handle_ni_t ni_logical;
    ptl_pt_index_t logical_pt_index;
    ptl_process_t myself;
    struct timeval start, stop;
    int potato = 0;
    ENTRY_T potato_catcher;
    HANDLE_T potato_catcher_handle;
    ptl_md_t potato_launcher;
    ptl_handle_md_t potato_launcher_handle;
    int num_procs;

    CHECK_RETURNVAL(PtlInit());
    CHECK_RETURNVAL(libtest_init());

    num_procs = libtest_get_size();
    if (NULL != getenv("MAKELEVEL") && num_procs > 2) {
        return 77;
    }

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY,
                              NULL, NULL, &ni_logical));
    CHECK_RETURNVAL(PtlSetMap(ni_logical, num_procs, libtest_get_mapping(ni_logical)));
    CHECK_RETURNVAL(PtlGetId(ni_logical, &myself));
    CHECK_RETURNVAL(PtlPTAlloc(ni_logical, 0, PTL_EQ_NONE, PTL_PT_ANY, &logical_pt_index));
    assert(logical_pt_index == 0);

    /* Now do the initial setup on ni_logical */
    potato_catcher.start = &potato;
    potato_catcher.length = sizeof(potato);
    potato_catcher.uid = PTL_UID_ANY;
    potato_catcher.options = OPTIONS;
#if INTERFACE == 1
    potato_catcher.match_id.rank = PTL_RANK_ANY;
    potato_catcher.match_bits = 1;
    potato_catcher.ignore_bits = ~potato_catcher.match_bits;
#endif
    CHECK_RETURNVAL(PtlCTAlloc(ni_logical, &potato_catcher.ct_handle));
    CHECK_RETURNVAL(APPEND(ni_logical, logical_pt_index, &potato_catcher,
                           PTL_PRIORITY_LIST, NULL, &potato_catcher_handle));

    /* Now do a barrier (on ni_physical) to make sure that everyone has their
     * logical interface set up */
    libtest_barrier();

    /* now I can communicate between ranks with ni_logical */
    /* set up the potato launcher */
    potato_launcher.start = &potato;
    potato_launcher.length = sizeof(potato);
    potato_launcher.options = PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_CT_SEND;
    potato_launcher.eq_handle = PTL_EQ_NONE; // i.e. don't queue send events
    CHECK_RETURNVAL(PtlCTAlloc(ni_logical, &potato_launcher.ct_handle));
    CHECK_RETURNVAL(PtlMDBind(ni_logical, &potato_launcher, &potato_launcher_handle));

    /* rank 0 starts the potato going */
    if (myself.rank == 0) {
        ptl_process_t nextrank;
        nextrank.rank = myself.rank + 1;
        nextrank.rank *= (nextrank.rank <= num_procs - 1);
        gettimeofday(&start, NULL);
        CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0, potato_launcher.length,
                               (LOOPS == 1) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                               nextrank, logical_pt_index, 1, 0, NULL, 1));
    }

    {   /* the potato-passing loop */
        size_t waitfor;
        ptl_ct_event_t ctc;
        ptl_process_t nextrank;
        nextrank.rank = myself.rank + 1;
        nextrank.rank *= (nextrank.rank <= num_procs - 1);
        for (waitfor = 1; waitfor <= LOOPS; ++waitfor) {
            CHECK_RETURNVAL(PtlCTWait(potato_catcher.ct_handle, waitfor, &ctc)); // wait for potato
            assert(ctc.failure == 0);
            assert(ctc.success == waitfor);
            /* I have the potato! */
            ++potato;
            if (potato < LOOPS * (num_procs)) { // otherwise, the recipient may have exited
                /* Bomb's away! */
                if (myself.rank == 0) {
                    CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0, potato_launcher.length,
                                           (waitfor == (LOOPS - 1)) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                                           nextrank, logical_pt_index, 3, 0, NULL, 2));
                } else {
                    CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0, potato_launcher.length,
                                           (waitfor == LOOPS) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                                           nextrank, logical_pt_index, 3, 0, NULL, 2));
                }
            }
        }
        // make sure that last send completed before exiting
        CHECK_RETURNVAL(PtlCTWait(potato_launcher.ct_handle, LOOPS + 1, &ctc));
        assert(ctc.failure == 0);
    }

    if (myself.rank == 0) {
        double accumulate = 0.0;
        gettimeofday(&stop, NULL);
        accumulate = (stop.tv_sec + stop.tv_usec * 1e-6) -
                     (start.tv_sec + start.tv_usec * 1e-6);
        /* calculate the average time waiting */
        printf("Total time: %g secs\n", accumulate);
        accumulate /= LOOPS;
        printf("Average time around the loop: %g microseconds\n", accumulate * 1e6);
        accumulate /= num_procs;
        printf("Average catch-to-toss latency: %g microseconds\n", accumulate * 1e6);
    }

    /* cleanup */
    CHECK_RETURNVAL(PtlMDRelease(potato_launcher_handle));
    CHECK_RETURNVAL(PtlCTFree(potato_launcher.ct_handle));
    CHECK_RETURNVAL(UNLINK(potato_catcher_handle));
    CHECK_RETURNVAL(PtlCTFree(potato_catcher.ct_handle));

    /* major cleanup */
    CHECK_RETURNVAL(PtlPTFree(ni_logical, logical_pt_index));
    CHECK_RETURNVAL(PtlNIFini(ni_logical));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
int
kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
                    unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
                    unsigned int offset, int nob)
{
    kptl_tx_t       *tx;
    ptl_err_t        ptlrc;
    kptl_msg_t      *rxmsg = rx->rx_msg;
    kptl_peer_t     *peer = rx->rx_peer;
    unsigned long    flags;
    ptl_handle_md_t  mdh;

    LASSERT (type == TX_TYPE_PUT_RESPONSE ||
             type == TX_TYPE_GET_RESPONSE);

    tx = kptllnd_get_idle_tx(type);
    if (tx == NULL) {
        CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n",
                type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT",
                libcfs_id2str(peer->peer_id));
        return -ENOMEM;
    }

    kptllnd_set_tx_peer(tx, peer);
    kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob);

    ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md,
                      PTL_UNLINK, &mdh);
    if (ptlrc != PTL_OK) {
        CERROR("PtlMDBind(%s) failed: %s(%d)\n",
               libcfs_id2str(peer->peer_id),
               kptllnd_errtype2str(ptlrc), ptlrc);
        tx->tx_status = -EIO;
        kptllnd_tx_decref(tx);
        return -EIO;
    }

    cfs_spin_lock_irqsave(&peer->peer_lock, flags);

    tx->tx_lnet_msg = lntmsg;
    /* lnet_finalize() will be called when tx is torn down, so I must
     * return success from here on... */

    tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * CFS_HZ);
    tx->tx_rdma_mdh = mdh;
    tx->tx_active = 1;
    cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);

    /* peer has now got my ref on 'tx' */

    cfs_spin_unlock_irqrestore(&peer->peer_lock, flags);

    tx->tx_tposted = jiffies;

    if (type == TX_TYPE_GET_RESPONSE)
        ptlrc = PtlPut(mdh,
                       tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
                       rx->rx_initiator,
                       *kptllnd_tunables.kptl_portal,
                       0,                     /* acl cookie */
                       rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                       0,                     /* offset */
                       (lntmsg != NULL) ?     /* header data */
                       PTLLND_RDMA_OK :
                       PTLLND_RDMA_FAIL);
    else
        ptlrc = PtlGet(mdh,
                       rx->rx_initiator,
                       *kptllnd_tunables.kptl_portal,
                       0,                     /* acl cookie */
                       rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                       0);                    /* offset */

    if (ptlrc != PTL_OK) {
        CERROR("Ptl%s failed: %s(%d)\n",
               (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get",
               kptllnd_errtype2str(ptlrc), ptlrc);

        kptllnd_peer_close(peer, -EIO);
        /* Everything (including this RDMA) queued on the peer will
         * be completed with failure */
        kptllnd_schedule_ptltrace_dump();
    }

    return 0;
}
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count,
                    MPI_Datatype datatype, int dest, int tag, MPID_Comm *comm, int context_offset,
                    struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;
    MPID_Request *sreq = NULL;
    ptl_me_t me;
    int initial_iov_count, remaining_iov_count;
    ptl_md_t md;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);

    MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
    sreq->dev.match.parts.rank = dest;
    sreq->dev.match.parts.tag = tag;
    sreq->dev.match.parts.context_id = comm->context_id + context_offset;
    sreq->ch.vc = vc;

    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE,
                     (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu",
                      count, datatype, dt_contig, data_sz));

    if (data_sz <= PTL_LARGE_THRESHOLD) {
        /* Small message. Send all data eagerly */
        if (dt_contig) {
            void *start = (char *)buf + dt_true_lb;
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message");
            REQ_PTL(sreq)->event_handler = handler_send;
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p",
                           &(REQ_PTL(sreq)->event_handler));
            if (start == NULL)
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy,
                                            data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                                            0, sreq, NPTL_HEADER(ssend_flag, data_sz));
            else
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start,
                                            data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                                            0, sreq, NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s",
                                 MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("global", data_sz, vc->pg_rank,
                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                        NPTL_HEADER(ssend_flag, data_sz));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p",
                           REQ_PTL(sreq)->event_handler);
            goto fn_exit;
        }

        /* noncontig data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message");
        sreq->dev.segment_ptr = MPID_Segment_alloc();
        MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem",
                             "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        last = sreq->dev.segment_size;
        sreq->dev.iov_count = MPL_IOV_LIMIT;
        MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                 sreq->dev.iov, &sreq->dev.iov_count);

        if (last == sreq->dev.segment_size) {
            /* IOV is able to describe entire message */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " entire message fits in IOV");
            md.start = sreq->dev.iov;
            md.length = sreq->dev.iov_count;
            md.options = PTL_IOVEC;
            md.eq_handle = MPIDI_nem_ptl_origin_eq;
            md.ct_handle = PTL_CT_NONE;
            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s",
                                 MPID_nem_ptl_strerror(ret));

            REQ_PTL(sreq)->event_handler = handler_send;
            ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ,
                                        vc_ptl->id, vc_ptl->pt,
                                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                                        0, sreq, NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s",
                                 MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("sreq", data_sz, vc->pg_rank,
                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                        NPTL_HEADER(ssend_flag, data_sz));
            goto fn_exit;
        }

        /* IOV is not long enough to describe entire message */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " IOV too long: using bounce buffer");
        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        last = data_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                          REQ_PTL(sreq)->chunk_buffer[0]);
        MPIU_Assert(last == sreq->dev.segment_size);

        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0],
                                    data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                                    0, sreq, NPTL_HEADER(ssend_flag, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s",
                             MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", data_sz, vc->pg_rank,
                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                    NPTL_HEADER(ssend_flag, data_sz));
        goto fn_exit;
    }

    /* Large message. Send first chunk of data and let receiver get the rest */
    if (dt_contig) {
        /* create ME for buffer so receiver can issue a GET for the data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message");
        big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                     NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);

        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb),
                                    PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                                    0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s",
                             MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank,
                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                    NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        goto fn_exit;
    }

    /* Large noncontig data */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message");
    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Segment_alloc");
    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    last = PTL_LARGE_THRESHOLD;
    sreq->dev.iov_count = MPL_IOV_LIMIT;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                             sreq->dev.iov, &sreq->dev.iov_count);

    initial_iov_count = sreq->dev.iov_count;
    sreq->dev.segment_first = last;

    if (last == PTL_LARGE_THRESHOLD) {
        /* first chunk of message fits into IOV */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " first chunk fits in IOV");
        if (initial_iov_count < MPL_IOV_LIMIT) {
            /* There may be space for the rest of the message in this IOV */
            sreq->dev.iov_count = MPL_IOV_LIMIT - sreq->dev.iov_count;
            last = sreq->dev.segment_size;

            MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                     &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count);
            remaining_iov_count = sreq->dev.iov_count;

            if (last == sreq->dev.segment_size &&
                last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
                /* Entire message fit in one IOV */
                int was_incomplete;

                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " rest of message fits in one IOV");
                /* Create ME for remaining data */
                me.start = &sreq->dev.iov[initial_iov_count];
                me.length = remaining_iov_count;
                me.ct_handle = PTL_CT_NONE;
                me.uid = PTL_UID_ANY;
                me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE |
                               PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC );
                me.match_id = vc_ptl->id;
                me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
                me.ignore_bits = 0;
                me.min_free = 0;

                MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t),
                                    mpi_errno, "get_me_p");

                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                  &REQ_PTL(sreq)->get_me_p[0]);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s",
                                     MPID_nem_ptl_strerror(ret));
                DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
                /* increment the cc for the get operation */
                MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
                MPIU_Assert(was_incomplete);

                /* Create MD for first chunk */
                md.start = sreq->dev.iov;
                md.length = initial_iov_count;
                md.options = PTL_IOVEC;
                md.eq_handle = MPIDI_nem_ptl_origin_eq;
                md.ct_handle = PTL_CT_NONE;
                ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s",
                                     MPID_nem_ptl_strerror(ret));

                REQ_PTL(sreq)->event_handler = handler_send;
                ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ,
                                            vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                                            0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s",
                                     MPID_nem_ptl_strerror(ret));
                DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank,
                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank),
                            NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                goto fn_exit;
            }
int shmem_transport_startup(void)
{
    int ret, i;
    ptl_process_t *desired = NULL;
    ptl_md_t md;
    ptl_le_t le;
    ptl_uid_t uid = PTL_UID_ANY;
    ptl_process_t my_id;
#ifdef USE_ON_NODE_COMMS
    int num_on_node = 0;
#endif

#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
    /* Make sure the heap and data bases are actually symmetric */
    {
        int peer;
        uint64_t bases[2];

        peer = (shmem_internal_my_pe + 1) % shmem_internal_num_pes;

        ret = shmem_runtime_get(peer, "portals4-bases", bases, sizeof(uint64_t) * 2);
        if (0 != ret) {
            fprintf(stderr, "[%03d] ERROR: runtime_put failed: %d\n",
                    shmem_internal_my_pe, ret);
            return ret;
        }

        if ((uintptr_t) shmem_internal_heap_base != bases[0]) {
            fprintf(stderr, "[%03d] ERROR: heap base address does not match with rank %03d and virtual addressing is enabled\n",
                    shmem_internal_my_pe, peer);
            return -1;
        }
        if ((uintptr_t) shmem_internal_data_base != bases[1]) {
            fprintf(stderr, "[%03d] ERROR: data base address does not match with rank %03d and virtual addressing is enabled\n",
                    shmem_internal_my_pe, peer);
            return -1;
        }
    }
#endif

    desired = malloc(sizeof(ptl_process_t) * shmem_internal_num_pes);
    if (NULL == desired) {
        ret = 1;
        goto cleanup;
    }

    ret = PtlGetPhysId(shmem_transport_portals4_ni_h, &my_id);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlGetPhysId failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    for (i = 0 ; i < shmem_internal_num_pes; ++i) {
        ret = shmem_runtime_get(i, "portals4-procid", &desired[i], sizeof(ptl_process_t));
        if (0 != ret) {
            fprintf(stderr, "[%03d] ERROR: runtime_get failed: %d\n",
                    shmem_internal_my_pe, ret);
            goto cleanup;
        }

#ifdef USE_ON_NODE_COMMS
        /* update the connectivity map... */
        if (desired[i].phys.nid == my_id.phys.nid) {
            SHMEM_SET_RANK_SAME_NODE(i, num_on_node++);
            if (num_on_node > 255) {
                fprintf(stderr, "[%03d] ERROR: Too many local ranks.\n",
                        shmem_internal_my_pe);
                goto cleanup;
            }
        }
#endif
    }

    ret = PtlSetMap(shmem_transport_portals4_ni_h, shmem_internal_num_pes, desired);
    if (PTL_OK != ret && PTL_IGNORED != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlSetMap failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    ret = PtlGetUid(shmem_transport_portals4_ni_h, &uid);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlGetUid failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    shmem_transport_portals4_max_volatile_size = ni_limits.max_volatile_size;
    shmem_transport_portals4_max_atomic_size = ni_limits.max_atomic_size;
    shmem_transport_portals4_max_fetch_atomic_size = ni_limits.max_fetch_atomic_size;
    shmem_transport_portals4_max_msg_size = ni_limits.max_msg_size;

    if (shmem_transport_portals4_max_volatile_size < sizeof(long double complex)) {
        fprintf(stderr, "[%03d] ERROR: Max volatile size found to be %lu, too small to continue\n",
                shmem_internal_my_pe, (unsigned long) shmem_transport_portals4_max_volatile_size);
        goto cleanup;
    }
    if (shmem_transport_portals4_max_atomic_size < sizeof(long double complex)) {
        fprintf(stderr, "[%03d] ERROR: Max atomic size found to be %lu, too small to continue\n",
                shmem_internal_my_pe, (unsigned long) shmem_transport_portals4_max_atomic_size);
        goto cleanup;
    }
    if (shmem_transport_portals4_max_fetch_atomic_size < sizeof(long double complex)) {
        fprintf(stderr, "[%03d] ERROR: Max fetch atomic size found to be %lu, too small to continue\n",
                shmem_internal_my_pe, (unsigned long) shmem_transport_portals4_max_fetch_atomic_size);
        goto cleanup;
    }

    /* create portal table entries */
    ret = PtlEQAlloc(shmem_transport_portals4_ni_h,
                     shmem_transport_portals4_event_slots,
                     &shmem_transport_portals4_eq_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlEQAlloc failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
    ret = PtlPTAlloc(shmem_transport_portals4_ni_h,
                     0,
                     shmem_transport_portals4_eq_h,
                     shmem_transport_portals4_pt,
                     &all_pt);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlPTAlloc of table entry failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#else
    ret = PtlPTAlloc(shmem_transport_portals4_ni_h,
                     0,
                     shmem_transport_portals4_eq_h,
                     shmem_transport_portals4_data_pt,
                     &data_pt);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlPTAlloc of data table failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
    ret = PtlPTAlloc(shmem_transport_portals4_ni_h,
                     0,
                     shmem_transport_portals4_eq_h,
                     shmem_transport_portals4_heap_pt,
                     &heap_pt);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlPTAlloc of heap table failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#endif

#ifndef ENABLE_HARD_POLLING
    /* target ct */
    ret = PtlCTAlloc(shmem_transport_portals4_ni_h, &shmem_transport_portals4_target_ct_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlCTAlloc of target ct failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
    le.ct_handle = shmem_transport_portals4_target_ct_h;
#endif
    le.uid = uid;
    le.options = PTL_LE_OP_PUT | PTL_LE_OP_GET |
        PTL_LE_EVENT_LINK_DISABLE | PTL_LE_EVENT_SUCCESS_DISABLE;
#if !defined(ENABLE_HARD_POLLING)
    le.options |= PTL_LE_EVENT_CT_COMM;
#endif
#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
    le.start = NULL;
    le.length = PTL_SIZE_MAX;
    ret = PtlLEAppend(shmem_transport_portals4_ni_h,
                      shmem_transport_portals4_pt,
                      &le,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &shmem_transport_portals4_le_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlLEAppend of all memory failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#else
    /* Open LE to heap section */
    le.start = shmem_internal_heap_base;
    le.length = shmem_internal_heap_length;
    ret = PtlLEAppend(shmem_transport_portals4_ni_h,
                      shmem_transport_portals4_heap_pt,
                      &le,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &shmem_transport_portals4_heap_le_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlLEAppend of heap section failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    /* Open LE to data section */
    le.start = shmem_internal_data_base;
    le.length = shmem_internal_data_length;
    ret = PtlLEAppend(shmem_transport_portals4_ni_h,
                      shmem_transport_portals4_data_pt,
                      &le,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &shmem_transport_portals4_data_le_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlLEAppend of data section failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#endif

    /* Open MD to all memory */
    ret = PtlCTAlloc(shmem_transport_portals4_ni_h, &shmem_transport_portals4_put_ct_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlCTAlloc of put ct failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
    ret = PtlCTAlloc(shmem_transport_portals4_ni_h, &shmem_transport_portals4_get_ct_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlCTAlloc of get ct failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_ACK;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_put_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_put_event_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of put MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_ACK |
        PTL_MD_EVENT_SUCCESS_DISABLE |
        PTL_MD_VOLATILE;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_put_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_put_volatile_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of put MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_ACK |
        PTL_MD_EVENT_SUCCESS_DISABLE;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_put_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_put_cntr_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of put cntr MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_REPLY |
        PTL_MD_EVENT_SUCCESS_DISABLE;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_get_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_get_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of get MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    ret = 0;

 cleanup:
    if (NULL != desired) free(desired);
    return ret;
}
void test_prepostME(int cache_size, int *cache_buf, ptl_handle_ni_t ni,
                    int npeers, int nmsgs, int nbytes, int niters)
{
    int i, j, k;
    double tmp, total = 0;
    ptl_handle_md_t send_md_handle;
    ptl_md_t send_md;
    ptl_process_t dest;
    ptl_size_t offset;
    ptl_pt_index_t index;
    ptl_handle_eq_t recv_eq_handle;
    ptl_handle_me_t me_handles[npeers * nmsgs];
    ptl_event_t event;

    ptl_assert(PtlEQAlloc(ni, nmsgs * npeers + 1, &send_md.eq_handle), PTL_OK);

    send_md.start = send_buf;
    send_md.length = SEND_BUF_SIZE;
    send_md.options = PTL_MD_UNORDERED;
    send_md.ct_handle = PTL_CT_NONE;

    ptl_assert(PtlMDBind(ni, &send_md, &send_md_handle), PTL_OK);

    ptl_assert(PtlEQAlloc(ni, nmsgs * npeers + 1, &recv_eq_handle), PTL_OK);

    ptl_assert(PtlPTAlloc(ni, 0, recv_eq_handle, TestSameDirectionIndex, &index), PTL_OK);
    ptl_assert(TestSameDirectionIndex, index);

    tmp = timer();
    for (j = 0; j < npeers; ++j) {
        for (k = 0; k < nmsgs; ++k) {
            ptl_process_t src;
            src.rank = recv_peers[j];
            postME(ni, index, recv_buf + (nbytes * (k + j * nmsgs)), nbytes, src,
                   magic_tag, &me_handles[k + j * nmsgs]);
        }
    }
    total += (timer() - tmp);

    for (i = 0; i < niters - 1; ++i) {
        cache_invalidate(cache_size, cache_buf);

        libtest_Barrier();

        tmp = timer();
        for (j = 0; j < npeers; ++j) {
            for (k = 0; k < nmsgs; ++k) {
                offset = (nbytes * (k + j * nmsgs));
                dest.rank = send_peers[npeers - j - 1],
                ptl_assert(libtest_Put_offset(send_md_handle, offset, nbytes, dest,
                                              index, magic_tag, offset), PTL_OK);
            }
        }

        /* wait for sends */
        for (j = 0; j < npeers * nmsgs; ++j) {
            ptl_assert(PtlEQWait(send_md.eq_handle, &event), PTL_OK);
            ptl_assert(event.type, PTL_EVENT_SEND);
        }

        /* wait for receives */
        for (j = 0; j < npeers * nmsgs; j++) {
            PtlEQWait(recv_eq_handle, &event);
        }

        for (j = 0; j < npeers; ++j) {
            for (k = 0; k < nmsgs; ++k) {
                ptl_process_t src;
                src.rank = recv_peers[j];
                postME(ni, index, recv_buf + (nbytes * (k + j * nmsgs)), nbytes, src,
                       magic_tag, &me_handles[k + j * nmsgs]);
            }
        }
        total += (timer() - tmp);
    }

    libtest_Barrier();

    tmp = timer();
    for (j = 0; j < npeers; ++j) {
        for (k = 0; k < nmsgs; ++k) {
            offset = (nbytes * (k + j * nmsgs));
            dest.rank = send_peers[npeers - j - 1],
            ptl_assert(libtest_Put_offset(send_md_handle, offset, nbytes, dest,
                                          index, magic_tag, offset), PTL_OK);
        }
    }

    /* wait for sends */
    for (j = 0; j < npeers * nmsgs; ++j) {
        ptl_assert(PtlEQWait(send_md.eq_handle, &event), PTL_OK);
        ptl_assert(event.type, PTL_EVENT_SEND);
    }

    /* wait for receives */
    for (j = 0; j < npeers * nmsgs; j++) {
        PtlEQWait(recv_eq_handle, &event);
    }
    total += (timer() - tmp);

    ptl_assert(PtlEQFree(send_md.eq_handle), PTL_OK);
    ptl_assert(PtlMDRelease(send_md_handle), PTL_OK);
    ptl_assert(PtlEQFree(recv_eq_handle), PTL_OK);
    ptl_assert(PtlPTFree(ni, index), PTL_OK);

    tmp = libtest_AllreduceDouble(total, PTL_SUM);
    display_result("pre-post", (niters * npeers * nmsgs * 2) / (tmp / world_size));
}
int main(int argc, char *argv[])
{
    ptl_handle_ni_t ni_h;
    ptl_pt_index_t pt_index;
    uint64_t *buf;
    ENTRY_T entry;
    HANDLE_T entry_h;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    int rank;
    int num_procs;
    int ret;
    ptl_process_t *procs;
    ptl_handle_eq_t eq_h;
    ptl_event_t ev;
    ptl_hdr_data_t rcvd = 0;
    ptl_hdr_data_t goal = 0;
    ptl_hdr_data_t hdr_data = 1;
    ptl_size_t offset = sizeof(uint64_t);
    uint32_t distance;
    int sends = 0;

    CHECK_RETURNVAL(PtlInit());
    CHECK_RETURNVAL(libtest_init());

    rank = libtest_get_rank();
    num_procs = libtest_get_size();

    /* This test only succeeds if we have more than one rank */
    if (num_procs < 2) return 77;

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY,
                              NULL, NULL, &ni_h));

    procs = libtest_get_mapping(ni_h);
    CHECK_RETURNVAL(PtlSetMap(ni_h, num_procs, procs));

    CHECK_RETURNVAL(PtlEQAlloc(ni_h, 1024, &eq_h));
    CHECK_RETURNVAL(PtlPTAlloc(ni_h, 0, eq_h, 0, &pt_index));
    assert(pt_index == 0);

    buf = malloc(sizeof(uint64_t) * num_procs);
    assert(NULL != buf);

    md.start = buf;
    md.length = sizeof(uint64_t) * num_procs;
    md.options = PTL_MD_UNORDERED;
    md.eq_handle = eq_h;
    md.ct_handle = PTL_CT_NONE;
    CHECK_RETURNVAL(PtlMDBind(ni_h, &md, &md_h));

    entry.start = buf;
    entry.length = sizeof(uint64_t) * num_procs;
    entry.ct_handle = PTL_CT_NONE;
    entry.uid = PTL_UID_ANY;
    entry.options = OPTIONS;
#if MATCHING == 1
    entry.match_id.rank = PTL_RANK_ANY;
    entry.match_bits = 0;
    entry.ignore_bits = 0;
    entry.min_free = 0;
#endif
    CHECK_RETURNVAL(APPEND(ni_h, pt_index, &entry, PTL_PRIORITY_LIST, NULL, &entry_h));

    /* ensure ME is linked before the barrier */
    CHECK_RETURNVAL(PtlEQWait(eq_h, &ev));
    assert( ev.type == PTL_EVENT_LINK );

    libtest_barrier();

    /* Bruck's Concatenation Algorithm */
    buf[0] = rank;   /* own contribution; assigning through the uint64_t slot
                      * avoids the 8-byte memcpy from a 4-byte int */
    for (distance = 1; distance < num_procs; distance *= 2) {
        ptl_size_t to_xfer;
        int peer;
        ptl_process_t proc;

        if (rank >= distance) {
            peer = rank - distance;
        } else {
            peer = rank + (num_procs - distance);
        }

        to_xfer = sizeof(uint64_t) * MIN(distance, num_procs - distance);
        proc.rank = peer;

        CHECK_RETURNVAL(PtlPut(md_h, 0, to_xfer, PTL_NO_ACK_REQ, proc, 0, 0,
                               offset, NULL, hdr_data));
        sends += 1;

        /* wait for completion of the proper receive, and keep count
           of uncompleted sends.  "rcvd" is an accumulator to deal with
           out-of-order receives, which are IDed by the hdr_data */
        goal |= hdr_data;
        while ((rcvd & goal) != goal) {
            ret = PtlEQWait(eq_h, &ev);
            switch (ret) {
            case PTL_OK:
                if (ev.type == PTL_EVENT_SEND) {
                    sends -= 1;
                } else {
                    rcvd |= ev.hdr_data;
                    assert(ev.type == PTL_EVENT_PUT);
                    assert(ev.rlength == ev.mlength);
                    assert((ev.rlength == to_xfer) || (ev.hdr_data != hdr_data));
                }
                break;
            default:
                fprintf(stderr, "PtlEQWait failure: %d\n", ret);
                abort();
            }
        }

        hdr_data <<= 1;
        offset += to_xfer;
    }

    /* wait for any SEND_END events not yet seen */
    while (sends) {
        ret = PtlEQWait(eq_h, &ev);
        switch (ret) {
        case PTL_OK:
            assert( ev.type == PTL_EVENT_SEND );
            sends -= 1;
            break;
        default:
            fprintf(stderr, "PtlEQWait failure: %d\n", ret);
            abort();
        }
    }

    CHECK_RETURNVAL(UNLINK(entry_h));
    CHECK_RETURNVAL(PtlMDRelease(md_h));
    free(buf);

    libtest_barrier();

    /* cleanup */
    CHECK_RETURNVAL(PtlPTFree(ni_h, pt_index));
    CHECK_RETURNVAL(PtlEQFree(eq_h));
    CHECK_RETURNVAL(PtlNIFini(ni_h));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
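/*
 * Note on the exchange pattern above (illustrative comment, not from the
 * original source): with Bruck's concatenation each rank sends to the peer
 * "distance" ranks below it (mod num_procs), the distance doubles every
 * round, and the transfer size is MIN(distance, num_procs - distance)
 * elements, so the loop runs ceil(log2(num_procs)) times.  For example, with
 * num_procs = 5 a rank sends 1, 2, then 1 element (distance = 1, 2, 4) at
 * element offsets 1, 2 and 4, so three rounds deliver the other four
 * contributions alongside the rank's own value in slot 0.
 */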
int ompi_mtl_portals_isend(struct mca_mtl_base_module_t *mtl,
                           struct ompi_communicator_t *comm,
                           int dest,
                           int tag,
                           struct ompi_convertor_t *convertor,
                           mca_pml_base_send_mode_t mode,
                           bool blocking,
                           mca_mtl_request_t *mtl_request)
{
    int ret;
    ptl_match_bits_t match_bits;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_handle_me_t me_h;
    ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
    mca_mtl_base_endpoint_t *endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
    ompi_mtl_portals_request_t *ptl_request = (ompi_mtl_portals_request_t*) mtl_request;
    size_t buflen;

    assert(mtl == &ompi_mtl_portals.base);

    ret = ompi_mtl_datatype_pack(convertor, &md.start, &buflen, &(ptl_request->free_after));
    if (OMPI_SUCCESS != ret) return ret;
    md.length = buflen;

    ptl_request->event_callback = ompi_mtl_portals_send_progress;

    if ((MCA_PML_BASE_SEND_READY == mode)) {
        /* ready send (length doesn't matter) or short non-sync send.
           Eagerly send data and don't wait for completion */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag, PTL_READY_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "ready send bits: 0x%016llx\n", match_bits));

        md.threshold = 1;
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_UNLINK, &(md_h));
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ptl_request->event_callback = ompi_mtl_portals_send_progress_no_ack;

        ret = PtlPut(md_h, PTL_NO_ACK_REQ, endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID, 0, match_bits, 0, 0);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else if (md.length > ompi_mtl_portals.eager_limit) {
        /* it's a long message - same protocol for all send modes other than ready */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag, PTL_LONG_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "long send bits: 0x%016llx (%d)\n", match_bits, dest));

        md.threshold = 2; /* send, {ack, get} */
        md.options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h, OMPI_MTL_PORTALS_READ_TABLE_ID,
                          endpoint->ptl_proc, (ptl_match_bits_t)(uintptr_t) ptl_request,
                          0, PTL_UNLINK, PTL_INS_AFTER, &me_h);
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlMDAttach(me_h, md, PTL_UNLINK, &(md_h));
        if (OMPI_SUCCESS != ret) {
            PtlMEUnlink(me_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlPut(md_h, PTL_ACK_REQ, endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID, 0, match_bits, 0,
                     (ptl_hdr_data_t)(uintptr_t) ptl_request);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
        /* short synchronous message */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag, PTL_SHORT_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short ssend bits: 0x%016llx (%d)\n", match_bits, dest));

        md.threshold = 2; /* send, {ack, put} */
        md.options = PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h, OMPI_MTL_PORTALS_ACK_TABLE_ID,
                          endpoint->ptl_proc, (ptl_match_bits_t)(uintptr_t) ptl_request,
                          0, PTL_UNLINK, PTL_INS_AFTER, &me_h);
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlMDAttach(me_h, md, PTL_UNLINK, &(md_h));
        if (OMPI_SUCCESS != ret) {
            PtlMEUnlink(me_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlPut(md_h, PTL_ACK_REQ, endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID, 0, match_bits, 0,
                     (ptl_hdr_data_t)(uintptr_t) ptl_request);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else {
        /* short send message */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag, PTL_SHORT_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short send bits: 0x%016llx\n", match_bits));

        md.threshold = 1;
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_UNLINK, &(md_h));
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ptl_request->event_callback = ompi_mtl_portals_send_progress_no_ack;

        ret = PtlPut(md_h, PTL_NO_ACK_REQ, endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID, 0, match_bits, 0, 0);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }
    }

    return OMPI_SUCCESS;
}
static int portals4_init_interface(void) { unsigned int ret; ptl_md_t md; ptl_me_t me; /* create event queues */ ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.send_queue_size, &ompi_mtl_portals4.send_eq_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlEQAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_queue_size, &ompi_mtl_portals4.recv_eq_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlEQAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Create send and long message (read) portal table entries */ ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, ompi_mtl_portals4.recv_eq_h, REQ_RECV_TABLE_ID, &ompi_mtl_portals4.recv_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } if (ompi_mtl_portals4.recv_idx != REQ_RECV_TABLE_ID) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", __FILE__, __LINE__, ompi_mtl_portals4.recv_idx); goto error; } ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE, ompi_mtl_portals4.send_eq_h, REQ_READ_TABLE_ID, &ompi_mtl_portals4.read_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } if (ompi_mtl_portals4.read_idx != REQ_READ_TABLE_ID) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", __FILE__, __LINE__, ompi_mtl_portals4.read_idx); goto error; } /* bind zero-length md for sending acks */ md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = PTL_EQ_NONE; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, &ompi_mtl_portals4.zero_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Bind MD across all memory */ md.start = 0; md.length = PTL_SIZE_MAX; md.options = 0; md.eq_handle = ompi_mtl_portals4.send_eq_h; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, &ompi_mtl_portals4.send_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Handle long overflows */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; if (ompi_mtl_portals4.use_logical) { me.match_id.rank = PTL_RANK_ANY; } else { me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; } me.match_bits = MTL_PORTALS4_LONG_MSG; me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx, &me, PTL_OVERFLOW_LIST, NULL, &ompi_mtl_portals4.long_overflow_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* attach short unex recv blocks */ ret = 
ompi_mtl_portals4_recv_short_init(); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: short receive block initialization failed: %d\n", __FILE__, __LINE__, ret); goto error; } ompi_mtl_portals4.opcount = 0; #if OPAL_ENABLE_DEBUG ompi_mtl_portals4.recv_opcount = 0; #endif #if OMPI_MTL_PORTALS4_FLOW_CONTROL ret = ompi_mtl_portals4_flowctl_init(); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n", __FILE__, __LINE__, ret); goto error; } #endif return OMPI_SUCCESS; error: if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) { PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); } if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(ompi_mtl_portals4.zero_md_h); } if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(ompi_mtl_portals4.send_md_h); } if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) { PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); } if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) { PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); } if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) { PtlEQFree(ompi_mtl_portals4.send_eq_h); } if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) { PtlEQFree(ompi_mtl_portals4.recv_eq_h); } return OMPI_ERROR; }
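/*
 * The error path above only works because every handle starts out as
 * PTL_INVALID_HANDLE (and every PT index as ~0) before the first allocation, so a
 * failure at any step releases only the resources that were actually created.  A
 * minimal sketch of that unwind pattern, assuming Portals4 handles; the "example_"
 * name and the EQ size are hypothetical.
 */
static int example_alloc_eq_and_pt(ptl_handle_ni_t ni_h,
                                   ptl_handle_eq_t *eq_h,
                                   ptl_pt_index_t *pt_idx)
{
    int ret;

    *eq_h = PTL_INVALID_HANDLE;
    *pt_idx = (ptl_pt_index_t) ~0UL;

    ret = PtlEQAlloc(ni_h, 4096, eq_h);
    if (PTL_OK != ret) goto error;

    ret = PtlPTAlloc(ni_h, 0, *eq_h, PTL_PT_ANY, pt_idx);
    if (PTL_OK != ret) goto error;

    return PTL_OK;

 error:
    /* release only what was actually allocated */
    if (*pt_idx != (ptl_pt_index_t) ~0UL) PtlPTFree(ni_h, *pt_idx);
    if (!PtlHandleIsEqual(*eq_h, PTL_INVALID_HANDLE)) PtlEQFree(*eq_h);
    return ret;
}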
int main(int argc, char *argv[]) { ptl_handle_ni_t ni_handle; ptl_process_t *procs; int rank; ptl_pt_index_t pt_index, signal_pt_index; HANDLE_T signal_e_handle; HANDLE_T signal_e2_handle; int num_procs; ptl_handle_eq_t eq_handle; ptl_handle_ct_t ct_handle; ptl_handle_md_t md_handle; ptl_ni_limits_t limits_reqd, limits_actual; ENTRY_T value_e; limits_reqd.max_entries = 1024; limits_reqd.max_unexpected_headers = ITERS*2; limits_reqd.max_mds = 1024; limits_reqd.max_eqs = 1024; limits_reqd.max_cts = 1024; limits_reqd.max_pt_index = 64; limits_reqd.max_iovecs = 1024; limits_reqd.max_list_size = 1024; limits_reqd.max_triggered_ops = 1024; limits_reqd.max_msg_size = 1048576; limits_reqd.max_atomic_size = 1048576; limits_reqd.max_fetch_atomic_size = 1048576; limits_reqd.max_waw_ordered_size = 1048576; limits_reqd.max_war_ordered_size = 1048576; limits_reqd.max_volatile_size = 1048576; limits_reqd.features = 0; CHECK_RETURNVAL(PtlInit()); CHECK_RETURNVAL(libtest_init()); rank = libtest_get_rank(); num_procs = libtest_get_size(); if (num_procs < 2) { fprintf(stderr, "test_flowctl_noeq requires at least two processes\n"); return 77; } int iters; if (num_procs < ITERS) iters = ITERS*2+1; else iters = ITERS; CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY, &limits_reqd, &limits_actual, &ni_handle)); procs = libtest_get_mapping(ni_handle); CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs)); if (0 == rank) { /* create data PT space */ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * iters + 64, &eq_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5, &pt_index)); /* create signal ME */ CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 1, eq_handle, 6, &signal_pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = ct_handle; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS | PTL_LE_EVENT_CT_COMM; #if INTERFACE == 1 value_e.match_id.rank = PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_OVERFLOW_LIST, NULL, &signal_e_handle)); } else { ptl_md_t md; /* 16 extra just in case... 
*/ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, iters*2 + 16, &eq_handle)); md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = eq_handle; md.ct_handle = PTL_CT_NONE; CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle)); } fprintf(stderr,"at barrier \n"); libtest_barrier(); if (0 == rank) { ptl_ct_event_t ct; ptl_event_t ev; int ret, count = 0, saw_flowctl = 0; fprintf(stderr,"begin ctwait \n"); /* wait for signal counts */ CHECK_RETURNVAL(PtlCTWait(ct_handle, iters / 2 , &ct)); if (ct.success != iters / 2 || ct.failure != 0) { return 1; } fprintf(stderr,"done CT wait \n"); /* wait for event entries */ while (1) { ret = PtlEQGet(eq_handle, &ev); if (PTL_OK == ret) { count++; fprintf(stderr, "found EQ value \n"); } else if (ret == PTL_EQ_EMPTY) { continue; } else { fprintf(stderr, "0: Unexpected return code from EQGet: %d\n", ret); return 1; } if (ev.type == PTL_EVENT_PT_DISABLED) { saw_flowctl++; break; } } fprintf(stderr, "0: Saw %d flowctl\n", saw_flowctl); if (saw_flowctl == 0) { return 1; } /* Now clear out all of the unexpected messages so we can clean up everything */ CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e2_handle)); ret = PTL_OK; while (ret != PTL_EQ_EMPTY) ret = PtlEQGet(eq_handle, &ev); } else { ptl_process_t target; ptl_event_t ev; int ret, count = 0, fails = 0; int i; target.rank = 0; printf("beginning puts \n"); for (i = 0 ; i < iters ; ++i) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, NULL, 0)); usleep(100); } while (count < iters) { ret = PtlEQGet(eq_handle, &ev); if (PTL_EQ_EMPTY == ret) { continue; } else if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { continue; } else if (ev.type == PTL_EVENT_ACK) { count++; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) { count++; fails++; } else if (ev.ni_fail_type == PTL_EQ_EMPTY) { continue; } else if (ev.ni_fail_type == PTL_EQ_DROPPED) { continue; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } fprintf(stderr, "%d: Saw %d of %d ACKs as fails\n", rank, fails, count); } fprintf(stderr,"at final barrier \n"); libtest_barrier(); if (0 == rank) { CHECK_RETURNVAL(UNLINK(signal_e_handle)); CHECK_RETURNVAL(UNLINK(signal_e2_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index)); CHECK_RETURNVAL(PtlCTFree(ct_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } else { CHECK_RETURNVAL(PtlMDRelease(md_handle)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } fprintf(stderr,"final cleanup \n"); CHECK_RETURNVAL(PtlNIFini(ni_handle)); CHECK_RETURNVAL(libtest_fini()); PtlFini(); return 0; }
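/*
 * A minimal sketch (not from the test itself) of the busy-poll the test wraps
 * around PtlEQGet: spin while the queue is empty, hand back the first event, and
 * surface any other return code to the caller.  "example_eq_poll" is a
 * hypothetical name.
 */
static int example_eq_poll(ptl_handle_eq_t eq_h, ptl_event_t *ev)
{
    int ret;
    do {
        ret = PtlEQGet(eq_h, ev);
    } while (PTL_EQ_EMPTY == ret);
    return ret;   /* PTL_OK on success; anything else is an error */
}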
int main(int argc, char *argv[]) { ptl_handle_ni_t ni_handle; ptl_process_t *procs; int rank; ptl_pt_index_t pt_index, signal_pt_index; HANDLE_T value_e_handle, signal_e_handle; int num_procs; ptl_handle_eq_t eq_handle; ptl_handle_ct_t ct_handle; ptl_handle_md_t md_handle; CHECK_RETURNVAL(PtlInit()); CHECK_RETURNVAL(libtest_init()); rank = libtest_get_rank(); num_procs = libtest_get_size(); if (num_procs < 2) { fprintf(stderr, "test_flowctl_noeq requires at least two processes\n"); return 77; } CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY, NULL, NULL, &ni_handle)); procs = libtest_get_mapping(ni_handle); CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs)); if (0 == rank) { ENTRY_T value_e; /* create data ME */ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * ITERS / 2, &eq_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5, &pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = PTL_CT_NONE; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS; #if INTERFACE == 1 value_e.match_id.rank = PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &value_e_handle)); /* create signal ME */ CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 0, PTL_EQ_NONE, 6, &signal_pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = ct_handle; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS | PTL_LE_EVENT_SUCCESS_DISABLE | PTL_LE_EVENT_CT_COMM; #if INTERFACE == 1 value_e.match_id.rank = PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 6, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e_handle)); } else { ptl_md_t md; /* 16 extra just in case... 
*/ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, ITERS * 2 + 16, &eq_handle)); md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = eq_handle; md.ct_handle = PTL_CT_NONE; CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle)); } libtest_barrier(); if (0 == rank) { ptl_ct_event_t ct; ptl_event_t ev; int ret, count = 0, saw_dropped = 0, saw_flowctl = 0; /* wait for signal counts */ CHECK_RETURNVAL(PtlCTWait(ct_handle, num_procs - 1, &ct)); if (ct.success != num_procs - 1 || ct.failure != 0) { return 1; } /* wait for event entries */ while (count < ITERS * (num_procs - 1)) { ret = PtlEQWait(eq_handle, &ev); if (PTL_OK == ret) { ; } else if (PTL_EQ_DROPPED == ret) { saw_dropped++; if (ev.type == PTL_EVENT_PT_DISABLED){ saw_flowctl++; CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index)); } break; } else { fprintf(stderr, "0: Unexpected return code from EQWait: %d\n", ret); return 1; } if (ev.type == PTL_EVENT_PT_DISABLED) { CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index)); saw_flowctl++; } else { count++; } } fprintf(stderr, "0: Saw %d dropped, %d flowctl\n", saw_dropped, saw_flowctl); if (saw_flowctl == 0) { return 1; } } else { ptl_process_t target; ptl_event_t ev; int ret, count = 0, fails = 0; int i; int *fail_seen; fail_seen = malloc(sizeof(int) * ITERS); if (NULL == fail_seen) { fprintf(stderr, "%d: malloc failed\n", rank); return 1; } memset(fail_seen, 0, sizeof(int) * ITERS); target.rank = 0; for (i = 0 ; i < ITERS ; ++i) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, (void*)(size_t)i, 0)); usleep(100); } while (count < ITERS) { ret = PtlEQGet(eq_handle, &ev); if (PTL_EQ_EMPTY == ret) { continue; } else if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { continue; } else if (ev.type == PTL_EVENT_ACK) { count++; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) { int iter = (size_t) ev.user_ptr; if (fail_seen[iter]++ > 0) { fprintf(stderr, "%d: Double report of PT_DISABLED for " "iteration %d\n", rank, iter); return 1; } count++; fails++; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } fprintf(stderr, "%d: Saw %d of %d events as fails\n", rank, fails, count); CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_NO_ACK_REQ, target, 6, 0, 0, NULL, 0)); /* wait for the send event on the last put */ CHECK_RETURNVAL(PtlEQWait(eq_handle, &ev)); while (fails > 0) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, NULL, 0)); while (1) { ret = PtlEQWait(eq_handle, &ev); if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQWait returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { continue; } else if (ev.type == PTL_EVENT_ACK) { fails--; break; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) { break; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } } } libtest_barrier(); if (0 == rank) { CHECK_RETURNVAL(UNLINK(signal_e_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index)); CHECK_RETURNVAL(PtlCTFree(ct_handle)); CHECK_RETURNVAL(UNLINK(value_e_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } else { CHECK_RETURNVAL(PtlMDRelease(md_handle)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } 
CHECK_RETURNVAL(PtlNIFini(ni_handle)); CHECK_RETURNVAL(libtest_fini()); PtlFini(); return 0; }
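/*
 * A minimal sketch of the recovery idea the test above exercises: if an ACK comes
 * back with ni_fail_type == PTL_NI_PT_DISABLED, the target dropped the message
 * under flow control, so the initiator simply re-issues the put and waits again.
 * Handles, portal index, and the back-off are caller-supplied assumptions;
 * "example_put_retry" is hypothetical.
 */
static int example_put_retry(ptl_handle_md_t md_h, ptl_handle_eq_t eq_h,
                             ptl_process_t target, ptl_pt_index_t pt)
{
    ptl_event_t ev;
    int ret;

    for (;;) {
        ret = PtlPut(md_h, 0, 0, PTL_ACK_REQ, target, pt, 0, 0, NULL, 0);
        if (PTL_OK != ret) return ret;

        do {            /* skip the local SEND event; wait for the ACK */
            ret = PtlEQWait(eq_h, &ev);
            if (PTL_OK != ret) return ret;
        } while (ev.type == PTL_EVENT_SEND);

        if (ev.ni_fail_type == PTL_NI_OK && ev.type == PTL_EVENT_ACK)
            return PTL_OK;              /* delivered */
        if (ev.ni_fail_type != PTL_NI_PT_DISABLED)
            return PTL_FAIL;            /* unexpected failure type */

        usleep(100);                    /* PT was disabled: back off, retransmit */
    }
}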
/* /!\ Called for each processes /!\ */ static int portals4_init_query(bool enable_progress_threads, bool enable_mpi_threads) { int ret; ptl_md_t md; ptl_me_t me; /* Initialize Portals and create a physical, matching interface */ ret = PtlInit(); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlInit failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_PHYSICAL | PTL_NI_MATCHING, PTL_PID_ANY, NULL, &mca_coll_portals4_component.ni_limits, &mca_coll_portals4_component.ni_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlNIInit failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlGetid failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* FIX ME: Need to make sure our ID matches with the MTL... */ ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlGetUid failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlEQAlloc(mca_coll_portals4_component.ni_h, MCA_COLL_PORTALS4_EQ_SIZE, &mca_coll_portals4_component.eq_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlEQAlloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlPTAlloc(mca_coll_portals4_component.ni_h, 0, mca_coll_portals4_component.eq_h, REQ_COLL_TABLE_ID, &mca_coll_portals4_component.pt_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n", __FILE__, __LINE__, mca_coll_portals4_component.finish_pt_idx); return OMPI_ERROR; } ret = PtlPTAlloc(mca_coll_portals4_component.ni_h, 0, mca_coll_portals4_component.eq_h, REQ_COLL_FINISH_TABLE_ID, &mca_coll_portals4_component.finish_pt_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n", __FILE__, __LINE__, mca_coll_portals4_component.finish_pt_idx); return OMPI_ERROR; } /* Bind MD/MDs across all memory. 
We prefer (for obvious reasons) to have a single MD across all of memory */ memset(&md, 0, sizeof(ptl_md_t)); md.start = 0; md.length = 0; md.options = 0; md.eq_handle = PTL_EQ_NONE; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(mca_coll_portals4_component.ni_h, &md, &mca_coll_portals4_component.zero_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } md.start = 0; md.length = PTL_SIZE_MAX; md.options = 0; md.eq_handle = PTL_EQ_NONE; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(mca_coll_portals4_component.ni_h, &md, &mca_coll_portals4_component.data_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length)); /* setup finish ack ME */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = mca_coll_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = 0; me.ignore_bits = 0; ret = PtlMEAppend(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.finish_pt_idx, &me, PTL_PRIORITY_LIST, NULL, &mca_coll_portals4_component.finish_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* This ME is used for RTR exchange only */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = mca_coll_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; /* Note : the RTR bit must be set to match this ME, * this allows to discriminate the RTR from data flow * (especially for the Barrier operations) */ COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0); me.ignore_bits = ~COLL_PORTALS4_RTR_MASK; ret = PtlMEAppend(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx, &me, PTL_OVERFLOW_LIST, NULL, &mca_coll_portals4_component.unex_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* activate progress callback */ ret = opal_progress_register(portals4_progress); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: opal_progress_register failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } return OMPI_SUCCESS; }
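/*
 * Why bind one MD covering all of memory (start = 0, length = PTL_SIZE_MAX)?  With
 * such an MD, the local offset of any operation is simply the virtual address of
 * the user buffer, so no per-message PtlMDBind/PtlMDRelease is needed.  A minimal
 * sketch under that assumption; "example_put_anywhere" is a hypothetical name.
 */
static int example_put_anywhere(ptl_handle_md_t all_mem_md_h,
                                const void *buf, ptl_size_t len,
                                ptl_process_t target, ptl_pt_index_t pt,
                                ptl_match_bits_t match_bits)
{
    /* the buffer's address doubles as the offset into the all-memory MD */
    return PtlPut(all_mem_md_h, (ptl_size_t) (uintptr_t) buf, len, PTL_NO_ACK_REQ,
                  target, pt, match_bits, 0, NULL, 0);
}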
static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p) { int mpi_errno = MPI_SUCCESS; int ret; ptl_md_t md; ptl_ni_limits_t desired; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PTL_INIT); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PTL_INIT); /* first make sure that our private fields in the vc and req fit into the area provided */ MPIR_Assert(sizeof(MPID_nem_ptl_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN); MPIR_Assert(sizeof(MPID_nem_ptl_req_area) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN); /* Make sure our IOV is the same as portals4's IOV */ MPIR_Assert(sizeof(ptl_iovec_t) == sizeof(MPL_IOV)); MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_base)) == ((void*)&(((MPL_IOV*)0)->MPL_IOV_BUF))); MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_len)) == ((void*)&(((MPL_IOV*)0)->MPL_IOV_LEN))); MPIR_Assert(sizeof(((ptl_iovec_t*)0)->iov_len) == sizeof(((MPL_IOV*)0)->MPL_IOV_LEN)); mpi_errno = MPIDI_CH3I_Register_anysource_notification(MPID_nem_ptl_anysource_posted, MPID_nem_ptl_anysource_matched); if (mpi_errno) MPIR_ERR_POP(mpi_errno); MPIDI_Anysource_improbe_fn = MPID_nem_ptl_anysource_improbe; /* init portals */ ret = PtlInit(); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlinit", "**ptlinit %s", MPID_nem_ptl_strerror(ret)); /* do an interface pre-init to get the default limits struct */ ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL, PTL_PID_ANY, NULL, &desired, &MPIDI_nem_ptl_ni); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret)); /* finalize the interface so we can re-init with our desired maximums */ ret = PtlNIFini(MPIDI_nem_ptl_ni); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlnifini", "**ptlnifini %s", MPID_nem_ptl_strerror(ret)); /* set higher limits if they are determined to be too low */ if (desired.max_unexpected_headers < UNEXPECTED_HDR_COUNT && getenv("PTL_LIM_MAX_UNEXPECTED_HEADERS") == NULL) desired.max_unexpected_headers = UNEXPECTED_HDR_COUNT; if (desired.max_list_size < LIST_SIZE && getenv("PTL_LIM_MAX_LIST_SIZE") == NULL) desired.max_list_size = LIST_SIZE; if (desired.max_entries < ENTRY_COUNT && getenv("PTL_LIM_MAX_ENTRIES") == NULL) desired.max_entries = ENTRY_COUNT; /* do the real init */ ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL, PTL_PID_ANY, &desired, &MPIDI_nem_ptl_ni_limits, &MPIDI_nem_ptl_ni); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret)); /* allocate EQs for each portal */ ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_eq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret)); ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_get_eq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret)); ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_control_eq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret)); ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_rpt_eq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret)); /* allocate a separate EQ for origin events. 
with this, we can implement rate-limit operations to prevent a locally triggered flow control even */ ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_origin_eq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret)); /* allocate portal for matching messages */ ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_eq, PTL_PT_ANY, &MPIDI_nem_ptl_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* allocate portal for large messages where receiver does a get */ ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_get_eq, PTL_PT_ANY, &MPIDI_nem_ptl_get_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* allocate portal for MPICH control messages */ ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_control_eq, PTL_PT_ANY, &MPIDI_nem_ptl_control_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* allocate portal for MPICH control messages */ ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq, PTL_PT_ANY, &MPIDI_nem_ptl_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* allocate portal for MPICH control messages */ ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq, PTL_PT_ANY, &MPIDI_nem_ptl_get_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* allocate portal for MPICH control messages */ ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq, PTL_PT_ANY, &MPIDI_nem_ptl_control_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* create an MD that covers all of memory */ md.start = 0; md.length = (ptl_size_t)-1; md.options = 0x0; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &MPIDI_nem_ptl_global_md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); /* currently, rportlas only works with a single NI and EQ */ ret = MPID_nem_ptl_rptl_init(MPIDI_Process.my_pg->size, ORIGIN_EVENTS, get_target_info); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret)); /* allow rportal to manage the primary portal and retransmit if needed */ ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_pt, MPIDI_nem_ptl_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* allow rportal to manage the get and control portals, but we * don't expect retransmission to be needed on the get portal, so * we pass PTL_PT_ANY as the dummy portal. unfortunately, portals * does not have an "invalid" PT constant, which would have been * more appropriate to pass over here. 
*/ ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_get_pt, MPIDI_nem_ptl_get_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_control_pt, MPIDI_nem_ptl_control_rpt_pt); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret)); /* create business card */ mpi_errno = get_business_card(pg_rank, bc_val_p, val_max_sz_p); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* init other modules */ mpi_errno = MPID_nem_ptl_poll_init(); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPID_nem_ptl_nm_init(); if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PTL_INIT); return mpi_errno; fn_fail: goto fn_exit; }
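/*
 * A minimal sketch of the limits negotiation done in ptl_init() above: initialize
 * the NI once just to obtain the implementation's default limits, tear it down,
 * raise any limit that is too small, then re-initialize with the adjusted limits
 * as the desired values.  The numeric floors here are placeholders, not the
 * netmod's real constants.
 */
static int example_ni_init_with_limits(ptl_handle_ni_t *ni_h,
                                       ptl_ni_limits_t *actual)
{
    ptl_ni_limits_t desired;
    int ret;

    ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
                    PTL_PID_ANY, NULL, &desired, ni_h);
    if (PTL_OK != ret) return ret;

    ret = PtlNIFini(*ni_h);
    if (PTL_OK != ret) return ret;

    /* raise limits that are known to be too low for this use */
    if (desired.max_unexpected_headers < 4096) desired.max_unexpected_headers = 4096;
    if (desired.max_list_size < 4096)          desired.max_list_size = 4096;

    return PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
                     PTL_PID_ANY, &desired, actual, ni_h);
}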
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct ompi_info_t *info, int flavor, int *model) { ompi_osc_portals4_module_t *module = NULL; int ret = OMPI_ERROR; int tmp; ptl_md_t md; ptl_me_t me; char *name; if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED; /* create module structure */ module = (ompi_osc_portals4_module_t*) calloc(1, sizeof(ompi_osc_portals4_module_t)); if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; /* fill in the function pointer part */ memcpy(module, &ompi_osc_portals4_module_template, sizeof(ompi_osc_base_module_t)); /* fill in our part */ if (MPI_WIN_FLAVOR_ALLOCATE == flavor) { module->free_after = *base = malloc(size); if (NULL == *base) goto error; } else { module->free_after = NULL; } ret = ompi_comm_dup(comm, &module->comm); if (OMPI_SUCCESS != ret) goto error; opal_output_verbose(1, ompi_osc_base_framework.framework_output, "portals4 component creating window with id %d", ompi_comm_get_cid(module->comm)); asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm)); ompi_win_set_name(win, name); free(name); /* share everyone's displacement units. Only do an allgather if strictly necessary, since it requires O(p) state. */ tmp = disp_unit; ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0, module->comm, module->comm->c_coll.coll_bcast_module); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: MPI_Bcast failed: %d\n", __FILE__, __LINE__, ret); goto error; } tmp = (tmp == disp_unit) ? 1 : 0; ret = module->comm->c_coll.coll_allreduce(MPI_IN_PLACE, &tmp, 1, MPI_INT, MPI_LAND, module->comm, module->comm->c_coll.coll_allreduce_module); if (OMPI_SUCCESS != ret) goto error; if (tmp == 1) { module->disp_unit = disp_unit; module->disp_units = NULL; } else { module->disp_unit = -1; module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm)); ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT, module->disp_units, 1, MPI_INT, module->comm, module->comm->c_coll.coll_allgather_module); if (OMPI_SUCCESS != ret) goto error; } module->ni_h = mca_osc_portals4_component.matching_ni_h; module->pt_idx = mca_osc_portals4_component.matching_pt_idx; ret = PtlCTAlloc(module->ni_h, &(module->ct_h)); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlCTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } md.start = 0; md.length = PTL_SIZE_MAX; md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; md.eq_handle = mca_osc_portals4_component.matching_eq_h; md.ct_handle = module->ct_h; ret = PtlMDBind(module->ni_h, &md, &module->md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } md.start = 0; md.length = PTL_SIZE_MAX; md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; md.eq_handle = mca_osc_portals4_component.matching_eq_h; md.ct_handle = module->ct_h; ret = PtlMDBind(module->ni_h, &md, &module->req_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } if (MPI_WIN_FLAVOR_DYNAMIC == flavor) { me.start = 0; me.length = PTL_SIZE_MAX; } else { me.start = *base; me.length = size; } me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | 
PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = module->comm->c_contextid; me.ignore_bits = 0; ret = PtlMEAppend(module->ni_h, module->pt_idx, &me, PTL_PRIORITY_LIST, NULL, &module->data_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } me.start = &module->state; me.length = sizeof(module->state); me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = module->comm->c_contextid | OSC_PORTALS4_MB_CONTROL; me.ignore_bits = 0; ret = PtlMEAppend(module->ni_h, module->pt_idx, &me, PTL_PRIORITY_LIST, NULL, &module->control_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } module->opcount = 0; module->match_bits = module->comm->c_contextid; module->atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ? mca_osc_portals4_component.matching_atomic_max : MIN(mca_osc_portals4_component.matching_atomic_max, mca_osc_portals4_component.matching_atomic_ordered_size); module->fetch_atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ? mca_osc_portals4_component.matching_fetch_atomic_max : MIN(mca_osc_portals4_component.matching_fetch_atomic_max, mca_osc_portals4_component.matching_atomic_ordered_size); module->zero = 0; module->one = 1; module->start_group = NULL; module->post_group = NULL; module->state.post_count = 0; module->state.complete_count = 0; if (check_config_value_bool("no_locks", info)) { module->state.lock = LOCK_ILLEGAL; } else { module->state.lock = LOCK_UNLOCKED; } OBJ_CONSTRUCT(&module->outstanding_locks, opal_list_t); module->passive_target_access_epoch = false; #if OPAL_ASSEMBLY_ARCH == OPAL_AMD64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 *model = MPI_WIN_UNIFIED; #else *model = MPI_WIN_SEPARATE; #endif win->w_osc_module = &module->super; PtlAtomicSync(); /* Make sure that everyone's ready to receive. */ module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); return OMPI_SUCCESS; error: /* BWB: FIX ME: This is all wrong... */ if (0 != module->ct_h) PtlCTFree(module->ct_h); if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h); if (0 != module->req_md_h) PtlMDRelease(module->req_md_h); if (0 != module->md_h) PtlMDRelease(module->md_h); if (NULL != module->comm) ompi_comm_free(&module->comm); if (NULL != module) free(module); return ret; }
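/*
 * A minimal sketch of the counter-driven completion model used by the window
 * module above: successful ACK/REPLY events are suppressed on the MD and instead
 * bump a counting event, so completing N operations is a single PtlCTWait(ct, N).
 * Handles and target are caller-supplied assumptions; "example_counted_put" is
 * hypothetical.
 */
static int example_counted_put(ptl_handle_ni_t ni_h, ptl_handle_eq_t eq_h,
                               ptl_process_t target, ptl_pt_index_t pt,
                               ptl_match_bits_t match_bits)
{
    ptl_handle_ct_t ct_h;
    ptl_handle_md_t md_h;
    ptl_md_t md;
    ptl_ct_event_t ct;
    int ret;

    ret = PtlCTAlloc(ni_h, &ct_h);
    if (PTL_OK != ret) return ret;

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_SUCCESS_DISABLE |
                 PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_CT_REPLY;
    md.eq_handle = eq_h;       /* still used for failure events */
    md.ct_handle = ct_h;
    ret = PtlMDBind(ni_h, &md, &md_h);
    if (PTL_OK != ret) { PtlCTFree(ct_h); return ret; }

    /* zero-byte put; the ACK increments the counter instead of queueing an event */
    ret = PtlPut(md_h, 0, 0, PTL_ACK_REQ, target, pt, match_bits, 0, NULL, 0);
    if (PTL_OK == ret) ret = PtlCTWait(ct_h, 1, &ct);

    PtlMDRelease(md_h);
    PtlCTFree(ct_h);
    return ret;
}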
/* called when a receive should be progressed */ int ompi_mtl_portals4_recv_progress(ptl_event_t *ev, ompi_mtl_portals4_base_request_t* ptl_base_request) { int ret; ompi_mtl_portals4_recv_request_t* ptl_request = (ompi_mtl_portals4_recv_request_t*) ptl_base_request; size_t msg_length = 0; switch (ev->type) { case PTL_EVENT_PUT: OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event", ptl_request->opcount, ev->hdr_data)); if (ev->ni_fail_type != PTL_NI_OK) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); goto callback_error; } msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data); ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits); ptl_request->super.super.ompi_req->req_status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); if (msg_length > ptl_request->delivery_len) { opal_output_verbose(1, ompi_mtl_base_output, "truncate expected: %ld %ld", msg_length, ptl_request->delivery_len); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; } #if OPAL_ENABLE_DEBUG ptl_request->hdr_data = ev->hdr_data; #endif if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) { ptl_md_t md; md.start = (char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit; md.length = ((msg_length > ptl_request->delivery_len) ? ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit; md.options = 0; md.eq_handle = ompi_mtl_portals4.eq_h; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, &ptl_request->md_h); if (PTL_OK != ret) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PtlMDBind failed: %d", __FILE__, __LINE__, ret); goto callback_error; } ret = PtlGet(ptl_request->md_h, 0, md.length, ev->initiator, ompi_mtl_portals4.read_idx, ev->hdr_data, ompi_mtl_portals4.eager_limit, ptl_request); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PtlGet failed: %d", __FILE__, __LINE__, ret); PtlMDRelease(ptl_request->md_h); if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); goto callback_error; } } else { /* make sure the data is in the right place */ ret = ompi_mtl_datatype_unpack(ptl_request->convertor, ev->start, ev->mlength); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: ompi_mtl_datatype_unpack failed: %d", __FILE__, __LINE__, ret); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; } ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, expected", ptl_request->opcount, ptl_request->hdr_data)); ptl_request->super.super.completion_callback(&ptl_request->super.super); } break; case PTL_EVENT_REPLY: OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got reply event", ptl_request->opcount, ptl_request->hdr_data)); if (ev->ni_fail_type != PTL_NI_OK) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); PtlMDRelease(ptl_request->md_h); goto callback_error; } /* set the status - most of this filled in right after issuing the PtlGet */ ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; if (ompi_mtl_portals4.protocol == rndv) { ptl_request->super.super.ompi_req->req_status._ucount += ompi_mtl_portals4.eager_limit; } /* make sure the 
data is in the right place. Use _ucount for the total length because it will be set correctly for all three protocols. mlength is only correct for eager, and delivery_len is the length of the buffer, not the length of the send. */ ret = ompi_mtl_datatype_unpack(ptl_request->convertor, ptl_request->delivery_ptr, ptl_request->super.super.ompi_req->req_status._ucount); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: ompi_mtl_datatype_unpack failed: %d", __FILE__, __LINE__, ret); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; } PtlMDRelease(ptl_request->md_h); OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, reply", ptl_request->opcount, ptl_request->hdr_data)); ptl_request->super.super.completion_callback(&ptl_request->super.super); break; case PTL_EVENT_PUT_OVERFLOW: OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put_overflow event", ptl_request->opcount, ev->hdr_data)); if (ev->ni_fail_type != PTL_NI_OK) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); goto callback_error; } msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data); ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits); ptl_request->super.super.ompi_req->req_status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); if (msg_length > ptl_request->delivery_len) { opal_output_verbose(1, ompi_mtl_base_output, "truncate unexpected: %ld %ld %d", msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; } #if OPAL_ENABLE_DEBUG ptl_request->hdr_data = ev->hdr_data; #endif /* overflow case. Short messages have the buffer stashed somewhere. Long messages left in buffer at the source */ if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) { ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; if (ev->mlength > 0) { struct iovec iov; uint32_t iov_count = 1; size_t max_data; iov.iov_base = (char*) ev->start; iov.iov_len = ev->mlength; max_data = iov.iov_len; ret = opal_convertor_unpack(ptl_request->convertor, &iov, &iov_count, &max_data ); if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); if (ret < 0) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: opal_convertor_unpack failed: %d", __FILE__, __LINE__, ret); goto callback_error; } } /* if it's a sync, send the ack */ if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) { OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) sending sync ack", ptl_request->opcount, ptl_request->hdr_data)); ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, 0, PTL_NO_ACK_REQ, ev->initiator, ompi_mtl_portals4.read_idx, ev->hdr_data, 0, NULL, 0); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PtlPut failed: %d", __FILE__, __LINE__, ret); goto callback_error; } } OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short (0x%lx)", ptl_request->opcount, ptl_request->hdr_data, (long) ev->start)); ptl_request->super.super.completion_callback(&ptl_request->super.super); } else { ptl_md_t md; if (ev->mlength > 0) { /* if rndv or triggered, copy the eager part to the right place */ memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength); } md.start = (char*) ptl_request->delivery_ptr + ev->mlength; md.length = ((msg_length > ptl_request->delivery_len) ? 
ptl_request->delivery_len : msg_length) - ev->mlength; md.options = 0; md.eq_handle = ompi_mtl_portals4.eq_h; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, &ptl_request->md_h); if (PTL_OK != ret) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PtlMDBind failed: %d", __FILE__, __LINE__, ret); goto callback_error; } OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) getting long data", ptl_request->opcount, ptl_request->hdr_data)); ret = PtlGet(ptl_request->md_h, 0, md.length, ev->initiator, ompi_mtl_portals4.read_idx, ev->hdr_data, ev->mlength, ptl_request); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PtlGet failed: %d", __FILE__, __LINE__, ret); if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); PtlMDRelease(ptl_request->md_h); goto callback_error; } } break; default: opal_output_verbose(1, ompi_mtl_base_output, "Unhandled receive callback with event type %d", ev->type); return OMPI_ERROR; } return OMPI_SUCCESS; callback_error: ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ompi_mtl_portals4_get_error(ret); ptl_request->super.super.completion_callback(&ptl_request->super.super); return OMPI_SUCCESS; }
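/*
 * A minimal sketch of the long-message pull done above: the first ev->mlength
 * bytes arrived with the (overflow) put, so the receiver binds an MD over the rest
 * of its buffer, truncates to the smaller of the sent length and the posted
 * buffer, and issues a PtlGet whose remote offset skips the part already
 * delivered.  Completion then arrives as a PTL_EVENT_REPLY on the same EQ.  Names
 * prefixed "example_" are hypothetical; all handles are caller-supplied.
 */
static int example_pull_remainder(ptl_handle_ni_t ni_h, ptl_handle_eq_t eq_h,
                                  ptl_pt_index_t read_idx,
                                  const ptl_event_t *ev,
                                  void *delivery_ptr, ptl_size_t delivery_len,
                                  ptl_size_t msg_length, void *user_ptr,
                                  ptl_handle_md_t *md_h)
{
    ptl_md_t md;
    ptl_size_t recv_len = (msg_length > delivery_len) ? delivery_len : msg_length;
    int ret;

    md.start = (char *) delivery_ptr + ev->mlength;   /* skip the eager part */
    md.length = recv_len - ev->mlength;
    md.options = 0;
    md.eq_handle = eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ni_h, &md, md_h);
    if (PTL_OK != ret) return ret;

    /* hdr_data carries the match bits the sender exposed on its read-side ME */
    ret = PtlGet(*md_h, 0, md.length, ev->initiator, read_idx,
                 ev->hdr_data, ev->mlength, user_ptr);
    if (PTL_OK != ret) PtlMDRelease(*md_h);
    return ret;
}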