static void postME(ptl_handle_ni_t ni, ptl_pt_index_t index, void *start, ptl_size_t length, ptl_process_t src, int tag, ptl_handle_me_t *mh) { int rc; ptl_me_t me; unsigned int options = PTL_ME_OP_PUT | PTL_ME_ACK_DISABLE | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_LINK_DISABLE; me.start = (char *)start; me.length = length; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = PTL_UID_ANY; me.options = options; me.match_id = src; me.match_bits = tag; me.ignore_bits = 0; rc = PtlMEAppend(ni, index, &me, PTL_PRIORITY_LIST, NULL, mh); LIBTEST_CHECK(rc, "Error in libtest_CreateME(): PtlMEAppend"); }
static inline int ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) { ptl_match_bits_t match_bits = MTL_PORTALS4_SHORT_MSG; ptl_match_bits_t ignore_bits; ptl_me_t me; int ret; ignore_bits = MTL_PORTALS4_CONTEXT_MASK | MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK; me.start = block->start; me.length = ompi_mtl_portals4.recv_short_size; me.ct_handle = PTL_CT_NONE; me.min_free = ompi_mtl_portals4.short_limit; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_MANAGE_LOCAL | PTL_ME_MAY_ALIGN; if (ompi_mtl_portals4.use_logical) { me.match_id.rank = PTL_RANK_ANY; } else { me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; } me.match_bits = match_bits; me.ignore_bits = ignore_bits; OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex); block->status = BLOCK_STATUS_WAITING_LINK; OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex); ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx, &me, PTL_OVERFLOW_LIST, &block->request, &block->me_h); if (OPAL_LIKELY(ret == PTL_OK)) { ret = OMPI_SUCCESS; } else { ret = ompi_mtl_portals4_get_error(ret); } return ret; }
static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl_match_bits_t match_bits, MPID_Request *sreq) { int i, ret, was_incomplete; MPID_nem_ptl_vc_area *vc_ptl; ptl_me_t me; vc_ptl = VC_PTL(vc); me.start = buf; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE ); me.match_id = vc_ptl->id; me.match_bits = match_bits; me.ignore_bits = 0; me.min_free = 0; /* allocate enough handles to cover all get operations */ REQ_PTL(sreq)->get_me_p = MPIU_Malloc(sizeof(ptl_handle_me_t) * ((left_to_send / MPIDI_nem_ptl_ni_limits.max_msg_size) + 1)); /* queue up as many entries as necessary to describe the entire message */ for (i = 0; left_to_send > 0; i++) { /* send up to the maximum allowed by the portals interface */ if (left_to_send > MPIDI_nem_ptl_ni_limits.max_msg_size) me.length = MPIDI_nem_ptl_ni_limits.max_msg_size; else me.length = left_to_send; ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->get_me_p[i]); DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq); MPIU_Assert(ret == 0); /* increment the cc for each get operation */ MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete); MPIU_Assert(was_incomplete); /* account for what has been sent */ me.start = (char *)me.start + me.length; left_to_send -= me.length; } }
static inline int ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) { ptl_match_bits_t match_bits = MTL_PORTALS4_SHORT_MSG; ptl_match_bits_t ignore_bits; ptl_me_t me; int ret; opal_list_remove_item(&ompi_mtl_portals4.waiting_recv_short_blocks, &block->base); ignore_bits = MTL_PORTALS4_CONTEXT_MASK | MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK; me.start = block->start; me.length = ompi_mtl_portals4.recv_short_size; me.ct_handle = PTL_CT_NONE; me.min_free = ompi_mtl_portals4.eager_limit; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_MANAGE_LOCAL | PTL_ME_MAY_ALIGN; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = match_bits; me.ignore_bits = ignore_bits; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx, &me, PTL_OVERFLOW_LIST, &block->request, &block->me_h); if (OPAL_LIKELY(ret == PTL_OK)) { ret = OMPI_SUCCESS; opal_list_append(&ompi_mtl_portals4.active_recv_short_blocks, &block->base); } else { ret = ompi_mtl_portals4_get_error(ret); } return ret; }
int rptli_post_control_buffer(ptl_handle_ni_t ni_handle, ptl_pt_index_t pt, ptl_handle_me_t * me_handle) { int ret; ptl_me_t me; ptl_process_t id; MPIDI_STATE_DECL(MPID_STATE_RPTLI_POST_CONTROL_BUFFER); MPIDI_FUNC_ENTER(MPID_STATE_RPTLI_POST_CONTROL_BUFFER); id.phys.nid = PTL_NID_ANY; id.phys.pid = PTL_PID_ANY; me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = (PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE); me.match_id = id; me.match_bits = 0; me.ignore_bits = 0; me.min_free = 0; while (1) { ret = PtlMEAppend(ni_handle, pt, &me, PTL_PRIORITY_LIST, NULL, me_handle); if (ret != PTL_NO_SPACE) break; } RPTLU_ERR_POP(ret, "Error appending empty buffer to priority list\n"); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_RPTLI_POST_CONTROL_BUFFER); return ret; fn_fail: goto fn_exit; }
static int append_overflow(int i) { int mpi_errno = MPI_SUCCESS; int ret; ptl_me_t me; ptl_process_t id_any; MPIDI_STATE_DECL(MPID_STATE_APPEND_OVERFLOW); MPIDI_FUNC_ENTER(MPID_STATE_APPEND_OVERFLOW); MPIU_Assert(i >= 0 && i < NUM_OVERFLOW_ME); id_any.phys.pid = PTL_PID_ANY; id_any.phys.nid = PTL_NID_ANY; me.start = overflow_buf[i]; me.length = OVERFLOW_LENGTH; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_MANAGE_LOCAL | PTL_ME_NO_TRUNCATE | PTL_ME_MAY_ALIGN | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE ); me.match_id = id_any; me.match_bits = 0; me.ignore_bits = ~((ptl_match_bits_t)0); me.min_free = PTL_LARGE_THRESHOLD; /* if there is no space to append the entry, process outstanding events and try again */ ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i, &overflow_me_handle[i]); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_APPEND_OVERFLOW); return mpi_errno; fn_fail: goto fn_exit; }
static int btl_portals4_init_interface(void) { mca_btl_portals4_module_t *portals4_btl; unsigned int ret, interface; ptl_md_t md; ptl_me_t me; // The initialisation of EQ, PT and ME must be done after the SetMap ! for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) { portals4_btl = mca_btl_portals4_component.btls[interface]; /* create event queue */ ret = PtlEQAlloc(portals4_btl->portals_ni_h, mca_btl_portals4_component.recv_queue_size, &portals4_btl->recv_eq_h); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlEQAlloc failed for NI %d: %d", __FILE__, __LINE__, interface, ret); goto error; } mca_btl_portals4_component.eqs_h[interface] = portals4_btl->recv_eq_h; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlEQAlloc (recv_eq=%d) OK for NI %d\n", portals4_btl->recv_eq_h, interface)); /* Create recv_idx portal table entry */ ret = PtlPTAlloc(portals4_btl->portals_ni_h, PTL_PT_ONLY_TRUNCATE, portals4_btl->recv_eq_h, REQ_BTL_TABLE_ID, &portals4_btl->recv_idx); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlPTAlloc failed for NI %d: %d", __FILE__, __LINE__, interface, ret); goto error; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlPTAlloc (recv_idx) OK for NI %d recv_idx=%d", interface, portals4_btl->recv_idx)); if (portals4_btl->recv_idx != REQ_BTL_TABLE_ID) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlPTAlloc did not allocate the requested PT: %d", __FILE__, __LINE__, portals4_btl->recv_idx); goto error; } /* bind zero-length md for sending acks */ md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = PTL_EQ_NONE; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(portals4_btl->portals_ni_h, &md, &portals4_btl->zero_md_h); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlMDBind failed for NI %d: %d", __FILE__, __LINE__, interface, ret); goto error; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMDBind (zero-length md=%d) OK for NI %d", portals4_btl->zero_md_h, interface)); /* Bind MD across all memory */ md.start = 0; md.length = PTL_SIZE_MAX; md.options = 0; md.eq_handle = portals4_btl->recv_eq_h; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(portals4_btl->portals_ni_h, &md, &portals4_btl->send_md_h); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlMDBind failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret); goto error; } /* Handle long overflows */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = PTL_UID_ANY; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; if (mca_btl_portals4_component.use_logical) { me.match_id.rank = PTL_RANK_ANY; } else { me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; } me.match_bits = BTL_PORTALS4_LONG_MSG; me.ignore_bits = BTL_PORTALS4_CONTEXT_MASK | BTL_PORTALS4_SOURCE_MASK | BTL_PORTALS4_TAG_MASK; ret = PtlMEAppend(portals4_btl->portals_ni_h, portals4_btl->recv_idx, &me, PTL_OVERFLOW_LIST, NULL, &portals4_btl->long_overflow_me_h); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlMEAppend failed for NI %d: %d", __FILE__, __LINE__, interface, ret); goto error; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMEAppend (overflow list) OK for NI %d", interface)); } ret = mca_btl_portals4_recv_enable(portals4_btl); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: Initialization of recv buffer failed: %d", __FILE__, __LINE__, ret); goto error; } return OPAL_SUCCESS; error: opal_output_verbose(1, opal_btl_base_framework.framework_output, "Error in btl_portals4_init_interface"); for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) { portals4_btl = mca_btl_portals4_component.btls[interface]; if (NULL != portals4_btl) mca_btl_portals4_free_module(portals4_btl); } mca_btl_portals4_component.num_btls = 0; if (NULL != mca_btl_portals4_component.btls) free(mca_btl_portals4_component.btls); if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h); mca_btl_portals4_component.btls = NULL; mca_btl_portals4_component.eqs_h = NULL; return OPAL_ERROR; }
mca_btl_base_registration_handle_t * mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base, mca_btl_base_endpoint_t *endpoint, void *base, size_t size, uint32_t flags) { struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; mca_btl_base_registration_handle_t *handle = NULL; ptl_me_t me; int ret; handle = (mca_btl_base_registration_handle_t *)malloc(sizeof(mca_btl_base_registration_handle_t)); if (!handle) { return NULL; } handle->key = OPAL_THREAD_ADD_FETCH64(&(portals4_btl->portals_rdma_key), 1); handle->remote_offset = 0; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld flags=%d", portals4_btl->interface_num, base, size, (void *)handle, handle->key, flags)); /* create a match entry */ me.start = base; me.length = size; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = PTL_UID_ANY; me.options = PTL_ME_OP_GET | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; if (mca_btl_portals4_component.use_logical) { me.match_id.rank = endpoint->ptl_proc.rank; } else { me.match_id.phys.nid = endpoint->ptl_proc.phys.nid; me.match_id.phys.pid = endpoint->ptl_proc.phys.pid; } me.match_bits = handle->key; me.ignore_bits = BTL_PORTALS4_PROTOCOL_MASK | BTL_PORTALS4_CONTEXT_MASK | BTL_PORTALS4_SOURCE_MASK; me.ignore_bits = 0; ret = PtlMEAppend(portals4_btl->portals_ni_h, portals4_btl->recv_idx, &me, PTL_PRIORITY_LIST, handle, &(handle->me_h)); if (PTL_OK != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1); return NULL; } OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n", (void *)handle, handle->me_h, me.start, me.length, me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits)); return handle; }
int ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, int src, int tag, struct opal_convertor_t *convertor, mca_mtl_request_t *mtl_request) { ptl_match_bits_t match_bits, ignore_bits; int ret = OMPI_SUCCESS; ptl_process_t remote_proc; mca_mtl_base_endpoint_t *endpoint = NULL; ompi_mtl_portals4_recv_request_t *ptl_request = (ompi_mtl_portals4_recv_request_t*) mtl_request; void *start; size_t length; bool free_after; ptl_me_t me; if (MPI_ANY_SOURCE == src) { remote_proc.phys.nid = PTL_NID_ANY; remote_proc.phys.pid = PTL_PID_ANY; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml; remote_proc = endpoint->ptl_proc; } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, src, tag); ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PtlMEAppend failed: %d", __FILE__, __LINE__, ret); return ret; } #if OPAL_ENABLE_DEBUG ptl_request->opcount = ++ompi_mtl_portals4.recv_opcount; ptl_request->hdr_data = 0; #endif ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->convertor = convertor; ptl_request->delivery_ptr = start; ptl_request->delivery_len = length; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d from %x,%x of length %d (0x%lx, 0x%lx)\n", ptl_request->opcount, remote_proc.phys.nid, remote_proc.phys.pid, (int)length, match_bits, ignore_bits)); me.start = start; me.length = length; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = PTL_UID_ANY; me.options = PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id = remote_proc; me.match_bits = match_bits; me.ignore_bits = ignore_bits; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.send_idx, &me, PTL_PRIORITY_LIST, ptl_request, &ptl_request->me_h); if (PTL_OK != ret) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); opal_output_verbose(1, ompi_mtl_base_output, "%s:%d: PtlMEAppend failed: %d", __FILE__, __LINE__, ret); return ompi_mtl_portals4_get_error(ret); } return OMPI_SUCCESS; }
int ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, int src, int tag, struct opal_convertor_t *convertor, mca_mtl_request_t *mtl_request) { ptl_match_bits_t match_bits, ignore_bits; int ret = OMPI_SUCCESS; ptl_process_t remote_proc; ompi_mtl_portals4_recv_request_t *ptl_request = (ompi_mtl_portals4_recv_request_t*) mtl_request; void *start; size_t length; bool free_after; ptl_me_t me; if (MPI_ANY_SOURCE == src) { if (ompi_mtl_portals4.use_logical) { remote_proc.rank = PTL_RANK_ANY; } else { remote_proc.phys.nid = PTL_NID_ANY; remote_proc.phys.pid = PTL_PID_ANY; } } else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) { remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, src, tag); ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; } ptl_request->super.type = portals4_req_recv; ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; #if OPAL_ENABLE_DEBUG ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1); ptl_request->hdr_data = 0; #endif ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->convertor = convertor; ptl_request->delivery_ptr = start; ptl_request->delivery_len = length; ptl_request->req_started = false; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; ptl_request->pending_reply = 0; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n", ptl_request->opcount, remote_proc.phys.nid, remote_proc.phys.pid, (int64_t)length, match_bits, ignore_bits, (unsigned long) ptl_request)); me.start = start; me.length = length; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE; if (length <= ompi_mtl_portals4.short_limit) { me.options |= PTL_ME_EVENT_LINK_DISABLE; } me.match_id = remote_proc; me.match_bits = match_bits; me.ignore_bits = ignore_bits; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx, &me, PTL_PRIORITY_LIST, ptl_request, &ptl_request->me_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d", __FILE__, __LINE__, ret); return ompi_mtl_portals4_get_error(ret); } /* if a long message, spin until we either have a comm event or a link event, guaranteeing progress for long unexpected messages. */ if (length > ompi_mtl_portals4.short_limit) { while (true != ptl_request->req_started) { ompi_mtl_portals4_progress(); } } return OMPI_SUCCESS; }
static int setup_sync_handles(struct ompi_communicator_t *comm, ompi_coll_portals4_request_t *request, mca_coll_portals4_module_t *portals4_module) { int ret, line; ptl_me_t me; OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank)); /**********************************/ /* Setup Sync Handles */ /**********************************/ COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm), 0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)", request->u.scatter.my_rank, request->u.scatter.sync_match_bits)); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &request->u.scatter.sync_cth); if (PTL_OK != ret) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr; } request->u.scatter.sync_mdh = mca_coll_portals4_component.zero_md_h; me.start = NULL; me.length = 0; me.ct_handle = request->u.scatter.sync_cth; me.min_free = 0; me.uid = mca_coll_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = request->u.scatter.sync_match_bits; me.ignore_bits = 0; ret = PtlMEAppend(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx, &me, PTL_PRIORITY_LIST, NULL, &request->u.scatter.sync_meh); if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank)); return OMPI_SUCCESS; err_hdlr: opal_output(ompi_coll_base_framework.framework_output, "%s:%4d:%4d\tError occurred ret=%d, rank %2d", __FILE__, __LINE__, line, ret, request->u.scatter.my_rank); return ret; }
int ompi_mtl_portals4_flowctl_init(void) { ptl_me_t me; int ret; ompi_mtl_portals4.flowctl.flowctl_active = false; OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_t); OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.pending_fl, opal_free_list_t); opal_free_list_init(&ompi_mtl_portals4.flowctl.pending_fl, sizeof(ompi_mtl_portals4_pending_request_t), opal_cache_line_size, OBJ_CLASS(ompi_mtl_portals4_pending_request_t), 0, 0, 1, -1, 1, NULL, 0, NULL, NULL, NULL); ompi_mtl_portals4.flowctl.max_send_slots = (ompi_mtl_portals4.send_queue_size - 3) / 3; ompi_mtl_portals4.flowctl.send_slots = ompi_mtl_portals4.flowctl.max_send_slots; ompi_mtl_portals4.flowctl.alert_req.type = portals4_req_flowctl; ompi_mtl_portals4.flowctl.alert_req.event_callback = flowctl_alert_callback; ompi_mtl_portals4.flowctl.fanout_req.type = portals4_req_flowctl; ompi_mtl_portals4.flowctl.fanout_req.event_callback = flowctl_fanout_callback; ompi_mtl_portals4.flowctl.epoch_counter = -1; ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, PTL_PT_ONLY_TRUNCATE, ompi_mtl_portals4.send_eq_h, REQ_FLOWCTL_TABLE_ID, &ompi_mtl_portals4.flowctl_idx); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.flowctl.trigger_ct_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlCTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* everyone creates the trigger ME, even if the root may be the only to use it */ me.start = NULL; me.length = 0; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; if (ompi_mtl_portals4.use_logical) { me.match_id.rank = PTL_RANK_ANY; } else { me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; } me.ignore_bits = 0; me.options = PTL_ME_OP_PUT | PTL_ME_ACK_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_CT_COMM; me.ct_handle = ompi_mtl_portals4.flowctl.trigger_ct_h; me.match_bits = MTL_PORTALS4_FLOWCTL_TRIGGER; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx, &me, PTL_PRIORITY_LIST, NULL, &ompi_mtl_portals4.flowctl.trigger_me_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Alert CT/ME for broadcasting out alert when root receives a trigger */ ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.flowctl.alert_ct_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlCTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } me.options = PTL_ME_OP_PUT | PTL_ME_ACK_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_CT_COMM; me.ct_handle = ompi_mtl_portals4.flowctl.alert_ct_h; me.match_bits = MTL_PORTALS4_FLOWCTL_ALERT; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx, &me, PTL_PRIORITY_LIST, &ompi_mtl_portals4.flowctl.alert_req, &ompi_mtl_portals4.flowctl.alert_me_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Fanin CT/ME for receiving fan-in for restart */ ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.flowctl.fanin_ct_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlCTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } me.options = PTL_ME_OP_PUT | PTL_ME_ACK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_CT_COMM; me.ct_handle = ompi_mtl_portals4.flowctl.fanin_ct_h; me.match_bits = MTL_PORTALS4_FLOWCTL_FANIN; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx, &me, PTL_PRIORITY_LIST, NULL, &ompi_mtl_portals4.flowctl.fanin_me_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Fan-out CT/ME for sending restart messages after fan-in */ ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.flowctl.fanout_ct_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlCTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } me.options = PTL_ME_OP_PUT | PTL_ME_ACK_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_CT_COMM; me.ct_handle = ompi_mtl_portals4.flowctl.fanout_ct_h; me.match_bits = MTL_PORTALS4_FLOWCTL_FANOUT; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx, &me, PTL_PRIORITY_LIST, &ompi_mtl_portals4.flowctl.fanout_req, &ompi_mtl_portals4.flowctl.fanout_me_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } ompi_mtl_portals4.flowctl.num_children = 0; gettimeofday(&ompi_mtl_portals4.flowctl.tv, NULL); ompi_mtl_portals4.flowctl.backoff_count = 0; ret = OMPI_SUCCESS; error: return ret; }
static inline int ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, int localrank, ptl_process_t ptl_proc, ompi_mtl_portals4_isend_request_t *ptl_request) { int ret; ptl_match_bits_t match_bits; ptl_me_t me; ptl_hdr_data_t hdr_data; ptl_size_t put_length; MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_LONG_MSG); MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 0); me.start = start; me.length = length; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id = ptl_proc; me.match_bits = hdr_data; me.ignore_bits = 0; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx, &me, PTL_PRIORITY_LIST, ptl_request, &ptl_request->me_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d", __FILE__, __LINE__, ret); return ompi_mtl_portals4_get_error(ret); } OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Send %lu long send with hdr_data 0x%lx (0x%lx)", ptl_request->opcount, hdr_data, match_bits)); put_length = (rndv == ompi_mtl_portals4.protocol) ? (ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length; ret = PtlPut(ompi_mtl_portals4.send_md_h, (ptl_size_t) start, put_length, PTL_ACK_REQ, ptl_proc, ompi_mtl_portals4.recv_idx, match_bits, 0, ptl_request, hdr_data); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPut failed: %d", __FILE__, __LINE__, ret); PtlMEUnlink(ptl_request->me_h); return ompi_mtl_portals4_get_error(ret); } return OMPI_SUCCESS; }
static inline int ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int length, int contextid, int tag, int localrank, ptl_process_t ptl_proc, ompi_mtl_portals4_isend_request_t *ptl_request) { int ret; ptl_match_bits_t match_bits; ptl_me_t me; ptl_hdr_data_t hdr_data; MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_SHORT_MSG); MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? 1 : 0); if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) { me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id = ptl_proc; me.match_bits = hdr_data; me.ignore_bits = 0; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx, &me, PTL_PRIORITY_LIST, ptl_request, &ptl_request->me_h); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d", __FILE__, __LINE__, ret); return ompi_mtl_portals4_get_error(ret); } OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Send %lu short sync send with hdr_data 0x%lx (0x%lx)", ptl_request->opcount, hdr_data, match_bits)); } else { ptl_request->event_count = 1; ptl_request->me_h = PTL_INVALID_HANDLE; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Send %lu short send with hdr_data 0x%lx (0x%lx)", ptl_request->opcount, hdr_data, match_bits)); } OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Send %lu, start: %p", ptl_request->opcount, start)); ret = PtlPut(ompi_mtl_portals4.send_md_h, (ptl_size_t) start, length, PTL_ACK_REQ, ptl_proc, ompi_mtl_portals4.recv_idx, match_bits, 0, ptl_request, hdr_data); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPut failed: %d", __FILE__, __LINE__, ret); if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) { PtlMEUnlink(ptl_request->me_h); } return ompi_mtl_portals4_get_error(ret); } return OMPI_SUCCESS; }
static int portals4_init_interface(void) { unsigned int ret; ptl_md_t md; ptl_me_t me; /* create event queues */ ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.send_queue_size, &ompi_mtl_portals4.send_eq_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlEQAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_queue_size, &ompi_mtl_portals4.recv_eq_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlEQAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Create send and long message (read) portal table entries */ ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, ompi_mtl_portals4.recv_eq_h, REQ_RECV_TABLE_ID, &ompi_mtl_portals4.recv_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } if (ompi_mtl_portals4.recv_idx != REQ_RECV_TABLE_ID) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", __FILE__, __LINE__, ompi_mtl_portals4.recv_idx); goto error; } ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE, ompi_mtl_portals4.send_eq_h, REQ_READ_TABLE_ID, &ompi_mtl_portals4.read_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } if (ompi_mtl_portals4.read_idx != REQ_READ_TABLE_ID) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n", __FILE__, __LINE__, ompi_mtl_portals4.read_idx); goto error; } /* bind zero-length md for sending acks */ md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = PTL_EQ_NONE; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, &ompi_mtl_portals4.zero_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Bind MD across all memory */ md.start = 0; md.length = PTL_SIZE_MAX; md.options = 0; md.eq_handle = ompi_mtl_portals4.send_eq_h; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(ompi_mtl_portals4.ni_h, &md, &ompi_mtl_portals4.send_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* Handle long overflows */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_COMM_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; if (ompi_mtl_portals4.use_logical) { me.match_id.rank = PTL_RANK_ANY; } else { me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; } me.match_bits = MTL_PORTALS4_LONG_MSG; me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK; ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx, &me, PTL_OVERFLOW_LIST, NULL, &ompi_mtl_portals4.long_overflow_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } /* attach short unex recv blocks */ ret = ompi_mtl_portals4_recv_short_init(); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: short receive block initialization failed: %d\n", __FILE__, __LINE__, ret); goto error; } ompi_mtl_portals4.opcount = 0; #if OPAL_ENABLE_DEBUG ompi_mtl_portals4.recv_opcount = 0; #endif #if OMPI_MTL_PORTALS4_FLOW_CONTROL ret = ompi_mtl_portals4_flowctl_init(); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n", __FILE__, __LINE__, ret); goto error; } #endif return OMPI_SUCCESS; error: if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) { PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); } if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(ompi_mtl_portals4.zero_md_h); } if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) { PtlMDRelease(ompi_mtl_portals4.send_md_h); } if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) { PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx); } if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) { PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx); } if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) { PtlEQFree(ompi_mtl_portals4.send_eq_h); } if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) { PtlEQFree(ompi_mtl_portals4.recv_eq_h); } return OMPI_ERROR; }
/* /!\ Called for each processes /!\ */ static int portals4_init_query(bool enable_progress_threads, bool enable_mpi_threads) { int ret; ptl_md_t md; ptl_me_t me; /* Initialize Portals and create a physical, matching interface */ ret = PtlInit(); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlInit failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_PHYSICAL | PTL_NI_MATCHING, PTL_PID_ANY, NULL, &mca_coll_portals4_component.ni_limits, &mca_coll_portals4_component.ni_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlNIInit failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlGetid failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* FIX ME: Need to make sure our ID matches with the MTL... */ ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlGetUid failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlEQAlloc(mca_coll_portals4_component.ni_h, MCA_COLL_PORTALS4_EQ_SIZE, &mca_coll_portals4_component.eq_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlEQAlloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlPTAlloc(mca_coll_portals4_component.ni_h, 0, mca_coll_portals4_component.eq_h, REQ_COLL_TABLE_ID, &mca_coll_portals4_component.pt_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n", __FILE__, __LINE__, mca_coll_portals4_component.finish_pt_idx); return OMPI_ERROR; } ret = PtlPTAlloc(mca_coll_portals4_component.ni_h, 0, mca_coll_portals4_component.eq_h, REQ_COLL_FINISH_TABLE_ID, &mca_coll_portals4_component.finish_pt_idx); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n", __FILE__, __LINE__, mca_coll_portals4_component.finish_pt_idx); return OMPI_ERROR; } /* Bind MD/MDs across all memory. We prefer (for obvious reasons) to have a single MD across all of memory */ memset(&md, 0, sizeof(ptl_md_t)); md.start = 0; md.length = 0; md.options = 0; md.eq_handle = PTL_EQ_NONE; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(mca_coll_portals4_component.ni_h, &md, &mca_coll_portals4_component.zero_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } md.start = 0; md.length = PTL_SIZE_MAX; md.options = 0; md.eq_handle = PTL_EQ_NONE; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(mca_coll_portals4_component.ni_h, &md, &mca_coll_portals4_component.data_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length)); /* setup finish ack ME */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = mca_coll_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = 0; me.ignore_bits = 0; ret = PtlMEAppend(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.finish_pt_idx, &me, PTL_PRIORITY_LIST, NULL, &mca_coll_portals4_component.finish_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* This ME is used for RTR exchange only */ me.start = NULL; me.length = 0; me.ct_handle = PTL_CT_NONE; me.min_free = 0; me.uid = mca_coll_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; /* Note : the RTR bit must be set to match this ME, * this allows to discriminate the RTR from data flow * (especially for the Barrier operations) */ COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0); me.ignore_bits = ~COLL_PORTALS4_RTR_MASK; ret = PtlMEAppend(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx, &me, PTL_OVERFLOW_LIST, NULL, &mca_coll_portals4_component.unex_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* activate progress callback */ ret = opal_progress_register(portals4_progress); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: opal_progress_register failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } return OMPI_SUCCESS; }
int ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module; int ret, i, dim, hibit, mask, num_msgs; int size = ompi_comm_size(comm); int rank = ompi_comm_rank(comm); ptl_ct_event_t ct; ptl_handle_ct_t ct_h; ptl_handle_me_t me_h; ptl_me_t me; size_t count; ptl_match_bits_t match_bits; ptl_handle_md_t md_h; void *base; ompi_coll_portals4_get_md(0, &md_h, &base); count = opal_atomic_add_size_t(&portals4_module->barrier_count, 1); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &ct_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlCTAlloc failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERR_TEMP_OUT_OF_RESOURCE; } COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, COLL_PORTALS4_BARRIER, count); /* Build "tree" out of hypercube */ dim = comm->c_cube_dim; hibit = opal_hibit(rank, dim); --dim; /* receive space */ me.start = NULL; me.length = 0; me.ct_handle = ct_h; me.min_free = 0; me.uid = mca_coll_portals4_component.uid; me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = match_bits; me.ignore_bits = 0; ret = PtlMEAppend(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx, &me, PTL_PRIORITY_LIST, NULL, &me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* calculate number of children to receive from */ num_msgs = ompi_coll_portals4_get_nchildren(dim + 1, hibit, rank, size); /* send to parent when children have sent to us */ if (rank > 0) { int parent = rank & ~(1 << hibit); ret = PtlTriggeredPut(md_h, 0, 0, PTL_NO_ACK_REQ, ompi_coll_portals4_get_peer(comm, parent), mca_coll_portals4_component.pt_idx, match_bits, 0, NULL, 0, ct_h, num_msgs); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlTriggeredPut failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* we'll need to wait for the parent response before the next set of comms */ num_msgs++; } /* send to children when parent (or all children if root) has sent to us */ for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) { int peer = rank | mask; if (peer < size) { ret = PtlTriggeredPut(md_h, 0, 0, PTL_NO_ACK_REQ, ompi_coll_portals4_get_peer(comm, peer), mca_coll_portals4_component.pt_idx, match_bits, 0, NULL, 0, ct_h, num_msgs); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlTriggeredPut failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } } } /* Wait for all incoming messages */ ret = PtlCTWait(ct_h, num_msgs, &ct); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlCTWait failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } /* cleanup */ ret = PtlMEUnlink(me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEUnlink failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } ret = PtlCTFree(ct_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlCTFree failed: %d\n", __FILE__, __LINE__, ret); return OMPI_ERROR; } return OMPI_SUCCESS; }
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct ompi_info_t *info, int flavor, int *model) { ompi_osc_portals4_module_t *module = NULL; int ret = OMPI_ERROR; int tmp; ptl_md_t md; ptl_me_t me; char *name; if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED; /* create module structure */ module = (ompi_osc_portals4_module_t*) calloc(1, sizeof(ompi_osc_portals4_module_t)); if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; /* fill in the function pointer part */ memcpy(module, &ompi_osc_portals4_module_template, sizeof(ompi_osc_base_module_t)); /* fill in our part */ if (MPI_WIN_FLAVOR_ALLOCATE == flavor) { module->free_after = *base = malloc(size); if (NULL == *base) goto error; } else { module->free_after = NULL; } ret = ompi_comm_dup(comm, &module->comm); if (OMPI_SUCCESS != ret) goto error; opal_output_verbose(1, ompi_osc_base_framework.framework_output, "portals4 component creating window with id %d", ompi_comm_get_cid(module->comm)); asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm)); ompi_win_set_name(win, name); free(name); /* share everyone's displacement units. Only do an allgather if strictly necessary, since it requires O(p) state. */ tmp = disp_unit; ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0, module->comm, module->comm->c_coll.coll_bcast_module); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: MPI_Bcast failed: %d\n", __FILE__, __LINE__, ret); goto error; } tmp = (tmp == disp_unit) ? 1 : 0; ret = module->comm->c_coll.coll_allreduce(MPI_IN_PLACE, &tmp, 1, MPI_INT, MPI_LAND, module->comm, module->comm->c_coll.coll_allreduce_module); if (OMPI_SUCCESS != ret) goto error; if (tmp == 1) { module->disp_unit = disp_unit; module->disp_units = NULL; } else { module->disp_unit = -1; module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm)); ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT, module->disp_units, 1, MPI_INT, module->comm, module->comm->c_coll.coll_allgather_module); if (OMPI_SUCCESS != ret) goto error; } module->ni_h = mca_osc_portals4_component.matching_ni_h; module->pt_idx = mca_osc_portals4_component.matching_pt_idx; ret = PtlCTAlloc(module->ni_h, &(module->ct_h)); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlCTAlloc failed: %d\n", __FILE__, __LINE__, ret); goto error; } md.start = 0; md.length = PTL_SIZE_MAX; md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; md.eq_handle = mca_osc_portals4_component.matching_eq_h; md.ct_handle = module->ct_h; ret = PtlMDBind(module->ni_h, &md, &module->md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } md.start = 0; md.length = PTL_SIZE_MAX; md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK; md.eq_handle = mca_osc_portals4_component.matching_eq_h; md.ct_handle = module->ct_h; ret = PtlMDBind(module->ni_h, &md, &module->req_md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMDBind failed: %d\n", __FILE__, __LINE__, ret); goto error; } if (MPI_WIN_FLAVOR_DYNAMIC == flavor) { me.start = 0; me.length = PTL_SIZE_MAX; } else { me.start = *base; me.length = size; } me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = module->comm->c_contextid; me.ignore_bits = 0; ret = PtlMEAppend(module->ni_h, module->pt_idx, &me, PTL_PRIORITY_LIST, NULL, &module->data_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } me.start = &module->state; me.length = sizeof(module->state); me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE; me.match_id.phys.nid = PTL_NID_ANY; me.match_id.phys.pid = PTL_PID_ANY; me.match_bits = module->comm->c_contextid | OSC_PORTALS4_MB_CONTROL; me.ignore_bits = 0; ret = PtlMEAppend(module->ni_h, module->pt_idx, &me, PTL_PRIORITY_LIST, NULL, &module->control_me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlMEAppend failed: %d\n", __FILE__, __LINE__, ret); goto error; } module->opcount = 0; module->match_bits = module->comm->c_contextid; module->atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ? mca_osc_portals4_component.matching_atomic_max : MIN(mca_osc_portals4_component.matching_atomic_max, mca_osc_portals4_component.matching_atomic_ordered_size); module->fetch_atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ? mca_osc_portals4_component.matching_fetch_atomic_max : MIN(mca_osc_portals4_component.matching_fetch_atomic_max, mca_osc_portals4_component.matching_atomic_ordered_size); module->zero = 0; module->one = 1; module->start_group = NULL; module->post_group = NULL; module->state.post_count = 0; module->state.complete_count = 0; if (check_config_value_bool("no_locks", info)) { module->state.lock = LOCK_ILLEGAL; } else { module->state.lock = LOCK_UNLOCKED; } OBJ_CONSTRUCT(&module->outstanding_locks, opal_list_t); module->passive_target_access_epoch = false; #if OPAL_ASSEMBLY_ARCH == OPAL_AMD64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 *model = MPI_WIN_UNIFIED; #else *model = MPI_WIN_SEPARATE; #endif win->w_osc_module = &module->super; PtlAtomicSync(); /* Make sure that everyone's ready to receive. */ module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); return OMPI_SUCCESS; error: /* BWB: FIX ME: This is all wrong... */ if (0 != module->ct_h) PtlCTFree(module->ct_h); if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h); if (0 != module->req_md_h) PtlMDRelease(module->req_md_h); if (0 != module->md_h) PtlMDRelease(module->md_h); if (NULL != module->comm) ompi_comm_free(&module->comm); if (NULL != module) free(module); return ret; }
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request) { int mpi_errno = MPI_SUCCESS; MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc); int ret; MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype *dt_ptr; MPID_Request *sreq = NULL; ptl_me_t me; int initial_iov_count, remaining_iov_count; ptl_md_t md; MPI_Aint last; MPIU_CHKPMEM_DECL(2); MPIDI_STATE_DECL(MPID_STATE_SEND_MSG); MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG); MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm); sreq->dev.match.parts.rank = dest; sreq->dev.match.parts.tag = tag; sreq->dev.match.parts.context_id = comm->context_id + context_offset; sreq->ch.vc = vc; if (!vc_ptl->id_initialized) { mpi_errno = MPID_nem_ptl_init_id(vc); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz)); if (data_sz <= PTL_LARGE_THRESHOLD) { /* Small message. Send all data eagerly */ if (dt_contig) { void *start = (char *)buf + dt_true_lb; MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message"); REQ_PTL(sreq)->event_handler = handler_send; MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler)); if (start == NULL) ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); else ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz)); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid); MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq); MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt); MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler); goto fn_exit; } /* noncontig data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message"); sreq->dev.segment_ptr = MPID_Segment_alloc(); MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; last = sreq->dev.segment_size; sreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count); if (last == sreq->dev.segment_size) { /* IOV is able to describe entire message */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " entire message fits in IOV"); md.start = sreq->dev.iov; md.length = sreq->dev.iov_count; md.options = PTL_IOVEC; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz)); goto fn_exit; } /* IOV is not long enough to describe entire message */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " IOV too long: using bounce buffer"); MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; last = data_sz; MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]); MPIU_Assert(last == sreq->dev.segment_size); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz)); goto fn_exit; } /* Large message. Send first chunk of data and let receiver get the rest */ if (dt_contig) { /* create ME for buffer so receiver can issue a GET for the data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message"); big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); goto fn_exit; } /* Large noncontig data */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message"); sreq->dev.segment_ptr = MPID_Segment_alloc(); MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; last = PTL_LARGE_THRESHOLD; sreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count); initial_iov_count = sreq->dev.iov_count; sreq->dev.segment_first = last; if (last == PTL_LARGE_THRESHOLD) { /* first chunk of message fits into IOV */ MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " first chunk fits in IOV"); if (initial_iov_count < MPL_IOV_LIMIT) { /* There may be space for the rest of the message in this IOV */ sreq->dev.iov_count = MPL_IOV_LIMIT - sreq->dev.iov_count; last = sreq->dev.segment_size; MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count); remaining_iov_count = sreq->dev.iov_count; if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) { /* Entire message fit in one IOV */ int was_incomplete; MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, " rest of message fits in one IOV"); /* Create ME for remaining data */ me.start = &sreq->dev.iov[initial_iov_count]; me.length = remaining_iov_count; me.ct_handle = PTL_CT_NONE; me.uid = PTL_UID_ANY; me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC ); me.match_id = vc_ptl->id; me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank); me.ignore_bits = 0; me.min_free = 0; MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p"); ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq, &REQ_PTL(sreq)->get_me_p[0]); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq); /* increment the cc for the get operation */ MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete); MPIU_Assert(was_incomplete); /* Create MD for first chunk */ md.start = sreq->dev.iov; md.length = initial_iov_count; md.options = PTL_IOVEC; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(sreq)->event_handler = handler_send; ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq, NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret)); DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz)); goto fn_exit; }