/* initialize short copy blocks */ 
void
ompi_mtl_portals_short_setup()
{
    int ret;
    int i;

    if ((ompi_mtl_portals.ptl_num_copy_blocks > 0) && (ompi_mtl_portals.ptl_copy_block_len > 0)) {

        ompi_mtl_portals.ptl_short_md.length = ompi_mtl_portals.ptl_num_copy_blocks * 
                                               ompi_mtl_portals.ptl_copy_block_len;

        ompi_mtl_portals.ptl_short_md.start = malloc(ompi_mtl_portals.ptl_short_md.length);
        if (NULL == ompi_mtl_portals.ptl_short_md.start) {
            ompi_mtl_portals.ptl_num_copy_blocks = 0;
            return;
        }

        ompi_mtl_portals.ptl_short_md.threshold = PTL_MD_THRESH_INF;
        ompi_mtl_portals.ptl_short_md.max_size  = 0;
        ompi_mtl_portals.ptl_short_md.options   = PTL_MD_EVENT_START_DISABLE;
        ompi_mtl_portals.ptl_short_md.user_ptr  = &ptl_short_request;
        ompi_mtl_portals.ptl_short_md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h,
                        ompi_mtl_portals.ptl_short_md,
                        PTL_RETAIN,
                        &ompi_mtl_portals.ptl_short_md_h);
        if (PTL_OK != ret) {
            free(ompi_mtl_portals.ptl_short_md.start);
            ompi_mtl_portals.ptl_num_copy_blocks = 0;
            return;
        }

        ptl_short_request.event_callback = ompi_mtl_portals_short_callback;

        ompi_mtl_portals.ptl_copy_block_free_list = malloc(ompi_mtl_portals.ptl_num_copy_blocks * sizeof(int));
        if (NULL == ompi_mtl_portals.ptl_copy_block_free_list) {
            free(ompi_mtl_portals.ptl_short_md.start);
            ompi_mtl_portals.ptl_num_copy_blocks = 0;
            return;
        }

        for (i=0; i<ompi_mtl_portals.ptl_num_copy_blocks; i++) {
            ompi_mtl_portals.ptl_copy_block_free_list[i] = i;
        }

        ompi_mtl_portals.ptl_copy_block_first_free = 0;

    }

}
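Aside: the examples on this page mix two API generations. A minimal sketch of the two PtlMDBind call shapes, inferred from the calls above and below (ni_h is assumed to be a valid network interface handle from PtlNIInit; the two prototypes belong to different library versions and would not compile in one translation unit):

/* Sketch only, not a complete program. */
ptl_md_t        md;     /* descriptor: start, length, options, eq_handle, ... */
ptl_handle_md_t md_h;
int             ret;

/* Portals 3 (as in the example above): md is passed by value, with an
 * explicit unlink policy (PTL_RETAIN or PTL_UNLINK). */
ret = PtlMDBind(ni_h, md, PTL_RETAIN, &md_h);

/* Portals 4 (as in most examples below): md is passed by pointer; the
 * threshold/unlink machinery is gone and the MD carries a ct_handle. */
ret = PtlMDBind(ni_h, &md, &md_h);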
Example No. 2
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    ptl_md_t md;
    int ret;

    /* FIX ME: This needs to be on the send eq... */
    md.start = start;
    md.length = length;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &request->md_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
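    /* Reserve a send slot: atomically decrement the counter; if it went
     * negative there were no free slots, so give the slot back and poll
     * progress until one opens up. */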
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    ret = PtlGet(request->md_h,
                 0,
                 md.length,
                 target,
                 ompi_mtl_portals4.read_idx,
                 match_bits,
                 remote_offset,
                 request);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        PtlMDRelease(request->md_h);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
Example No. 3
int MPID_nem_ptl_rptl_ptinit(ptl_handle_ni_t ni_handle, ptl_handle_eq_t eq_handle, ptl_pt_index_t data_pt,
                             ptl_pt_index_t control_pt)
{
    int ret = PTL_OK;
    struct rptl *rptl;
    int mpi_errno = MPI_SUCCESS;
    int i;
    ptl_md_t md;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);


    /* setup the parts of rptls that can be done before world size or
     * target information */
    MPIU_CHKPMEM_MALLOC(rptl, struct rptl *, sizeof(struct rptl), mpi_errno, "rptl");
    MPL_DL_APPEND(rptl_info.rptl_list, rptl);

    rptl->local_state = RPTL_LOCAL_STATE_ACTIVE;
    rptl->pause_ack_counter = 0;

    rptl->data.ob_max_count = 0;
    rptl->data.ob_curr_count = 0;

    rptl->data.pt = data_pt;
    rptl->control.pt = control_pt;

    rptl->ni = ni_handle;
    rptl->eq = eq_handle;

    md.start = 0;
    md.length = (ptl_size_t) (-1);
    md.options = 0x0;
    md.eq_handle = rptl->eq;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(rptl->ni, &md, &rptl->md);
    RPTLU_ERR_POP(ret, "Error binding new global MD\n");

    /* post world_size number of empty buffers on the control portal */
    if (rptl->control.pt != PTL_PT_ANY) {
        MPIU_CHKPMEM_MALLOC(rptl->control.me, ptl_handle_me_t *,
                            2 * rptl_info.world_size * sizeof(ptl_handle_me_t), mpi_errno,
                            "rptl target info");
        for (i = 0; i < 2 * rptl_info.world_size; i++) {
            ret = rptli_post_control_buffer(rptl->ni, rptl->control.pt, &rptl->control.me[i]);
            RPTLU_ERR_POP(ret, "Error in rptli_post_control_buffer\n");
        }
        rptl->control.me_idx = 0;
    }
Example No. 4
static int handler_recv_dequeue_large(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const rreq = e->user_ptr;
    MPIDI_VC_t *vc;
    MPID_nem_ptl_vc_area *vc_ptl;
    int ret;
    int dt_contig;
    MPIDI_msg_sz_t data_sz;
    MPID_Datatype *dt_ptr;
    MPI_Aint dt_true_lb;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
    
    MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);

    MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc);
    vc_ptl = VC_PTL(vc);
    
    dequeue_req(e);

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    /* unpack data from unexpected buffer first */
    if (e->type == PTL_EVENT_PUT_OVERFLOW) {
        if (dt_contig) {
            MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength);
        } else {
            last = e->mlength;
            MPID_Segment_unpack(rreq->dev.segment_ptr, 0, &last, e->start);
            MPIU_Assert(last == e->mlength);
            rreq->dev.segment_first = e->mlength;
        }
    }
    
    if (!(e->hdr_data & NPTL_LARGE)) {
        /* all data has already been received; we're done */
        mpi_errno = handler_recv_complete(e);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        goto fn_exit;
    }
        
    MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD);

    /* we need to GET the rest of the data from the sender's buffer */
    if (dt_contig) {
        big_get((char *)rreq->dev.user_buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD,
                vc, e->match_bits, rreq);
        goto fn_exit;
    }

    /* noncontig recv buffer */
    
    last = rreq->dev.segment_size;
    rreq->dev.iov_count = MPL_IOV_LIMIT;
    MPID_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.iov, &rreq->dev.iov_count);

    if (last == rreq->dev.segment_size && rreq->dev.segment_size <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
        /* Rest of message fits in one IOV */
        ptl_md_t md;

        md.start = rreq->dev.iov;
        md.length = rreq->dev.iov_count;
        md.options = PTL_IOVEC;
        md.eq_handle = MPIDI_nem_ptl_origin_eq;
        md.ct_handle = PTL_CT_NONE;
        ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(rreq)->md);
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

        REQ_PTL(rreq)->event_handler = handler_recv_complete;
        ret = MPID_nem_ptl_rptl_get(REQ_PTL(rreq)->md, 0, rreq->dev.segment_size - rreq->dev.segment_first, vc_ptl->id, vc_ptl->ptg,
                     e->match_bits, 0, rreq);
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s", MPID_nem_ptl_strerror(ret));
        goto fn_exit;
    }
        
    /* message won't fit in a single IOV, allocate buffer and unpack when received */
    /* FIXME: For now, allocate a single large buffer to hold entire message */
    MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz - PTL_LARGE_THRESHOLD,
                        mpi_errno, "chunk_buffer");
    big_get(REQ_PTL(rreq)->chunk_buffer[0], data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq);

 fn_exit:
    MPIU_CHKPMEM_COMMIT();
 fn_exit2:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
    return mpi_errno;
 fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit2;
}
Example No. 5
static int
btl_portals4_init_interface(void)
{
    mca_btl_portals4_module_t *portals4_btl;
    unsigned int ret, interface;
    ptl_md_t md;
    ptl_me_t me;

    /* The initialization of EQ, PT, and ME must be done after PtlSetMap! */
    for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
        portals4_btl = mca_btl_portals4_component.btls[interface];

        /* create event queue */
        ret = PtlEQAlloc(portals4_btl->portals_ni_h,
                     mca_btl_portals4_component.recv_queue_size,
                     &portals4_btl->recv_eq_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        mca_btl_portals4_component.eqs_h[interface] = portals4_btl->recv_eq_h;
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "PtlEQAlloc (recv_eq=%d) OK for NI %d\n", portals4_btl->recv_eq_h, interface));

        /* Create recv_idx portal table entry */
        ret = PtlPTAlloc(portals4_btl->portals_ni_h,
                     PTL_PT_ONLY_TRUNCATE,
                     portals4_btl->recv_eq_h,
                     REQ_BTL_TABLE_ID,
                     &portals4_btl->recv_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "PtlPTAlloc (recv_idx) OK for NI %d recv_idx=%d", interface, portals4_btl->recv_idx));

        if (portals4_btl->recv_idx != REQ_BTL_TABLE_ID) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc did not allocate the requested PT: %d",
                           __FILE__, __LINE__, portals4_btl->recv_idx);
            goto error;
        }

        /* bind zero-length md for sending acks */
        md.start     = NULL;
        md.length    = 0;
        md.options   = 0;
        md.eq_handle = PTL_EQ_NONE;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(portals4_btl->portals_ni_h,
                    &md,
                    &portals4_btl->zero_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "PtlMDBind (zero-length md=%d) OK for NI %d", portals4_btl->zero_md_h, interface));

        /* Bind MD across all memory */
        md.start = 0;
        md.length = PTL_SIZE_MAX;
        md.options = 0;
        md.eq_handle = portals4_btl->recv_eq_h;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(portals4_btl->portals_ni_h,
                    &md,
                    &portals4_btl->send_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed for NI %d: %d\n",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }

        /* Handle long overflows */
        me.start = NULL;
        me.length = 0;
        me.ct_handle = PTL_CT_NONE;
        me.min_free = 0;
        me.uid = PTL_UID_ANY;
        me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_LINK_DISABLE |
            PTL_ME_EVENT_COMM_DISABLE |
            PTL_ME_EVENT_UNLINK_DISABLE;
        if (mca_btl_portals4_component.use_logical) {
            me.match_id.rank = PTL_RANK_ANY;
        } else {
            me.match_id.phys.nid = PTL_NID_ANY;
            me.match_id.phys.pid = PTL_PID_ANY;
        }
        me.match_bits = BTL_PORTALS4_LONG_MSG;
        me.ignore_bits = BTL_PORTALS4_CONTEXT_MASK |
            BTL_PORTALS4_SOURCE_MASK |
            BTL_PORTALS4_TAG_MASK;
        ret = PtlMEAppend(portals4_btl->portals_ni_h,
                      portals4_btl->recv_idx,
                      &me,
                      PTL_OVERFLOW_LIST,
                      NULL,
                      &portals4_btl->long_overflow_me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMEAppend (overflow list) OK for NI %d", interface));
    }

    ret = mca_btl_portals4_recv_enable(portals4_btl);
    if (PTL_OK != ret) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: Initialization of recv buffer failed: %d",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    return OPAL_SUCCESS;

 error:
    opal_output_verbose(1, opal_btl_base_framework.framework_output, "Error in btl_portals4_init_interface");

    for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
        portals4_btl = mca_btl_portals4_component.btls[interface];
        if (NULL != portals4_btl) mca_btl_portals4_free_module(portals4_btl);
    }
    mca_btl_portals4_component.num_btls = 0;
    if (NULL != mca_btl_portals4_component.btls)  free(mca_btl_portals4_component.btls);
    if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
    mca_btl_portals4_component.btls = NULL;
    mca_btl_portals4_component.eqs_h = NULL;

    return OPAL_ERROR;
}
Example No. 6
static int
ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event, 
                          struct ompi_convertor_t *convertor,
                          ompi_mtl_portals_request_t  *ptl_request)
{
    int ret;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    size_t buflen;
    
    if (PTL_IS_SHORT_MSG(recv_event->ev.match_bits)) {
        /* the buffer is sitting in the short message queue */

        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        ompi_mtl_portals_recv_short_block_t *block = 
            recv_event->ev.md.user_ptr;

        iov.iov_base = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
        iov.iov_len = recv_event->ev.mlength;
        max_data = iov.iov_len;

        /* see if this message filled the receive block */
        if (recv_event->ev.md.length - (recv_event->ev.offset + 
                                        recv_event->ev.mlength) <
            recv_event->ev.md.max_size) {
            block->full = true;
        }

        /* pull out the data */
        if (iov.iov_len > 0) {
            ompi_convertor_unpack(convertor, &iov, &iov_count,
                                  &max_data );
        }

        /* if synchronous, return an ack */
        if (PTL_IS_SYNC_MSG(recv_event->ev)) {
            md.length = 0;
            md.start = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
            md.threshold = 1; /* send */
            md.options = PTL_MD_EVENT_START_DISABLE;
            md.user_ptr = NULL;
            md.eq_handle = ompi_mtl_portals.ptl_eq_h;

            ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                            PTL_UNLINK, &md_h);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlMDBind.  Error code - %d \n",ret);
                abort();
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                                 "acking recv: 0x%016llx\n", 
                                 recv_event->ev.match_bits));

            ret = PtlPut(md_h,
                         PTL_NO_ACK_REQ,
                         recv_event->ev.initiator,
                         OMPI_MTL_PORTALS_ACK_TABLE_ID,
                         0,
                         recv_event->ev.hdr_data,
                         0,
                         0);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlPut.  Error code - %d \n",ret);
                abort();
            }
        }

        /* finished with our buffer space */
        ompi_mtl_portals_return_block_part(&ompi_mtl_portals, block);

        ompi_convertor_get_packed_size(convertor, &buflen);

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
        ptl_request->super.ompi_req->req_status._count = 
            recv_event->ev.mlength;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "recv complete: 0x%016llx\n", 
                             recv_event->ev.match_bits));
        
        ptl_request->super.completion_callback(&ptl_request->super);

    } else {
        ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                         &ptl_request->free_after);
        if (OMPI_SUCCESS != ret) {
            opal_output(fileno(stderr)," Error returned from ompi_mtl_datatype_recv_buf.  Error code - %d \n",ret);
            abort();
        }
        md.length = (recv_event->ev.rlength > buflen) ? buflen : recv_event->ev.rlength;
        md.threshold = 2; /* send and get */
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        /* retain because it's unclear how many events we'll get here.
           Some implementations give just the REPLY, others give SEND
           and REPLY */
        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                        PTL_RETAIN, &md_h);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr)," Error returned from PtlMDBind.  Error code - %d \n",ret);
            abort();
        }

        ptl_request->event_callback = ompi_mtl_portals_recv_progress;

        ret = PtlGet(md_h, 
                     recv_event->ev.initiator, 
                     OMPI_MTL_PORTALS_READ_TABLE_ID,
                     0, 
                     recv_event->ev.hdr_data,
                     0);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr)," Error returned from PtlGet.  Error code - %d \n",ret);
            abort();
        }

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
    }

    return OMPI_SUCCESS;
}
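The same generation split shows up in PtlGet: the call above supplies no local offset or length (the bound MD fixes them), while read_msg in Example No. 2 passes both explicitly. A sketch of the two shapes, with argument roles inferred from the calls in this listing:

/* Portals 3 get: the MD determines the local buffer and length. */
ret = PtlGet(md_h, target_id, pt_index, 0 /* ac index */,
             match_bits, 0 /* remote offset */);

/* Portals 4 get: explicit local offset and length within the MD, plus
 * a user pointer handed back in the completion event. */
ret = PtlGet(md_h, 0 /* local offset */, length, target_id, pt_index,
             match_bits, 0 /* remote offset */, user_ptr);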
Example No. 7
int
mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
                    struct mca_btl_base_endpoint_t* btl_peer,
                    void *local_address,
                    uint64_t remote_address,
                    struct mca_btl_base_registration_handle_t *local_handle,
                    struct mca_btl_base_registration_handle_t *remote_handle,
                    size_t size,
                    int flags,
                    int order,
                    mca_btl_base_rdma_completion_fn_t cbfunc,
                    void *cbcontext,
                    void *cbdata)
{
    mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
    mca_btl_portals4_frag_t   *frag         = NULL;
    ptl_md_t md;
    int ret;

    /* reserve space in the event queue for rdma operations immediately */
    while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
           portals4_btl->portals_max_outstanding_ops) {
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
        mca_btl_portals4_component_progress();
    }

    OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
    if (NULL == frag){
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
        "mca_btl_portals4_get: Incrementing portals_outstanding_ops=%d frag=%p",
        portals4_btl->portals_outstanding_ops, (void *)frag));

    frag->rdma_cb.func         = cbfunc;
    frag->rdma_cb.context      = cbcontext;
    frag->rdma_cb.data         = cbdata;
    frag->rdma_cb.local_handle = local_handle;

    frag->endpoint = btl_peer;
    frag->hdr.tag = MCA_BTL_TAG_MAX;

    /* Bind the memory */
    md.start = (void *)local_address;
    md.length = size;
    md.options = 0;
    md.eq_handle = portals4_btl->recv_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(portals4_btl->portals_ni_h,
                    &md,
                    &frag->md_h);

    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        /* release the event-queue slot reserved above before bailing out */
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        return OPAL_ERROR;
    }

    frag->match_bits = remote_handle->key;
    frag->length = md.length;
    frag->peer_proc = btl_peer->ptl_proc;

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
        md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));

    ret = PtlGet(frag->md_h,
                 0,
                 md.length,
                 btl_peer->ptl_proc,
                 portals4_btl->recv_idx,
                 frag->match_bits, /* match bits */
                 0,
                 frag);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        PtlMDRelease(frag->md_h);
        frag->md_h = PTL_INVALID_HANDLE;
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
        md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));

    return OPAL_SUCCESS;
}
Example No. 8
int main(int   argc,
         char *argv[])
{
    ptl_handle_ni_t ni_logical;
    ptl_pt_index_t  logical_pt_index;
    ptl_process_t   myself;
    struct timeval  start, stop;
    int             potato = 0;
    ENTRY_T         potato_catcher;
    HANDLE_T        potato_catcher_handle;
    ptl_md_t        potato_launcher;
    ptl_handle_md_t potato_launcher_handle;
    int             num_procs;

    CHECK_RETURNVAL(PtlInit());

    CHECK_RETURNVAL(libtest_init());

    num_procs = libtest_get_size();

    if (NULL != getenv("MAKELEVEL") && num_procs > 2) {
        return 77;
    }

    CHECK_RETURNVAL(PtlNIInit
                    (PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY,
                     NULL, NULL, &ni_logical));

    CHECK_RETURNVAL(PtlSetMap(ni_logical, num_procs,
                              libtest_get_mapping(ni_logical)));

    CHECK_RETURNVAL(PtlGetId(ni_logical, &myself));
    CHECK_RETURNVAL(PtlPTAlloc
                    (ni_logical, 0, PTL_EQ_NONE, PTL_PT_ANY,
                     &logical_pt_index));
    assert(logical_pt_index == 0);
    /* Now do the initial setup on ni_logical */
    potato_catcher.start   = &potato;
    potato_catcher.length  = sizeof(potato);
    potato_catcher.uid     = PTL_UID_ANY;
    potato_catcher.options = OPTIONS;
#if INTERFACE == 1
    potato_catcher.match_id.rank = PTL_RANK_ANY;
    potato_catcher.match_bits    = 1;
    potato_catcher.ignore_bits   = ~potato_catcher.match_bits;
#endif
    CHECK_RETURNVAL(PtlCTAlloc(ni_logical, &potato_catcher.ct_handle));
    CHECK_RETURNVAL(APPEND
                    (ni_logical, logical_pt_index, &potato_catcher,
                     PTL_PRIORITY_LIST, NULL, &potato_catcher_handle));
    /* Now do a barrier (on ni_physical) to make sure that everyone has their
     * logical interface set up */
    libtest_barrier();

    /* now I can communicate between ranks with ni_logical */

    /* set up the potato launcher */
    potato_launcher.start   = &potato;
    potato_launcher.length  = sizeof(potato);
    potato_launcher.options =
        PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_CT_SEND;
    potato_launcher.eq_handle = PTL_EQ_NONE;    // i.e. don't queue send events
    CHECK_RETURNVAL(PtlCTAlloc(ni_logical, &potato_launcher.ct_handle));
    CHECK_RETURNVAL(PtlMDBind
                    (ni_logical, &potato_launcher, &potato_launcher_handle));

    /* rank 0 starts the potato going */
    if (myself.rank == 0) {
        ptl_process_t nextrank;
        nextrank.rank  = myself.rank + 1;
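        /* multiplying by 0 wraps the ring: past the last rank, nextrank
         * becomes rank 0 */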
        nextrank.rank *= (nextrank.rank <= num_procs - 1);
        gettimeofday(&start, NULL);
        CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0, potato_launcher.length,
                               (LOOPS == 1) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                               nextrank, logical_pt_index, 1, 0,
                               NULL, 1));
    }

    {   /* the potato-passing loop */
        size_t         waitfor;
        ptl_ct_event_t ctc;
        ptl_process_t  nextrank;
        nextrank.rank  = myself.rank + 1;
        nextrank.rank *= (nextrank.rank <= num_procs - 1);
        for (waitfor = 1; waitfor <= LOOPS; ++waitfor) {
            CHECK_RETURNVAL(PtlCTWait(potato_catcher.ct_handle, waitfor, &ctc));        // wait for potato
            assert(ctc.failure == 0);
            assert(ctc.success == waitfor);
            /* I have the potato! */
            ++potato;
            if (potato < LOOPS * (num_procs)) { // otherwise, the recipient may have exited
                /* Bomb's away! */
                if (myself.rank == 0) {
                    CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0,
                                           potato_launcher.length,
                                           (waitfor == (LOOPS - 1)) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                                           nextrank, logical_pt_index, 3, 0, NULL, 2));
                } else {
                    CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0,
                                           potato_launcher.length,
                                           (waitfor == LOOPS) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                                           nextrank, logical_pt_index, 3, 0, NULL, 2));
                }
            }
        }
        // make sure that last send completed before exiting
        CHECK_RETURNVAL(PtlCTWait(potato_launcher.ct_handle, LOOPS+1, &ctc));
        assert(ctc.failure == 0);
    }
    if (myself.rank == 0) {
        double accumulate = 0.0;
        gettimeofday(&stop, NULL);
        accumulate =
            (stop.tv_sec + stop.tv_usec * 1e-6) - (start.tv_sec +
                    start.tv_usec * 1e-6);
        /* calculate the average time waiting */
        printf("Total time: %g secs\n", accumulate);
        accumulate /= LOOPS;
        printf("Average time around the loop: %g microseconds\n",
               accumulate * 1e6);
        accumulate /= num_procs;
        printf("Average catch-to-toss latency: %g microseconds\n",
               accumulate * 1e6);
    }

    /* cleanup */
    CHECK_RETURNVAL(PtlMDRelease(potato_launcher_handle));
    CHECK_RETURNVAL(PtlCTFree(potato_launcher.ct_handle));
    CHECK_RETURNVAL(UNLINK(potato_catcher_handle));
    CHECK_RETURNVAL(PtlCTFree(potato_catcher.ct_handle));

    /* major cleanup */
    CHECK_RETURNVAL(PtlPTFree(ni_logical, logical_pt_index));
    CHECK_RETURNVAL(PtlNIFini(ni_logical));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
Example No. 9
int
kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
                    unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
                    unsigned int offset, int nob)
{
        kptl_tx_t       *tx;
        ptl_err_t        ptlrc;
        kptl_msg_t      *rxmsg = rx->rx_msg;
        kptl_peer_t     *peer = rx->rx_peer;
        unsigned long    flags;
        ptl_handle_md_t  mdh;

        LASSERT (type == TX_TYPE_PUT_RESPONSE || 
                 type == TX_TYPE_GET_RESPONSE);

        tx = kptllnd_get_idle_tx(type);
        if (tx == NULL) {
                CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n",
                        type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT",
                        libcfs_id2str(peer->peer_id));
                return -ENOMEM;
        }

        kptllnd_set_tx_peer(tx, peer);
        kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob);

        ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, 
                          PTL_UNLINK, &mdh);
        if (ptlrc != PTL_OK) {
                CERROR("PtlMDBind(%s) failed: %s(%d)\n",
                       libcfs_id2str(peer->peer_id),
                       kptllnd_errtype2str(ptlrc), ptlrc);
                tx->tx_status = -EIO;
                kptllnd_tx_decref(tx);
                return -EIO;
        }

        cfs_spin_lock_irqsave(&peer->peer_lock, flags);

        tx->tx_lnet_msg = lntmsg;
        /* lnet_finalize() will be called when tx is torn down, so I must
         * return success from here on... */

        tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * CFS_HZ);
        tx->tx_rdma_mdh = mdh;
        tx->tx_active = 1;
        cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);

        /* peer has now got my ref on 'tx' */

        cfs_spin_unlock_irqrestore(&peer->peer_lock, flags);

        tx->tx_tposted = jiffies;

        if (type == TX_TYPE_GET_RESPONSE)
                ptlrc = PtlPut(mdh,
                               tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
                               rx->rx_initiator,
                               *kptllnd_tunables.kptl_portal,
                               0,                     /* acl cookie */
                               rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                               0,                     /* offset */
                               (lntmsg != NULL) ?     /* header data */
                               PTLLND_RDMA_OK :
                               PTLLND_RDMA_FAIL);
        else
                ptlrc = PtlGet(mdh,
                               rx->rx_initiator,
                               *kptllnd_tunables.kptl_portal,
                               0,                     /* acl cookie */
                               rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                               0);                    /* offset */

        if (ptlrc != PTL_OK) {
                CERROR("Ptl%s failed: %s(%d)\n", 
                       (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get",
                       kptllnd_errtype2str(ptlrc), ptlrc);
                
                kptllnd_peer_close(peer, -EIO);
                /* Everything (including this RDMA) queued on the peer will
                 * be completed with failure */
                kptllnd_schedule_ptltrace_dump();
        }

        return 0;
}
Example No. 10
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
                    int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;
    MPID_Request *sreq = NULL;
    ptl_me_t me;
    int initial_iov_count, remaining_iov_count;
    ptl_md_t md;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);

    MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
    sreq->dev.match.parts.rank = dest;
    sreq->dev.match.parts.tag = tag;
    sreq->dev.match.parts.context_id = comm->context_id + context_offset;
    sreq->ch.vc = vc;

    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz));

    if (data_sz <= PTL_LARGE_THRESHOLD) {
        /* Small message.  Send all data eagerly */
        if (dt_contig) {
            void *start = (char *)buf + dt_true_lb;
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message");
            REQ_PTL(sreq)->event_handler = handler_send;
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler));
            if (start == NULL)
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            else
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler);
            goto fn_exit;
        }

        /* noncontig data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message");
        sreq->dev.segment_ptr = MPID_Segment_alloc();
        MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        last = sreq->dev.segment_size;
        sreq->dev.iov_count = MPL_IOV_LIMIT;
        MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

        if (last == sreq->dev.segment_size) {
            /* IOV is able to describe entire message */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    entire message fits in IOV");
            md.start = sreq->dev.iov;
            md.length = sreq->dev.iov_count;
            md.options = PTL_IOVEC;
            md.eq_handle = MPIDI_nem_ptl_origin_eq;
            md.ct_handle = PTL_CT_NONE;
            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

            REQ_PTL(sreq)->event_handler = handler_send;
            ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                        NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            goto fn_exit;
        }

        /* IOV is not long enough to describe entire message */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer");
        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        last = data_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]);
        MPIU_Assert(last == sreq->dev.segment_size);
        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ,
                                    vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
        goto fn_exit;
    }

    /* Large message.  Send first chunk of data and let receiver get the rest */
    if (dt_contig) {
        /* create ME for buffer so receiver can issue a GET for the data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message");
        big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                     NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);

        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        goto fn_exit;
    }

    /* Large noncontig data */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message");
    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    last = PTL_LARGE_THRESHOLD;
    sreq->dev.iov_count = MPL_IOV_LIMIT;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

    initial_iov_count = sreq->dev.iov_count;
    sreq->dev.segment_first = last;

    if (last == PTL_LARGE_THRESHOLD) {
        /* first chunk of message fits into IOV */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    first chunk fits in IOV");
        if (initial_iov_count < MPL_IOV_LIMIT) {
            /* There may be space for the rest of the message in this IOV */
            sreq->dev.iov_count = MPL_IOV_LIMIT - sreq->dev.iov_count;
            last = sreq->dev.segment_size;

            MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                     &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count);
            remaining_iov_count = sreq->dev.iov_count;

            if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
                /* Entire message fit in one IOV */
                int was_incomplete;

                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    rest of message fits in one IOV");
                /* Create ME for remaining data */
                me.start = &sreq->dev.iov[initial_iov_count];
                me.length = remaining_iov_count;
                me.ct_handle = PTL_CT_NONE;
                me.uid = PTL_UID_ANY;
                me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                               PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC );
                me.match_id = vc_ptl->id;
                me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
                me.ignore_bits = 0;
                me.min_free = 0;

                MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");

                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                  &REQ_PTL(sreq)->get_me_p[0]);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
                /* increment the cc for the get operation */
                MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
                MPIU_Assert(was_incomplete);

                /* Create MD for first chunk */
                md.start = sreq->dev.iov;
                md.length = initial_iov_count;
                md.options = PTL_IOVEC;
                md.eq_handle = MPIDI_nem_ptl_origin_eq;
                md.ct_handle = PTL_CT_NONE;
                ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

                REQ_PTL(sreq)->event_handler = handler_send;
                ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                goto fn_exit;
            }
Example No. 11
int
shmem_transport_startup(void)
{
    int ret, i;
    ptl_process_t *desired = NULL;
    ptl_md_t md;
    ptl_le_t le;
    ptl_uid_t uid = PTL_UID_ANY;
    ptl_process_t my_id;
#ifdef USE_ON_NODE_COMMS
    int num_on_node = 0;
#endif

#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
    /* Make sure the heap and data bases are actually symmetric */
    {
        int peer;
        uint64_t bases[2];

        peer = (shmem_internal_my_pe + 1) % shmem_internal_num_pes;

        ret = shmem_runtime_get(peer, "portals4-bases", bases, sizeof(uint64_t) * 2);
        if (0 != ret) {
            fprintf(stderr, "[%03d] ERROR: runtime_get failed: %d\n",
                    shmem_internal_my_pe, ret);
            return ret;
        }

        if ((uintptr_t) shmem_internal_heap_base != bases[0]) {
            fprintf(stderr, "[%03d] ERROR: heap base address does not match with rank %03d and virtual addressing is enabled\n",
                    shmem_internal_my_pe, peer);
            return -1;
        }
        if ((uintptr_t) shmem_internal_data_base != bases[1]) {
            fprintf(stderr, "[%03d] ERROR: data base address does not match with rank %03d and virtual addressing is enabled\n",
                    shmem_internal_my_pe, peer);
            return -1;
        }
    }
#endif

    desired = malloc(sizeof(ptl_process_t) * shmem_internal_num_pes);
    if (NULL == desired) {
        ret = 1;
        goto cleanup;
    }

    ret = PtlGetPhysId(shmem_transport_portals4_ni_h, &my_id);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlGetPhysId failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    for (i = 0 ; i < shmem_internal_num_pes; ++i) {
        ret = shmem_runtime_get(i, "portals4-procid",
                                &desired[i], sizeof(ptl_process_t));
        if (0 != ret) {
            fprintf(stderr, "[%03d] ERROR: runtime_get failed: %d\n",
                    shmem_internal_my_pe, ret);
            goto cleanup;
        }

#ifdef USE_ON_NODE_COMMS
        /* update the connectivity map... */
        if (desired[i].phys.nid == my_id.phys.nid) {
            SHMEM_SET_RANK_SAME_NODE(i, num_on_node++);
            if (num_on_node > 255) {
                fprintf(stderr, "[%03d] ERROR: Too many local ranks.\n",
                        shmem_internal_my_pe);
                goto cleanup;
            }
        }
#endif
    }

    ret = PtlSetMap(shmem_transport_portals4_ni_h,
                    shmem_internal_num_pes,                    
                    desired);
    if (PTL_OK != ret && PTL_IGNORED != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlSetMap failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    ret = PtlGetUid(shmem_transport_portals4_ni_h, &uid);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlGetUid failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    shmem_transport_portals4_max_volatile_size = ni_limits.max_volatile_size;
    shmem_transport_portals4_max_atomic_size = ni_limits.max_atomic_size;
    shmem_transport_portals4_max_fetch_atomic_size = ni_limits.max_fetch_atomic_size;
    shmem_transport_portals4_max_msg_size = ni_limits.max_msg_size;

    if (shmem_transport_portals4_max_volatile_size < sizeof(long double complex)) {
        fprintf(stderr, "[%03d] ERROR: Max volatile size found to be %lu, too small to continue\n",
                shmem_internal_my_pe, (unsigned long) shmem_transport_portals4_max_volatile_size);
        goto cleanup;
    }
    if (shmem_transport_portals4_max_atomic_size < sizeof(long double complex)) {
        fprintf(stderr, "[%03d] ERROR: Max atomic size found to be %lu, too small to continue\n",
                shmem_internal_my_pe, (unsigned long) shmem_transport_portals4_max_atomic_size);
        goto cleanup;
    }
    if (shmem_transport_portals4_max_fetch_atomic_size < sizeof(long double complex)) {
        fprintf(stderr, "[%03d] ERROR: Max fetch atomic size found to be %lu, too small to continue\n",
                shmem_internal_my_pe, (unsigned long) shmem_transport_portals4_max_fetch_atomic_size);
        goto cleanup;
    }

    /* create portal table entries */
    ret = PtlEQAlloc(shmem_transport_portals4_ni_h, 
                     shmem_transport_portals4_event_slots,
                     &shmem_transport_portals4_eq_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlEQAlloc failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
    ret = PtlPTAlloc(shmem_transport_portals4_ni_h,
                     0,
                     shmem_transport_portals4_eq_h,
                     shmem_transport_portals4_pt,
                     &all_pt);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlPTAlloc of table entry failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#else
    ret = PtlPTAlloc(shmem_transport_portals4_ni_h,
                     0,
                     shmem_transport_portals4_eq_h,
                     shmem_transport_portals4_data_pt,
                     &data_pt);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlPTAlloc of data table failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
    ret = PtlPTAlloc(shmem_transport_portals4_ni_h,
                     0,
                     shmem_transport_portals4_eq_h,
                     shmem_transport_portals4_heap_pt,
                     &heap_pt);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlPTAlloc of heap table failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#endif

#ifndef ENABLE_HARD_POLLING
    /* target ct */
    ret = PtlCTAlloc(shmem_transport_portals4_ni_h, &shmem_transport_portals4_target_ct_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlCTAlloc of target ct failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    le.ct_handle = shmem_transport_portals4_target_ct_h;
#endif
    le.uid = uid;
    le.options = PTL_LE_OP_PUT | PTL_LE_OP_GET | 
        PTL_LE_EVENT_LINK_DISABLE |
        PTL_LE_EVENT_SUCCESS_DISABLE;
#if !defined(ENABLE_HARD_POLLING)
    le.options |= PTL_LE_EVENT_CT_COMM;
#endif
#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
    le.start = NULL;
    le.length = PTL_SIZE_MAX;
    ret = PtlLEAppend(shmem_transport_portals4_ni_h,
                      shmem_transport_portals4_pt,
                      &le,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &shmem_transport_portals4_le_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlLEAppend of all memory failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#else
    /* Open LE to heap section */
    le.start = shmem_internal_heap_base;
    le.length = shmem_internal_heap_length;
    ret = PtlLEAppend(shmem_transport_portals4_ni_h,
                      shmem_transport_portals4_heap_pt,
                      &le,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &shmem_transport_portals4_heap_le_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlLEAppend of heap section failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    /* Open LE to data section */
    le.start = shmem_internal_data_base;
    le.length = shmem_internal_data_length;
    ret = PtlLEAppend(shmem_transport_portals4_ni_h,
                      shmem_transport_portals4_data_pt,
                      &le,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &shmem_transport_portals4_data_le_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlLEAppend of data section failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
#endif

    /* Open MD to all memory */
    ret = PtlCTAlloc(shmem_transport_portals4_ni_h, &shmem_transport_portals4_put_ct_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlCTAlloc of put ct failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }
    ret = PtlCTAlloc(shmem_transport_portals4_ni_h, &shmem_transport_portals4_get_ct_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlCTAlloc of get ct failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

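    /* Four MDs spanning all of memory follow: one raising full put events,
     * a volatile one for tiny puts (source reusable on return), one that
     * only counts acks on puts, and one that counts replies on gets. */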
    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_ACK;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_put_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_put_event_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of put MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_ACK |
        PTL_MD_EVENT_SUCCESS_DISABLE |
        PTL_MD_VOLATILE;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_put_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_put_volatile_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of put MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_ACK |
        PTL_MD_EVENT_SUCCESS_DISABLE;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_put_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_put_cntr_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of put cntr MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_REPLY | 
        PTL_MD_EVENT_SUCCESS_DISABLE;
    if (1 == PORTALS4_TOTAL_DATA_ORDERING) {
        md.options |= PTL_MD_UNORDERED;
    }
    md.eq_handle = shmem_transport_portals4_eq_h;
    md.ct_handle = shmem_transport_portals4_get_ct_h;
    ret = PtlMDBind(shmem_transport_portals4_ni_h,
                    &md,
                    &shmem_transport_portals4_get_md_h);
    if (PTL_OK != ret) {
        fprintf(stderr, "[%03d] ERROR: PtlMDBind of get MD failed: %d\n",
                shmem_internal_my_pe, ret);
        goto cleanup;
    }

    ret = 0;

 cleanup:
    if (NULL != desired) free(desired);
    return ret;
}
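A hypothetical follow-on use of the volatile MD bound above, assuming the remote-virtual-addressing build (peer rank and remote_addr are illustrative; with a full-address-space MD the local offset is simply the source virtual address):

long value = 42;                 /* stack source is safe: PTL_MD_VOLATILE
                                    means the buffer may be reused as soon
                                    as PtlPut returns */
ptl_process_t peer;
peer.rank = 1;                   /* illustrative logical target */

ret = PtlPut(shmem_transport_portals4_put_volatile_md_h,
             (ptl_size_t) &value,          /* local offset == virtual address */
             sizeof(value),
             PTL_CT_ACK_REQ,               /* ack counted on the put CT */
             peer,
             shmem_transport_portals4_pt,  /* PT from the example above */
             0,                            /* match bits: unused with LEs */
             (ptl_size_t) remote_addr,     /* illustrative target address */
             NULL,                         /* user_ptr */
             0);                           /* hdr_data */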
Example No. 12
void test_prepostME(int             cache_size,
                    int            *cache_buf,
                    ptl_handle_ni_t ni,
                    int             npeers,
                    int             nmsgs,
                    int             nbytes,
                    int             niters)
{
    int    i, j, k;
    double tmp, total = 0;

    ptl_handle_md_t send_md_handle;
    ptl_md_t        send_md;
    ptl_process_t   dest;
    ptl_size_t      offset;
    ptl_pt_index_t  index;
    ptl_handle_eq_t recv_eq_handle;
    ptl_handle_me_t me_handles[npeers * nmsgs];
    ptl_event_t     event;

    ptl_assert(PtlEQAlloc(ni, nmsgs * npeers + 1,
                          &send_md.eq_handle), PTL_OK);

    send_md.start     = send_buf;
    send_md.length    = SEND_BUF_SIZE;
    send_md.options   = PTL_MD_UNORDERED;
    send_md.ct_handle = PTL_CT_NONE;

    ptl_assert(PtlMDBind(ni, &send_md, &send_md_handle), PTL_OK);

    ptl_assert(PtlEQAlloc(ni, nmsgs * npeers + 1, &recv_eq_handle), PTL_OK);

    ptl_assert(PtlPTAlloc(ni, 0, recv_eq_handle, TestSameDirectionIndex,
                          &index), PTL_OK);

    ptl_assert(index, TestSameDirectionIndex);

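    /* all receive MEs are pre-posted before the timed exchange, so every
       incoming put should match on the priority list instead of taking
       the slower unexpected/overflow path; that is what the "pre-post"
       label in the reported result refers to */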
    tmp = timer();
    for (j = 0; j < npeers; ++j) {
        for (k = 0; k < nmsgs; ++k) {
            ptl_process_t src;
            src.rank = recv_peers[j];
            postME(ni, index, recv_buf + (nbytes * (k + j * nmsgs)),
                   nbytes, src, magic_tag, &me_handles[k + j * nmsgs]);
        }
    }
    total += (timer() - tmp);

    for (i = 0; i < niters - 1; ++i) {
        cache_invalidate(cache_size, cache_buf);

        libtest_Barrier();

        tmp = timer();
        for (j = 0; j < npeers; ++j) {
            for (k = 0; k < nmsgs; ++k) {
                offset    = (nbytes * (k + j * nmsgs));
                dest.rank = send_peers[npeers - j - 1];
                ptl_assert(libtest_Put_offset(send_md_handle, offset, nbytes,
                                           dest, index, magic_tag, offset), PTL_OK);
            }
        }

        /* wait for sends */
        for (j = 0; j < npeers * nmsgs; ++j) {
            ptl_assert(PtlEQWait(send_md.eq_handle, &event), PTL_OK);
            ptl_assert(event.type, PTL_EVENT_SEND);
        }

        /* wait for receives */
        for (j = 0; j < npeers * nmsgs; j++) {
            ptl_assert(PtlEQWait(recv_eq_handle, &event), PTL_OK);
        }

        for (j = 0; j < npeers; ++j) {
            for (k = 0; k < nmsgs; ++k) {
                ptl_process_t src;
                src.rank = recv_peers[j];
                postME(ni, index, recv_buf + (nbytes * (k + j * nmsgs)),
                       nbytes, src, magic_tag, &me_handles[k + j * nmsgs]);
            }
        }
        total += (timer() - tmp);
    }

    libtest_Barrier();

    tmp = timer();
    for (j = 0; j < npeers; ++j) {
        for (k = 0; k < nmsgs; ++k) {
            offset    = (nbytes * (k + j * nmsgs));
            dest.rank = send_peers[npeers - j - 1];
            ptl_assert(libtest_Put_offset(send_md_handle, offset, nbytes, dest,
                                       index, magic_tag, offset), PTL_OK);
        }
    }
    /* wait for sends */
    for (j = 0; j < npeers * nmsgs; ++j) {
        ptl_assert(PtlEQWait(send_md.eq_handle, &event), PTL_OK);
        ptl_assert(event.type, PTL_EVENT_SEND);
    }

    /* wait for receives */
    for (j = 0; j < npeers * nmsgs; j++) {
        ptl_assert(PtlEQWait(recv_eq_handle, &event), PTL_OK);
    }

    total += (timer() - tmp);

    ptl_assert(PtlEQFree(send_md.eq_handle), PTL_OK);

    ptl_assert(PtlMDRelease(send_md_handle), PTL_OK);

    ptl_assert(PtlEQFree(recv_eq_handle), PTL_OK);

    ptl_assert(PtlPTFree(ni, index), PTL_OK);

    tmp = libtest_AllreduceDouble(total, PTL_SUM);
    display_result("pre-post", (niters * npeers * nmsgs * 2) / (tmp / world_size));
}
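The listing calls a postME() helper that this excerpt doesn't include.
A plausible sketch, assuming the signature used above; the option flags
chosen here are guesses, not the harness's actual definition:

static void postME(ptl_handle_ni_t ni, ptl_pt_index_t index,
                   void *start, ptl_size_t length, ptl_process_t src,
                   ptl_match_bits_t match, ptl_handle_me_t *mh)
{
    ptl_me_t me;

    /* one use-once match entry per expected message */
    me.start       = start;
    me.length      = length;
    me.ct_handle   = PTL_CT_NONE;
    me.min_free    = 0;
    me.uid         = PTL_UID_ANY;
    me.options     = PTL_ME_OP_PUT | PTL_ME_USE_ONCE;
    me.match_id    = src;
    me.match_bits  = match;
    me.ignore_bits = 0;

    ptl_assert(PtlMEAppend(ni, index, &me, PTL_PRIORITY_LIST, NULL, mh),
               PTL_OK);
}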
Example n. 13
int main(int   argc,
         char *argv[])
{
    ptl_handle_ni_t ni_h;
    ptl_pt_index_t  pt_index;
    uint64_t       *buf;
    ENTRY_T         entry;
    HANDLE_T        entry_h;
    ptl_md_t        md;
    ptl_handle_md_t md_h;
    int             rank;
    int             num_procs;
    int             ret;
    ptl_process_t  *procs;
    ptl_handle_eq_t eq_h;
    ptl_event_t     ev;
    ptl_hdr_data_t rcvd = 0;
    ptl_hdr_data_t goal = 0;
    ptl_hdr_data_t hdr_data = 1;
    ptl_size_t offset = sizeof(uint64_t);
    uint32_t distance;
    int sends = 0;

    CHECK_RETURNVAL(PtlInit());
    CHECK_RETURNVAL(libtest_init());

    rank = libtest_get_rank();
    num_procs = libtest_get_size();

    /* This test only succeeds if we have more than one rank */
    if (num_procs < 2) return 77;

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL,
                              PTL_PID_ANY, NULL, NULL, &ni_h));

    procs = libtest_get_mapping(ni_h);
    CHECK_RETURNVAL(PtlSetMap(ni_h, num_procs, procs));

    CHECK_RETURNVAL(PtlEQAlloc(ni_h, 1024, &eq_h));
    CHECK_RETURNVAL(PtlPTAlloc(ni_h, 0, eq_h, 0, &pt_index));
    assert(pt_index == 0);
    
    buf = malloc(sizeof(uint64_t) * num_procs);
    assert(NULL != buf);

    md.start = buf;
    md.length = sizeof(uint64_t) * num_procs;
    md.options = PTL_MD_UNORDERED;
    md.eq_handle = eq_h;
    md.ct_handle = PTL_CT_NONE;
    CHECK_RETURNVAL(PtlMDBind(ni_h, &md, &md_h));

    entry.start = buf;
    entry.length = sizeof(uint64_t) * num_procs;
    entry.ct_handle = PTL_CT_NONE;
    entry.uid = PTL_UID_ANY;
    entry.options = OPTIONS;
#if MATCHING == 1
    entry.match_id.rank = PTL_RANK_ANY;
    entry.match_bits = 0;
    entry.ignore_bits = 0;
    entry.min_free = 0;
#endif
    CHECK_RETURNVAL(APPEND(ni_h, pt_index, &entry,
                           PTL_PRIORITY_LIST, NULL, &entry_h));

    /* ensure ME is linked before the barrier */
    CHECK_RETURNVAL(PtlEQWait(eq_h, &ev));
    assert( ev.type == PTL_EVENT_LINK );

    libtest_barrier();

    /* Bruck's Concatenation Algorithm */
    buf[0] = rank;    /* widen the 32-bit rank into the first uint64_t slot */
    for (distance = 1; distance < num_procs; distance *= 2) {
        ptl_size_t to_xfer;
        int peer;
        ptl_process_t proc;

        if (rank >= distance) {
            peer = rank - distance;
        } else {
            peer = rank + (num_procs - distance);
        }

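        /* after r rounds this rank holds 2^r consecutive blocks; it sends
           them to (rank - distance) mod P and receives the matching range
           from (rank + distance) mod P, capping the final round at the
           blocks that remain */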
        to_xfer = sizeof(uint64_t) * MIN(distance, num_procs - distance);
        proc.rank = peer;
        CHECK_RETURNVAL(PtlPut(md_h, 
                               0, 
                               to_xfer, 
                               PTL_NO_ACK_REQ, 
                               proc,
                               0,
                               0,
                               offset,
                               NULL,
                               hdr_data));
        sends += 1;

        /* wait for completion of the proper receive, and keep count
           of uncompleted sends.  "rcvd" is an accumulator to deal
           with out-of-order receives, which are IDed by the
           hdr_data */
        goal |= hdr_data;
        while ((rcvd & goal) != goal) {
            ret = PtlEQWait(eq_h, &ev);
            switch (ret) {
            case PTL_OK:
                if (ev.type == PTL_EVENT_SEND) {
                    sends -= 1;
                } else {
                    rcvd |= ev.hdr_data;
                    assert(ev.type == PTL_EVENT_PUT);
                    assert(ev.rlength == ev.mlength);
                    assert((ev.rlength == to_xfer) || (ev.hdr_data != hdr_data));
                }
                break;
            default:
                fprintf(stderr, "PtlEQWait failure: %d\n", ret);
                abort();
            }
        }
        
        hdr_data <<= 1;
        offset += to_xfer;
    }

    /* wait for any SEND_END events not yet seen */
    while (sends) {
        ret = PtlEQWait(eq_h, &ev);
        switch (ret) {
        case PTL_OK:
            assert( ev.type == PTL_EVENT_SEND );
            sends -= 1;
            break;
        default:
            fprintf(stderr, "PtlEQWait failure: %d\n", ret);
            abort();
        }
    }

    CHECK_RETURNVAL(UNLINK(entry_h));
    CHECK_RETURNVAL(PtlMDRelease(md_h));
    free(buf);

    libtest_barrier();

    /* cleanup */
    CHECK_RETURNVAL(PtlPTFree(ni_h, pt_index));
    CHECK_RETURNVAL(PtlEQFree(eq_h));
    CHECK_RETURNVAL(PtlNIFini(ni_h));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
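The Bruck loop above relies on a MIN macro the excerpt never defines; the
conventional definition, shown only so the example stands alone:

#define MIN(a, b) (((a) < (b)) ? (a) : (b))

Each of the ceil(log2(P)) rounds doubles the amount of gathered data,
which is why the last round's transfer is capped at num_procs - distance
blocks.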
Example n. 14
int
ompi_mtl_portals_isend(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t* comm,
                       int dest,
                       int tag,
                       struct ompi_convertor_t *convertor,
                       mca_pml_base_send_mode_t mode,
                       bool blocking,
                       mca_mtl_request_t *mtl_request)
{
    int ret;
    ptl_match_bits_t match_bits;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_handle_me_t me_h;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_base_endpoint_t *endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
    ompi_mtl_portals_request_t *ptl_request = 
        (ompi_mtl_portals_request_t*) mtl_request;
    size_t buflen;

    assert(mtl == &ompi_mtl_portals.base);

    ret = ompi_mtl_datatype_pack(convertor, &md.start, &buflen,
                                 &(ptl_request->free_after));
    if (OMPI_SUCCESS != ret) return ret;
    md.length = buflen;

    ptl_request->event_callback = ompi_mtl_portals_send_progress;

    if (MCA_PML_BASE_SEND_READY == mode) {
        /* ready send: the matching receive is already posted, so the
           length doesn't matter.  Eagerly send data and don't wait
           for completion */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_READY_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "ready send bits: 0x%016llx\n", 
                             match_bits));

        md.threshold = 1;
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h,
                        md,
                        PTL_UNLINK,
                        &(md_h));
        if (PTL_OK != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ptl_request->event_callback = ompi_mtl_portals_send_progress_no_ack;

        ret = PtlPut(md_h,
                     PTL_NO_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     0);
        if (PTL_OK != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else if (md.length > ompi_mtl_portals.eager_limit) {
        /* it's a long message - same protocol for all send modes
           other than ready */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_LONG_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "long send bits: 0x%016llx (%d)\n", 
                             match_bits, dest));

        md.threshold = 2; /* send, {ack, get} */
        md.options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h,
                          OMPI_MTL_PORTALS_READ_TABLE_ID,
                          endpoint->ptl_proc,
                          (ptl_match_bits_t)(uintptr_t) ptl_request,
                          0,
                          PTL_UNLINK,
                          PTL_INS_AFTER,
                          &me_h);
        if (PTL_OK != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlMDAttach(me_h,
                          md,
                          PTL_UNLINK,
                          &(md_h));

        if (PTL_OK != ret) {
            PtlMEUnlink(me_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlPut(md_h,
                     PTL_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     (ptl_hdr_data_t)(uintptr_t) ptl_request);
        if (PTL_OK != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
        /* short synchronous message */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_SHORT_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short ssend bits: 0x%016llx (%d)\n", 
                             match_bits, dest));

        md.threshold = 2; /* send, {ack, put} */
        md.options = PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h,
                          OMPI_MTL_PORTALS_ACK_TABLE_ID,
                          endpoint->ptl_proc,
                          (ptl_match_bits_t)(uintptr_t) ptl_request,
                          0,
                          PTL_UNLINK,
                          PTL_INS_AFTER,
                          &me_h);
        if (PTL_OK != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlMDAttach(me_h,
                          md,
                          PTL_UNLINK,
                          &(md_h));

        if (PTL_OK != ret) {
            PtlMEUnlink(me_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlPut(md_h,
                     PTL_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     (ptl_hdr_data_t)(uintptr_t) ptl_request);
        if (PTL_OK != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else { 
        /* short send message */

        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_SHORT_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short send bits: 0x%016llx\n", 
                             match_bits));

        md.threshold = 1;
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h,
                        md,
                        PTL_UNLINK,
                        &(md_h));
        if (PTL_OK != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ptl_request->event_callback = ompi_mtl_portals_send_progress_no_ack;

        ret = PtlPut(md_h,
                     PTL_NO_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     0);
        if (PTL_OK != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }
    }

    return OMPI_SUCCESS;
}
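The send path hinges on PTL_SET_SEND_BITS, which packs the context id,
source rank, tag, and a protocol type (PTL_READY_MSG, PTL_LONG_MSG, or
PTL_SHORT_MSG) into the 64 match bits so the receiver can match on them.
The macro isn't part of this excerpt; the sketch below shows one
plausible packing and is an assumption, not the MTL's actual layout:

/* Hypothetical packing -- field widths are illustrative only; the real
   macro and its receive-side masks live in the MTL headers. */
#define EX_SET_SEND_BITS(bits, ctxid, src, tag, type)            \
    do {                                                         \
        (bits)  = ((ptl_match_bits_t)(ctxid) & 0xffff) << 48;    \
        (bits) |= ((ptl_match_bits_t)(src)   & 0xffff) << 32;    \
        (bits) |= ((ptl_match_bits_t)(tag)   & 0x3fffffff) << 2; \
        (bits) |= ((ptl_match_bits_t)(type)  & 0x3);             \
    } while (0)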
Example n. 15
static int
portals4_init_interface(void)
{
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* create event queues */
    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.send_queue_size,
                     &ompi_mtl_portals4.send_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.recv_queue_size,
                     &ompi_mtl_portals4.recv_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Create send and long message (read) portal table entries */
    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
                     PTL_PT_ONLY_USE_ONCE |
                     PTL_PT_ONLY_TRUNCATE |
                     PTL_PT_FLOWCTRL,
                     ompi_mtl_portals4.recv_eq_h,
                     REQ_RECV_TABLE_ID,
                     &ompi_mtl_portals4.recv_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    if (ompi_mtl_portals4.recv_idx != REQ_RECV_TABLE_ID) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n",
                            __FILE__, __LINE__, ompi_mtl_portals4.recv_idx);
        goto error;
    }

    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
                     PTL_PT_ONLY_USE_ONCE |
                     PTL_PT_ONLY_TRUNCATE,
                     ompi_mtl_portals4.send_eq_h,
                     REQ_READ_TABLE_ID,
                     &ompi_mtl_portals4.read_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    if (ompi_mtl_portals4.read_idx != REQ_READ_TABLE_ID) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n",
                            __FILE__, __LINE__, ompi_mtl_portals4.read_idx);
        goto error;
    }

    /* bind zero-length md for sending acks */
    md.start     = NULL;
    md.length    = 0;
    md.options   = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &ompi_mtl_portals4.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Bind MD across all memory */
    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &ompi_mtl_portals4.send_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Handle long overflows */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = ompi_mtl_portals4.uid;
    me.options = PTL_ME_OP_PUT |
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_COMM_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE;
    if (ompi_mtl_portals4.use_logical) {
        me.match_id.rank = PTL_RANK_ANY;
    } else {
        me.match_id.phys.nid = PTL_NID_ANY;
        me.match_id.phys.pid = PTL_PID_ANY;
    }
    me.match_bits = MTL_PORTALS4_LONG_MSG;
    me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK |
        MTL_PORTALS4_SOURCE_MASK |
        MTL_PORTALS4_TAG_MASK;
    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.recv_idx,
                      &me,
                      PTL_OVERFLOW_LIST,
                      NULL,
                      &ompi_mtl_portals4.long_overflow_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* attach short unex recv blocks */
    ret = ompi_mtl_portals4_recv_short_init();
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: short receive block initialization failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    ompi_mtl_portals4.opcount = 0;
#if OPAL_ENABLE_DEBUG
    ompi_mtl_portals4.recv_opcount = 0;
#endif

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    ret = ompi_mtl_portals4_flowctl_init();
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
#endif

    return OMPI_SUCCESS;

 error:
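    /* this unwind assumes every handle was initialized to
       PTL_INVALID_HANDLE (and each PT index to ~0) before this routine
       ran, so partially completed setup can be detected and only the
       resources actually allocated get torn down */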
    if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) {
        PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
        PtlMDRelease(ompi_mtl_portals4.zero_md_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
        PtlMDRelease(ompi_mtl_portals4.send_md_h);
    }
    if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
    }
    if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) {
        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) {
        PtlEQFree(ompi_mtl_portals4.send_eq_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) {
        PtlEQFree(ompi_mtl_portals4.recv_eq_h);
    }
    return OMPI_ERROR;
}
Example n. 16
int main(int   argc,
         char *argv[])
{
    ptl_handle_ni_t ni_handle;
    ptl_process_t   *procs;
    int             rank;
    ptl_pt_index_t  pt_index, signal_pt_index;
    HANDLE_T        signal_e_handle;
    HANDLE_T        signal_e2_handle;
    int             num_procs;
    ptl_handle_eq_t eq_handle;
    ptl_handle_ct_t ct_handle;
    ptl_handle_md_t md_handle;
    ptl_ni_limits_t limits_reqd, limits_actual;
    ENTRY_T         value_e;

    limits_reqd.max_entries = 1024;
    limits_reqd.max_unexpected_headers = ITERS*2;
    limits_reqd.max_mds = 1024;
    limits_reqd.max_eqs = 1024;
    limits_reqd.max_cts = 1024;
    limits_reqd.max_pt_index = 64;
    limits_reqd.max_iovecs = 1024;
    limits_reqd.max_list_size = 1024;
    limits_reqd.max_triggered_ops = 1024;
    limits_reqd.max_msg_size = 1048576;
    limits_reqd.max_atomic_size = 1048576;
    limits_reqd.max_fetch_atomic_size = 1048576;
    limits_reqd.max_waw_ordered_size = 1048576;
    limits_reqd.max_war_ordered_size = 1048576;
    limits_reqd.max_volatile_size = 1048576;
    limits_reqd.features = 0;

    CHECK_RETURNVAL(PtlInit());

    CHECK_RETURNVAL(libtest_init());

    rank = libtest_get_rank();
    num_procs = libtest_get_size();
    if (num_procs < 2) {
        fprintf(stderr, "test_flowctl_noeq requires at least two processes\n");
        return 77;
    }

    int iters;

    if (num_procs < ITERS)
        iters = ITERS*2+1;
    else
        iters = ITERS;

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL,
                              PTL_PID_ANY, &limits_reqd, &limits_actual, &ni_handle));
    procs = libtest_get_mapping(ni_handle);
    CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs));


    if (0 == rank) {

        /* create data PT space */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * iters + 64, &eq_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5,
                                   &pt_index));

        /* create signal ME */
        CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 1, eq_handle, 6,
                                   &signal_pt_index));
        value_e.start = NULL;
        value_e.length = 0;
        value_e.ct_handle = ct_handle;
        value_e.uid = PTL_UID_ANY;
        value_e.options = OPTIONS | PTL_LE_EVENT_CT_COMM;
#if INTERFACE == 1
        value_e.match_id.rank = PTL_RANK_ANY;
        value_e.match_bits = 0;
        value_e.ignore_bits = 0;
#endif
        CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_OVERFLOW_LIST, NULL, &signal_e_handle));
    } else {
        ptl_md_t        md;

        /* 16 extra just in case... */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, iters*2 + 16, &eq_handle));

        md.start = NULL;
        md.length = 0;
        md.options = 0;
        md.eq_handle = eq_handle;
        md.ct_handle = PTL_CT_NONE;

        CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle));
    }

    fprintf(stderr,"at barrier \n");
    libtest_barrier();

    if (0 == rank) {
        ptl_ct_event_t  ct;
        ptl_event_t ev;
        int ret, count = 0, saw_flowctl = 0;

        fprintf(stderr,"begin ctwait \n");
        /* wait for signal counts */
        CHECK_RETURNVAL(PtlCTWait(ct_handle, iters / 2, &ct));
        if (ct.success != iters / 2 || ct.failure != 0) {
            return 1;
        }
        fprintf(stderr,"done CT wait \n");
        /* wait for event entries */
        while (1) {
            ret = PtlEQGet(eq_handle, &ev);
            if (PTL_OK == ret) {
                count++;
                fprintf(stderr, "found EQ value \n");
            } else if (ret == PTL_EQ_EMPTY) {
                continue;
            } else {
                fprintf(stderr, "0: Unexpected return code from EQGet: %d\n", ret);
                return 1;
            }

            if (ev.type == PTL_EVENT_PT_DISABLED) {
                saw_flowctl++;
                break;
            }
        }

        fprintf(stderr, "0: Saw %d flowctl\n", saw_flowctl);
        if (saw_flowctl == 0) {
            return 1;
        }
        /* Now clear out all of the unexpected messages so we can clean up everything */
        CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e2_handle));
        ret = PTL_OK;
        while (ret != PTL_EQ_EMPTY)
            ret = PtlEQGet(eq_handle, &ev);
    } else {
        ptl_process_t target;
        ptl_event_t ev;
        int ret, count = 0, fails = 0;
        int i;

        target.rank = 0;
        printf("beginning puts \n");
        for (i = 0 ; i < iters ; ++i) {
            CHECK_RETURNVAL(PtlPut(md_handle,
                                   0,
                                   0,
                                   PTL_ACK_REQ,
                                   target,
                                   5,
                                   0,
                                   0,
                                   NULL,
                                   0));
            usleep(100);
        }

        while (count < iters) {
            ret = PtlEQGet(eq_handle, &ev);
            if (PTL_EQ_EMPTY == ret) {
                continue;
            } else if (PTL_OK != ret) {
                fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret);
                return 1;
            }

            if (ev.ni_fail_type == PTL_NI_OK) {
                if (ev.type == PTL_EVENT_SEND) {
                    continue;
                } else if (ev.type == PTL_EVENT_ACK) {
                    count++;
                } else {
                    fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type);
                }
            } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) {
                count++;
                fails++;
            } else {
                fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type);
                return 1;
            }
        }

        fprintf(stderr, "%d: Saw %d of %d ACKs as fails\n", rank, fails, count);
    }

    fprintf(stderr,"at final barrier \n");

    libtest_barrier();

    if (0 == rank) {

        CHECK_RETURNVAL(UNLINK(signal_e_handle));
        CHECK_RETURNVAL(UNLINK(signal_e2_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index));
        CHECK_RETURNVAL(PtlCTFree(ct_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    } else {
        CHECK_RETURNVAL(PtlMDRelease(md_handle));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    }

    fprintf(stderr,"final cleanup \n");
    CHECK_RETURNVAL(PtlNIFini(ni_handle));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
Example n. 17
int main(int   argc,
         char *argv[])
{
    ptl_handle_ni_t ni_handle;
    ptl_process_t   *procs;
    int             rank;
    ptl_pt_index_t  pt_index, signal_pt_index;
    HANDLE_T        value_e_handle, signal_e_handle;
    int             num_procs;
    ptl_handle_eq_t eq_handle;
    ptl_handle_ct_t ct_handle;
    ptl_handle_md_t md_handle;

    CHECK_RETURNVAL(PtlInit());

    CHECK_RETURNVAL(libtest_init());

    rank = libtest_get_rank();
    num_procs = libtest_get_size();
    if (num_procs < 2) {
        fprintf(stderr, "test_flowctl_noeq requires at least two processes\n");
        return 77;
    }

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL,
                              PTL_PID_ANY, NULL, NULL, &ni_handle));
    procs = libtest_get_mapping(ni_handle);
    CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs));


    if (0 == rank) {
        ENTRY_T         value_e;

        /* create data ME */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * ITERS / 2, &eq_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5,
                                   &pt_index));
        value_e.start = NULL;
        value_e.length = 0;
        value_e.ct_handle = PTL_CT_NONE;
        value_e.uid = PTL_UID_ANY;
        value_e.options = OPTIONS;
#if INTERFACE == 1
        value_e.match_id.rank = PTL_RANK_ANY;
        value_e.match_bits = 0;
        value_e.ignore_bits = 0;
#endif
        CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &value_e_handle));

        /* create signal ME */
        CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 0, PTL_EQ_NONE, 6,
                                   &signal_pt_index));
        value_e.start = NULL;
        value_e.length = 0;
        value_e.ct_handle = ct_handle;
        value_e.uid = PTL_UID_ANY;
        value_e.options = OPTIONS | PTL_LE_EVENT_SUCCESS_DISABLE | PTL_LE_EVENT_CT_COMM;
#if INTERFACE == 1
        value_e.match_id.rank = PTL_RANK_ANY;
        value_e.match_bits = 0;
        value_e.ignore_bits = 0;
#endif
        CHECK_RETURNVAL(APPEND(ni_handle, 6, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e_handle));
    } else {
        ptl_md_t        md;

        /* 16 extra just in case... */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, ITERS * 2 + 16, &eq_handle));

        md.start = NULL;
        md.length = 0;
        md.options = 0;
        md.eq_handle = eq_handle;
        md.ct_handle = PTL_CT_NONE;

        CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle));
    }

    libtest_barrier();

    if (0 == rank) {
        ptl_ct_event_t  ct;
        ptl_event_t ev;
        int ret, count = 0, saw_dropped = 0, saw_flowctl = 0;

        /* wait for signal counts */
        CHECK_RETURNVAL(PtlCTWait(ct_handle, num_procs - 1, &ct));
        if (ct.success != num_procs - 1 || ct.failure != 0) {
            return 1;
        }

        /* wait for event entries */
        while (count < ITERS * (num_procs - 1)) {
            ret = PtlEQWait(eq_handle, &ev);
            if (PTL_OK == ret) {
                ;
            } else if (PTL_EQ_DROPPED == ret) {
                saw_dropped++;
                if (ev.type == PTL_EVENT_PT_DISABLED){
                    saw_flowctl++;
                    CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index));
                }
                break;
            } else {
                fprintf(stderr, "0: Unexpected return code from EQWait: %d\n", ret);
                return 1;
            }

            if (ev.type == PTL_EVENT_PT_DISABLED) {
                CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index));
                saw_flowctl++;
            } else {
                count++;
            }
        }

        fprintf(stderr, "0: Saw %d dropped, %d flowctl\n", saw_dropped, saw_flowctl);
        if (saw_flowctl == 0) {
            return 1;
        }
    } else {
        ptl_process_t target;
        ptl_event_t ev;
        int ret, count = 0, fails = 0;
        int i;
        int *fail_seen;

        fail_seen = malloc(sizeof(int) * ITERS);
        if (NULL == fail_seen) {
             fprintf(stderr, "%d: malloc failed\n", rank);
             return 1;
        }
        memset(fail_seen, 0, sizeof(int) * ITERS);

        target.rank = 0;
        for (i = 0 ; i < ITERS ; ++i) {
            CHECK_RETURNVAL(PtlPut(md_handle,
                                   0,
                                   0,
                                   PTL_ACK_REQ,
                                   target,
                                   5,
                                   0,
                                   0,
                                   (void*)(size_t)i,
                                   0));
            usleep(100);
        }

        while (count < ITERS) {
            ret = PtlEQGet(eq_handle, &ev);
            if (PTL_EQ_EMPTY == ret) {
                continue;
            } else if (PTL_OK != ret) {
                fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret);
                return 1;
            }

            if (ev.ni_fail_type == PTL_NI_OK) {
                if (ev.type == PTL_EVENT_SEND) {
                    continue;
                } else if (ev.type == PTL_EVENT_ACK) {
                    count++;
                } else {
                    fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type);
                }
            } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) {
                int iter = (size_t) ev.user_ptr;
                if (fail_seen[iter]++ > 0) {
                    fprintf(stderr, "%d: Double report of PT_DISABLED for "
                            "iteration %d\n", rank, iter);
                    return 1;
                }
                count++;
                fails++;
            } else {
                fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type);
                return 1;
            }
        }

        fprintf(stderr, "%d: Saw %d of %d events as fails\n", rank, fails, count);

        CHECK_RETURNVAL(PtlPut(md_handle,
                               0,
                               0,
                               PTL_NO_ACK_REQ,
                               target,
                               6,
                               0,
                               0,
                               NULL,
                               0));
        /* wait for the send event on the last put */
        CHECK_RETURNVAL(PtlEQWait(eq_handle, &ev));

        while (fails > 0) {
            CHECK_RETURNVAL(PtlPut(md_handle,
                                   0,
                                   0,
                                   PTL_ACK_REQ,
                                   target,
                                   5,
                                   0,
                                   0,
                                   NULL,
                                   0));
            while (1) {
                ret = PtlEQWait(eq_handle, &ev);
                if (PTL_OK != ret) {
                    fprintf(stderr, "%d: PtlEQWait returned %d\n", rank, ret);
                    return 1;
                }

                if (ev.ni_fail_type == PTL_NI_OK) {
                    if (ev.type == PTL_EVENT_SEND) {
                        continue;
                    } else if (ev.type == PTL_EVENT_ACK) {
                        fails--;
                        break;
                    } else {
                        fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type);
                    }
                } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) {
                    break;
                } else {
                    fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type);
                    return 1;
                }
            }
        }
    }

    libtest_barrier();

    if (0 == rank) {
        CHECK_RETURNVAL(UNLINK(signal_e_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index));
        CHECK_RETURNVAL(PtlCTFree(ct_handle));
        CHECK_RETURNVAL(UNLINK(value_e_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    } else {
        CHECK_RETURNVAL(PtlMDRelease(md_handle));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    }

    CHECK_RETURNVAL(PtlNIFini(ni_handle));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
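Several of these tests lean on a CHECK_RETURNVAL macro from the shared
test harness. A plausible stand-in (hypothetical, since the harness
header isn't part of this excerpt):

/* abort loudly on any non-PTL_OK return, pointing at the failing call */
#define CHECK_RETURNVAL(x)                                  \
    do {                                                    \
        int check_ret_ = (x);                               \
        if (PTL_OK != check_ret_) {                         \
            fprintf(stderr, "%s:%d: %s returned %d\n",      \
                    __FILE__, __LINE__, #x, check_ret_);    \
            abort();                                        \
        }                                                   \
    } while (0)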
Example n. 18
/*
    /!\ Called by each process /!\
 */
static int
portals4_init_query(bool enable_progress_threads,
        bool enable_mpi_threads)
{
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* Initialize Portals and create a physical, matching interface */
    ret = PtlInit();
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlNIInit(PTL_IFACE_DEFAULT,
            PTL_NI_PHYSICAL | PTL_NI_MATCHING,
            PTL_PID_ANY,
            NULL,
            &mca_coll_portals4_component.ni_limits,
            &mca_coll_portals4_component.ni_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlNIInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }


    ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    /* FIX ME: Need to make sure our ID matches with the MTL... */
    ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetUid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlEQAlloc(mca_coll_portals4_component.ni_h,
            MCA_COLL_PORTALS4_EQ_SIZE,
            &mca_coll_portals4_component.eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlEQAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_TABLE_ID,
            &mca_coll_portals4_component.pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_FINISH_TABLE_ID,
            &mca_coll_portals4_component.finish_pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    /* Bind MD/MDs across all memory.  We prefer (for obvious reasons)
       to have a single MD across all of memory */
    memset(&md, 0, sizeof(ptl_md_t));
    md.start = 0;
    md.length = 0;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.data_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length));

    /* setup finish ack ME */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = 0;
    me.ignore_bits = 0;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.finish_pt_idx,
            &me,
            PTL_PRIORITY_LIST,
            NULL,
            &mca_coll_portals4_component.finish_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* This ME is used for RTR exchange only */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;

    /* Note : the RTR bit must be set to match this ME,
     * this allows to discriminate the RTR from data flow
     * (especially for the Barrier operations)
     */
    COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0);
    me.ignore_bits = ~COLL_PORTALS4_RTR_MASK;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.pt_idx,
            &me,
            PTL_OVERFLOW_LIST,
            NULL,
            &mca_coll_portals4_component.unex_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* activate progress callback */
    ret = opal_progress_register(portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: opal_progress_register failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;

    }
    return OMPI_SUCCESS;

}
Example n. 19
static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    ptl_md_t md;
    ptl_ni_limits_t desired;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PTL_INIT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PTL_INIT);

    /* first make sure that our private fields in the vc and req fit into the area provided  */
    MPIR_Assert(sizeof(MPID_nem_ptl_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
    MPIR_Assert(sizeof(MPID_nem_ptl_req_area) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN);

    /* Make sure our IOV is the same as portals4's IOV */
    MPIR_Assert(sizeof(ptl_iovec_t) == sizeof(MPL_IOV));
    MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_base)) == ((void*)&(((MPL_IOV*)0)->MPL_IOV_BUF)));
    MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_len))  == ((void*)&(((MPL_IOV*)0)->MPL_IOV_LEN)));
    MPIR_Assert(sizeof(((ptl_iovec_t*)0)->iov_len) == sizeof(((MPL_IOV*)0)->MPL_IOV_LEN));
            

    mpi_errno = MPIDI_CH3I_Register_anysource_notification(MPID_nem_ptl_anysource_posted, MPID_nem_ptl_anysource_matched);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    MPIDI_Anysource_improbe_fn = MPID_nem_ptl_anysource_improbe;

    /* init portals */
    ret = PtlInit();
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlinit", "**ptlinit %s", MPID_nem_ptl_strerror(ret));
    
    /* do an interface pre-init to get the default limits struct */
    ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
                    PTL_PID_ANY, NULL, &desired, &MPIDI_nem_ptl_ni);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));

    /* finalize the interface so we can re-init with our desired maximums */
    ret = PtlNIFini(MPIDI_nem_ptl_ni);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlnifini", "**ptlnifini %s", MPID_nem_ptl_strerror(ret));

    /* set higher limits if they are determined to be too low */
    if (desired.max_unexpected_headers < UNEXPECTED_HDR_COUNT && getenv("PTL_LIM_MAX_UNEXPECTED_HEADERS") == NULL)
        desired.max_unexpected_headers = UNEXPECTED_HDR_COUNT;
    if (desired.max_list_size < LIST_SIZE && getenv("PTL_LIM_MAX_LIST_SIZE") == NULL)
        desired.max_list_size = LIST_SIZE;
    if (desired.max_entries < ENTRY_COUNT && getenv("PTL_LIM_MAX_ENTRIES") == NULL)
        desired.max_entries = ENTRY_COUNT;

    /* do the real init */
    ret = PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
                    PTL_PID_ANY, &desired, &MPIDI_nem_ptl_ni_limits, &MPIDI_nem_ptl_ni);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));

    /* allocate EQs for each portal */
    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_get_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_control_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_rpt_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate a separate EQ for origin events.  with this, we can
       rate-limit operations to prevent a locally triggered flow-control event */
    ret = PtlEQAlloc(MPIDI_nem_ptl_ni, EVENT_COUNT, &MPIDI_nem_ptl_origin_eq);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqalloc", "**ptleqalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate portal for matching messages */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate portal for large messages where receiver does a get */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_get_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_get_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate portal for MPICH control messages */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_control_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_control_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate retransmission portal backing the primary (matching) portal */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate retransmission portal backing the get portal */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_get_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allocate retransmission portal backing the control portal */
    ret = PtlPTAlloc(MPIDI_nem_ptl_ni, PTL_PT_ONLY_USE_ONCE | PTL_PT_ONLY_TRUNCATE | PTL_PT_FLOWCTRL, MPIDI_nem_ptl_rpt_eq,
                     PTL_PT_ANY, &MPIDI_nem_ptl_control_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* create an MD that covers all of memory */
    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0x0;
    md.eq_handle = MPIDI_nem_ptl_origin_eq;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &MPIDI_nem_ptl_global_md);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

    /* currently, rportals only works with a single NI and EQ */
    ret = MPID_nem_ptl_rptl_init(MPIDI_Process.my_pg->size, ORIGIN_EVENTS, get_target_info);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));

    /* allow rportal to manage the primary portal and retransmit if needed */
    ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_pt, MPIDI_nem_ptl_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* allow rportal to manage the get and control portals, but we
     * don't expect retransmission to be needed on the get portal, so
     * we pass PTL_PT_ANY as the dummy portal.  unfortunately, portals
     * does not have an "invalid" PT constant, which would have been
     * more appropriate to pass here. */
    ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_get_pt, MPIDI_nem_ptl_get_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    ret = MPID_nem_ptl_rptl_ptinit(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_origin_eq, MPIDI_nem_ptl_control_pt, MPIDI_nem_ptl_control_rpt_pt);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlptalloc", "**ptlptalloc %s", MPID_nem_ptl_strerror(ret));

    /* create business card */
    mpi_errno = get_business_card(pg_rank, bc_val_p, val_max_sz_p);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* init other modules */
    mpi_errno = MPID_nem_ptl_poll_init();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_nem_ptl_nm_init();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    
 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PTL_INIT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
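The probe-then-request dance around PtlNIInit above is the standard way
to negotiate NI limits. Condensed into isolation (a sketch, with error
handling elided and the 1024 floor chosen arbitrarily):

ptl_ni_limits_t desired, actual;
ptl_handle_ni_t ni;

/* pre-init with no request just to learn the implementation defaults */
PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
          PTL_PID_ANY, NULL, &desired, &ni);
PtlNIFini(ni);

/* raise anything judged too low, then re-init with the adjusted request */
if (desired.max_entries < 1024)
    desired.max_entries = 1024;
PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
          PTL_PID_ANY, &desired, &actual, &ni);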
Example n. 20
static int
component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                 struct ompi_communicator_t *comm, struct ompi_info_t *info,
                 int flavor, int *model)
{
    ompi_osc_portals4_module_t *module = NULL;
    int ret = OMPI_ERROR;
    int tmp;
    ptl_md_t md;
    ptl_me_t me;
    char *name;

    if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED;

    /* create module structure */
    module = (ompi_osc_portals4_module_t*)
        calloc(1, sizeof(ompi_osc_portals4_module_t));
    if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_portals4_module_template,
           sizeof(ompi_osc_base_module_t));

    /* fill in our part */
    if (MPI_WIN_FLAVOR_ALLOCATE == flavor) {
        module->free_after = *base = malloc(size);
        if (NULL == *base) goto error;
    } else {
        module->free_after = NULL;
    }

    ret = ompi_comm_dup(comm, &module->comm);
    if (OMPI_SUCCESS != ret) goto error;

    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                        "portals4 component creating window with id %d",
                        ompi_comm_get_cid(module->comm));

    asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* share everyone's displacement units. Only do an allgather if
       strictly necessary, since it requires O(p) state. */
    tmp = disp_unit;
    ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0,
                                          module->comm,
                                          module->comm->c_coll.coll_bcast_module);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: MPI_Bcast failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    tmp = (tmp == disp_unit) ? 1 : 0;
    ret = module->comm->c_coll.coll_allreduce(MPI_IN_PLACE, &tmp, 1, MPI_INT, MPI_LAND,
                                              module->comm, module->comm->c_coll.coll_allreduce_module);
    if (OMPI_SUCCESS != ret) goto error;
    if (tmp == 1) {
        module->disp_unit = disp_unit;
        module->disp_units = NULL;
    } else {
        module->disp_unit = -1;
        module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm));
        ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT,
                                                  module->disp_units, 1, MPI_INT,
                                                  module->comm,
                                                  module->comm->c_coll.coll_allgather_module);
        if (OMPI_SUCCESS != ret) goto error;
    }

    module->ni_h = mca_osc_portals4_component.matching_ni_h;
    module->pt_idx = mca_osc_portals4_component.matching_pt_idx;

    ret = PtlCTAlloc(module->ni_h, &(module->ct_h));
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
    md.eq_handle = mca_osc_portals4_component.matching_eq_h;
    md.ct_handle = module->ct_h;
    ret = PtlMDBind(module->ni_h, &md, &module->md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

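    /* A second descriptor over the same range, but without
       PTL_MD_EVENT_SUCCESS_DISABLE: presumably for operations that
       need a full event in addition to the counter update. */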
    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
    md.eq_handle = mca_osc_portals4_component.matching_eq_h;
    md.ct_handle = module->ct_h;
    ret = PtlMDBind(module->ni_h, &md, &module->req_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

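    /* Expose the window to peers: dynamic windows cover the whole
       address space, other flavors only the window buffer.  Matching
       is on the communicator's context id. */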
    if (MPI_WIN_FLAVOR_DYNAMIC == flavor) {
        me.start = 0;
        me.length = PTL_SIZE_MAX;
    } else {
        me.start = *base;
        me.length = size;
    }
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = module->comm->c_contextid;
    me.ignore_bits = 0;

    ret = PtlMEAppend(module->ni_h,
                      module->pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &module->data_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

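    /* A second match entry exposes the synchronization state
       structure, distinguished by the OSC_PORTALS4_MB_CONTROL bit in
       the match bits. */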
    me.start = &module->state;
    me.length = sizeof(module->state);
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = module->comm->c_contextid | OSC_PORTALS4_MB_CONTROL;
    me.ignore_bits = 0;

    ret = PtlMEAppend(module->ni_h,
                      module->pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &module->control_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

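    /* Cap atomic message sizes: when accumulate ordering is requested,
       the NI's ordered-size limit applies on top of the raw maximum. */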
    module->opcount = 0;
    module->match_bits = module->comm->c_contextid;
    module->atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
        mca_osc_portals4_component.matching_atomic_max :
        MIN(mca_osc_portals4_component.matching_atomic_max,
            mca_osc_portals4_component.matching_atomic_ordered_size);
    module->fetch_atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
        mca_osc_portals4_component.matching_fetch_atomic_max :
        MIN(mca_osc_portals4_component.matching_fetch_atomic_max,
            mca_osc_portals4_component.matching_atomic_ordered_size);

    module->zero = 0;
    module->one = 1;
    module->start_group = NULL;
    module->post_group = NULL;

    module->state.post_count = 0;
    module->state.complete_count = 0;
    if (check_config_value_bool("no_locks", info)) {
        module->state.lock = LOCK_ILLEGAL;
    } else {
        module->state.lock = LOCK_UNLOCKED;
    }

    OBJ_CONSTRUCT(&module->outstanding_locks, opal_list_t);

    module->passive_target_access_epoch = false;

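    /* On cache-coherent x86 targets the public and private window
       copies cannot diverge, so the unified memory model can be
       advertised; be conservative elsewhere. */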
#if OPAL_ASSEMBLY_ARCH == OPAL_AMD64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32
    *model = MPI_WIN_UNIFIED;
#else
    *model = MPI_WIN_SEPARATE;
#endif

    win->w_osc_module = &module->super;

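    /* Make the results of any prior atomic operations visible before
       peers start issuing RMA at this window. */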
    PtlAtomicSync();

    /* Make sure that everyone's ready to receive. */
    module->comm->c_coll.coll_barrier(module->comm,
                                      module->comm->c_coll.coll_barrier_module);

    return OMPI_SUCCESS;

 error:
    /* BWB: FIX ME: This is all wrong... */
    if (0 != module->ct_h) PtlCTFree(module->ct_h);
    if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h);
    if (0 != module->req_md_h) PtlMDRelease(module->req_md_h);
    if (0 != module->md_h) PtlMDRelease(module->md_h);
    if (NULL != module->comm) ompi_comm_free(&module->comm);
    if (NULL != module) free(module);

    return ret;
}
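
The displacement-unit exchange in component_select() is a small protocol of its own: rank 0's value is broadcast, a logical-AND allreduce decides whether every rank matches it, and the O(p) allgather is paid only on disagreement. Below is a minimal, self-contained sketch of the same pattern in plain MPI; the function and variable names are illustrative, not the Open MPI internals.

#include <mpi.h>
#include <stdlib.h>

/* Returns the common displacement unit, or -1 with *units pointing at
   a malloc'd per-rank table when the ranks disagree. */
static int negotiate_disp_unit(MPI_Comm comm, int disp_unit, int **units)
{
    int root_unit = disp_unit;
    int agree, size;

    *units = NULL;
    MPI_Comm_size(comm, &size);

    /* everyone learns rank 0's value... */
    MPI_Bcast(&root_unit, 1, MPI_INT, 0, comm);

    /* ...and agreement holds only if every rank matches it */
    agree = (root_unit == disp_unit) ? 1 : 0;
    MPI_Allreduce(MPI_IN_PLACE, &agree, 1, MPI_INT, MPI_LAND, comm);
    if (agree) return disp_unit;

    /* disagreement: pay the O(p) allgather */
    *units = malloc(size * sizeof(int));
    if (NULL == *units) MPI_Abort(comm, 1);  /* sketch: no graceful recovery */
    MPI_Allgather(&disp_unit, 1, MPI_INT, *units, 1, MPI_INT, comm);
    return -1;
}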
Example 21
/* called when a receive should be progressed */
int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request = 
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate expected: %ld %ld", 
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

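        /* Long message under the rendezvous protocol: only the first
           eager_limit bytes arrived with the put, so pull the rest
           from the sender starting at that offset. */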
        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
            ptl_md_t md;

            md.start = (char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ompi_mtl_portals4.eager_limit,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                PtlMDRelease(ptl_request->md_h);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }

        } else {
            /* make sure the data is in the right place */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            if (OMPI_SUCCESS != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_REPLY:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got reply event",
                             ptl_request->opcount, ptl_request->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            PtlMDRelease(ptl_request->md_h);
            goto callback_error;
        }
        /* set the remaining status fields; MPI_SOURCE and MPI_TAG were
           already filled in before the PtlGet was issued */
        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (ompi_mtl_portals4.protocol == rndv) {
            ptl_request->super.super.ompi_req->req_status._ucount +=
                ompi_mtl_portals4.eager_limit;
        }

        /* make sure the data is in the right place.  Use _ucount for
           the total length because it will be set correctly for all
           three protocols. mlength is only correct for eager, and
           delivery_len is the length of the buffer, not the length of
           the send. */
        ret = ompi_mtl_datatype_unpack(ptl_request->convertor, 
                                       ptl_request->delivery_ptr, 
                                       ptl_request->super.super.ompi_req->req_status._ucount);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                __FILE__, __LINE__, ret);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
        }
        PtlMDRelease(ptl_request->md_h);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, reply",
                             ptl_request->opcount, ptl_request->hdr_data));
        ptl_request->super.super.completion_callback(&ptl_request->super.super);
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate unexpected: %ld %ld %d", 
                                msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* Overflow (unexpected message) case.  Short messages have
           already been deposited in an overflow buffer (ev->start);
           long messages are still sitting at the source and must be
           fetched with a PtlGet below. */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor, 
                                            &iov, &iov_count,
                                            &max_data );
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (ret < 0) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (PTL_OK != ret) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {
            ptl_md_t md;

            if (ev->mlength > 0) {
                /* if rndv or triggered, copy the eager part to the right place */
                memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
            }

            md.start = (char*) ptl_request->delivery_ptr + ev->mlength;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ev->mlength;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) getting long data",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ev->mlength,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                PtlMDRelease(ptl_request->md_h);
                goto callback_error;
            }
        }

        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = 
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}
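
For context, here is a hedged sketch of the event-queue progress loop that would drive a callback like ompi_mtl_portals4_recv_progress(): the Portals4 event queue is drained with PtlEQGet(), each event carries its request in user_ptr, and the request's registered event_callback finishes the operation. The request structure below is a stand-in assumption, not the actual Open MPI type.

#include <portals4.h>
#include <stddef.h>

/* Stand-in for the MTL's base request type: each outstanding operation
   registers the callback that knows how to finish it (assumed shape,
   not the real Open MPI structure). */
typedef struct sketch_request {
    int (*event_callback)(ptl_event_t *ev, struct sketch_request *req);
} sketch_request_t;

/* Drain the event queue, dispatching each event to the request it was
   issued against.  Returns the number of events handled. */
static int sketch_progress(ptl_handle_eq_t eq_h)
{
    ptl_event_t ev;
    int count = 0;

    while (PTL_OK == PtlEQGet(eq_h, &ev)) {
        sketch_request_t *req = (sketch_request_t*) ev.user_ptr;
        if (NULL != req && NULL != req->event_callback) {
            req->event_callback(&ev, req);
        }
        count++;
    }
    /* PTL_EQ_EMPTY simply means nothing is pending; PTL_EQ_DROPPED
       would indicate lost events and a flow-control problem. */
    return count;
}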