Beispiel #1
0
/*
 * Describe the buffer [start, start + length) as a use-once match entry that
 * takes puts from `src` whose match bits equal `tag` exactly (no ignore bits),
 * and append it to the priority list of portal table entry `index` on `ni`.
 *
 * The new entry's handle is stored through `mh`.  The PtlMEAppend() return
 * code is passed to LIBTEST_CHECK together with a fixed message.
 */
static void postME(ptl_handle_ni_t  ni,
                   ptl_pt_index_t   index,
                   void            *start,
                   ptl_size_t       length,
                   ptl_process_t    src,
                   int              tag,
                   ptl_handle_me_t *mh)
{
    const unsigned int flags = PTL_ME_OP_PUT | PTL_ME_ACK_DISABLE |
                               PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE |
                               PTL_ME_EVENT_LINK_DISABLE;
    ptl_me_t entry;
    int      status;

    /* Memory region backing the entry. */
    entry.start    = (char *)start;
    entry.length   = length;
    entry.min_free = 0;

    /* Matching criteria: one specific initiator, exact tag. */
    entry.match_id    = src;
    entry.match_bits  = tag;
    entry.ignore_bits = 0;
    entry.uid         = PTL_UID_ANY;

    /* Behaviour flags; no counting event is attached. */
    entry.options   = flags;
    entry.ct_handle = PTL_CT_NONE;

    status = PtlMEAppend(ni, index, &entry, PTL_PRIORITY_LIST, NULL, mh);
    LIBTEST_CHECK(status, "Error in libtest_CreateME(): PtlMEAppend");
}
Beispiel #2
0
/*
 * Append a short-message receive block to the overflow list of the receive
 * portal table entry.
 *
 * The match entry covers block->start for recv_short_size bytes, accepts puts
 * from any initiator, and matches MTL_PORTALS4_SHORT_MSG while ignoring the
 * context, source and tag bits.  The block is marked
 * BLOCK_STATUS_WAITING_LINK (under short_block_mutex) before the append is
 * issued.
 *
 * Returns OMPI_SUCCESS when PtlMEAppend() reports PTL_OK, otherwise the value
 * produced by ompi_mtl_portals4_get_error() for the Portals return code.
 */
static inline int
ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
{
    ptl_me_t me;
    int rc;

    /* Region handed to Portals. */
    me.start = block->start;
    me.length = ompi_mtl_portals4.recv_short_size;
    me.min_free = ompi_mtl_portals4.short_limit;

    /* Wildcard initiator, expressed in whichever addressing mode is in use. */
    if (!ompi_mtl_portals4.use_logical) {
        me.match_id.phys.nid = PTL_NID_ANY;
        me.match_id.phys.pid = PTL_PID_ANY;
    } else {
        me.match_id.rank = PTL_RANK_ANY;
    }
    me.uid = ompi_mtl_portals4.uid;

    /* Only the protocol bits take part in matching. */
    me.match_bits = MTL_PORTALS4_SHORT_MSG;
    me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK;

    me.ct_handle = PTL_CT_NONE;
    me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_COMM_DISABLE |
                 PTL_ME_MANAGE_LOCAL | PTL_ME_MAY_ALIGN;

    /* Record the pending state before the append is issued. */
    OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex);
    block->status = BLOCK_STATUS_WAITING_LINK;
    OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);

    rc = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx,
                     &me, PTL_OVERFLOW_LIST,
                     &block->request, &block->me_h);
    if (OPAL_LIKELY(rc == PTL_OK)) {
        return OMPI_SUCCESS;
    }

    return ompi_mtl_portals4_get_error(rc);
}
Beispiel #3
0
/*
 * Expose `left_to_send` bytes starting at `buf` through a sequence of
 * use-once match entries on the get portal, each at most
 * MPIDI_nem_ptl_ni_limits.max_msg_size bytes long.
 *
 * Every entry matches the peer identified by `vc` with `match_bits` exactly
 * and carries `sreq` as its user pointer.  The entry handles are stored in a
 * freshly allocated array hung off REQ_PTL(sreq)->get_me_p, and the request's
 * completion counter is incremented once per entry.
 *
 * NOTE(review): the result of MPIU_Malloc is used without a NULL check, and
 * PtlMEAppend failures are only caught by MPIU_Assert.
 */
static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl_match_bits_t match_bits, MPID_Request *sreq)
{
    int slot, ret, was_incomplete;
    MPID_nem_ptl_vc_area *vc_ptl = VC_PTL(vc);
    ptl_me_t me;

    /* fields shared by every chunk; only start/length change per iteration */
    me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                   PTL_ME_EVENT_UNLINK_DISABLE );
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.min_free = 0;
    me.match_id = vc_ptl->id;
    me.match_bits = match_bits;
    me.ignore_bits = 0;
    me.start = buf;

    /* one handle per full-size chunk, plus one for the remainder */
    REQ_PTL(sreq)->get_me_p = MPIU_Malloc(sizeof(ptl_handle_me_t) *
                                          ((left_to_send / MPIDI_nem_ptl_ni_limits.max_msg_size) + 1));

    /* walk the buffer chunk by chunk until nothing is left to describe */
    slot = 0;
    while (left_to_send > 0) {
        /* clamp the chunk to the largest message the interface allows */
        me.length = (left_to_send > MPIDI_nem_ptl_ni_limits.max_msg_size)
                        ? MPIDI_nem_ptl_ni_limits.max_msg_size
                        : left_to_send;

        ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                          &REQ_PTL(sreq)->get_me_p[slot]);
        DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
        MPIU_Assert(ret == 0);

        /* each posted entry corresponds to one more outstanding get */
        MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
        MPIU_Assert(was_incomplete);

        /* advance past the chunk just described */
        me.start = (char *)me.start + me.length;
        left_to_send -= me.length;
        slot++;
    }
}
Beispiel #4
0
/*
 * Take `block` off the waiting list and append it to the overflow list of the
 * receive portal table entry.
 *
 * The match entry covers block->start for recv_short_size bytes, accepts puts
 * from any physical nid/pid, and matches MTL_PORTALS4_SHORT_MSG while ignoring
 * the context, source and tag bits.
 *
 * On PTL_OK the block is added to active_recv_short_blocks and OMPI_SUCCESS is
 * returned.  Otherwise the value from ompi_mtl_portals4_get_error() is
 * returned; in that case the block has already been removed from the waiting
 * list and is not re-inserted here.
 */
static inline int
ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
{
    ptl_me_t me;
    int rc;

    opal_list_remove_item(&ompi_mtl_portals4.waiting_recv_short_blocks, &block->base);

    /* Region handed to Portals. */
    me.start = block->start;
    me.length = ompi_mtl_portals4.recv_short_size;
    me.min_free = ompi_mtl_portals4.eager_limit;

    /* Any initiator; only the protocol bits take part in matching. */
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.uid = ompi_mtl_portals4.uid;
    me.match_bits = MTL_PORTALS4_SHORT_MSG;
    me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK | MTL_PORTALS4_SOURCE_MASK | MTL_PORTALS4_TAG_MASK;

    me.ct_handle = PTL_CT_NONE;
    me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_LINK_DISABLE |
                 PTL_ME_MANAGE_LOCAL | PTL_ME_MAY_ALIGN;

    rc = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx,
                     &me, PTL_OVERFLOW_LIST,
                     &block->request, &block->me_h);
    if (OPAL_LIKELY(rc == PTL_OK)) {
        opal_list_append(&ompi_mtl_portals4.active_recv_short_blocks,
                         &block->base);
        return OMPI_SUCCESS;
    }

    return ompi_mtl_portals4_get_error(rc);
}
Beispiel #5
0
/*
 * Append a zero-length, use-once match entry to the priority list of portal
 * table entry `pt` on `ni_handle`, accepting puts and gets from any physical
 * nid/pid with match bits 0.  The handle is written through `me_handle`.
 *
 * The append is reissued for as long as it returns PTL_NO_SPACE.  Any other
 * return code ends the loop and is passed to RPTLU_ERR_POP; the function
 * returns that code.
 */
int rptli_post_control_buffer(ptl_handle_ni_t ni_handle, ptl_pt_index_t pt,
                              ptl_handle_me_t * me_handle)
{
    int ret;
    ptl_me_t me;
    ptl_process_t any_proc;
    MPIDI_STATE_DECL(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);

    MPIDI_FUNC_ENTER(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);

    /* wildcard initiator */
    any_proc.phys.nid = PTL_NID_ANY;
    any_proc.phys.pid = PTL_PID_ANY;

    /* empty buffer: the entry carries no payload */
    me.start = NULL;
    me.length = 0;
    me.min_free = 0;

    me.match_id = any_proc;
    me.match_bits = 0;
    me.ignore_bits = 0;
    me.uid = PTL_UID_ANY;

    me.ct_handle = PTL_CT_NONE;
    me.options = (PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE |
                  PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE);

    /* keep trying while the interface reports it has no room for the entry */
    do {
        ret = PtlMEAppend(ni_handle, pt, &me, PTL_PRIORITY_LIST, NULL, me_handle);
    } while (ret == PTL_NO_SPACE);
    RPTLU_ERR_POP(ret, "Error appending empty buffer to priority list\n");

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
    return ret;

  fn_fail:
    goto fn_exit;
}
Beispiel #6
0
/*
 * Post overflow buffer number `i` (0 <= i < NUM_OVERFLOW_ME, asserted) on the
 * overflow list of MPIDI_nem_ptl_pt.
 *
 * The entry covers overflow_buf[i] for OVERFLOW_LENGTH bytes, accepts puts
 * from any physical nid/pid, and ignores every match bit.  The index `i` is
 * stored as the entry's user pointer and the handle lands in
 * overflow_me_handle[i].
 *
 * Returns mpi_errno: MPI_SUCCESS unless MPIR_ERR_CHKANDJUMP1 updates it for a
 * non-zero PtlMEAppend() return code.
 */
static int append_overflow(int i)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    ptl_me_t me;
    ptl_process_t wildcard;
    MPIDI_STATE_DECL(MPID_STATE_APPEND_OVERFLOW);

    MPIDI_FUNC_ENTER(MPID_STATE_APPEND_OVERFLOW);

    MPIU_Assert(i >= 0 && i < NUM_OVERFLOW_ME);

    wildcard.phys.nid = PTL_NID_ANY;
    wildcard.phys.pid = PTL_PID_ANY;

    /* buffer backing this overflow slot */
    me.start = overflow_buf[i];
    me.length = OVERFLOW_LENGTH;
    me.min_free = PTL_LARGE_THRESHOLD;

    /* match anything from anyone */
    me.match_id = wildcard;
    me.match_bits = 0;
    me.ignore_bits = ~((ptl_match_bits_t)0);
    me.uid = PTL_UID_ANY;

    me.ct_handle = PTL_CT_NONE;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_MANAGE_LOCAL | PTL_ME_NO_TRUNCATE | PTL_ME_MAY_ALIGN |
                   PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE );

    /* a single append attempt is made here; no retry is performed in this function */
    ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i,
                      &overflow_me_handle[i]);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_APPEND_OVERFLOW);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #7
0
/*
 * Set up the per-interface Portals resources for every BTL module listed in
 * mca_btl_portals4_component.btls[0 .. num_btls-1].
 *
 * For each interface, in this order:
 *   1. allocate the receive event queue and record it in eqs_h[];
 *   2. allocate the receive portal table entry and verify that the index
 *      handed back is REQ_BTL_TABLE_ID;
 *   3. bind a zero-length MD (used for sending acks);
 *   4. bind an MD spanning all of memory, reporting to the receive EQ;
 *   5. append a zero-length ME on the overflow list for long messages.
 *
 * After the loop, mca_btl_portals4_recv_enable() is called on the module of
 * the last interface processed.
 * NOTE(review): only the last interface gets recv_enable here -- confirm this
 * is intended when num_btls > 1.
 *
 * Returns OPAL_SUCCESS on success.  On any failure every non-NULL module is
 * released with mca_btl_portals4_free_module(), the btls/eqs_h arrays are
 * freed and reset, num_btls is set to 0, and OPAL_ERROR is returned.
 */
static int
btl_portals4_init_interface(void)
{
    /* NULL so the post-loop code can tell that no interface was processed. */
    mca_btl_portals4_module_t *portals4_btl = NULL;
    unsigned int ret, interface;
    ptl_md_t md;
    ptl_me_t me;

    /* The initialisation of EQ, PT and ME must be done after the SetMap ! */
    for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
        portals4_btl = mca_btl_portals4_component.btls[interface];

        /* create event queue */
        ret = PtlEQAlloc(portals4_btl->portals_ni_h,
                     mca_btl_portals4_component.recv_queue_size,
                     &portals4_btl->recv_eq_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        mca_btl_portals4_component.eqs_h[interface] = portals4_btl->recv_eq_h;
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "PtlEQAlloc (recv_eq=%d) OK for NI %d\n", portals4_btl->recv_eq_h, interface));

        /* Create recv_idx portal table entry */
        ret = PtlPTAlloc(portals4_btl->portals_ni_h,
                     PTL_PT_ONLY_TRUNCATE,
                     portals4_btl->recv_eq_h,
                     REQ_BTL_TABLE_ID,
                     &portals4_btl->recv_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "PtlPTAlloc (recv_idx) OK for NI %d recv_idx=%d", interface, portals4_btl->recv_idx));

        /* a different index than the one requested is treated as a failure */
        if (portals4_btl->recv_idx != REQ_BTL_TABLE_ID) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc did not allocate the requested PT: %d",
                           __FILE__, __LINE__, portals4_btl->recv_idx);
            goto error;
        }

        /* bind zero-length md for sending acks */
        md.start     = NULL;
        md.length    = 0;
        md.options   = 0;
        md.eq_handle = PTL_EQ_NONE;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(portals4_btl->portals_ni_h,
                    &md,
                    &portals4_btl->zero_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "PtlMDBind (zero-length md=%d) OK for NI %d", portals4_btl->zero_md_h, interface));

        /* Bind MD across all memory */
        md.start = 0;
        md.length = PTL_SIZE_MAX;
        md.options = 0;
        md.eq_handle = portals4_btl->recv_eq_h;
        md.ct_handle = PTL_CT_NONE;

        ret = PtlMDBind(portals4_btl->portals_ni_h,
                    &md,
                    &portals4_btl->send_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed for NI %d: %d\n",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }

        /* Handle long overflows */
        me.start = NULL;
        me.length = 0;
        me.ct_handle = PTL_CT_NONE;
        me.min_free = 0;
        me.uid = PTL_UID_ANY;
        me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_LINK_DISABLE |
            PTL_ME_EVENT_COMM_DISABLE |
            PTL_ME_EVENT_UNLINK_DISABLE;
        /* wildcard initiator, in whichever addressing mode is configured */
        if (mca_btl_portals4_component.use_logical) {
            me.match_id.rank = PTL_RANK_ANY;
        } else {
            me.match_id.phys.nid = PTL_NID_ANY;
            me.match_id.phys.pid = PTL_PID_ANY;
        }
        me.match_bits = BTL_PORTALS4_LONG_MSG;
        me.ignore_bits = BTL_PORTALS4_CONTEXT_MASK |
            BTL_PORTALS4_SOURCE_MASK |
            BTL_PORTALS4_TAG_MASK;
        ret = PtlMEAppend(portals4_btl->portals_ni_h,
                      portals4_btl->recv_idx,
                      &me,
                      PTL_OVERFLOW_LIST,
                      NULL,
                      &portals4_btl->long_overflow_me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed for NI %d: %d",
                            __FILE__, __LINE__, interface, ret);
            goto error;
        }
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlMEAppend (overflow list) OK for NI %d", interface));
    }

    /* With zero interfaces the loop never ran and there is no module to
       enable; skip the call instead of passing an unset pointer. */
    if (NULL != portals4_btl) {
        ret = mca_btl_portals4_recv_enable(portals4_btl);
        if (PTL_OK != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: Initialization of recv buffer failed: %d",
                                __FILE__, __LINE__, ret);
            goto error;
        }
    }

    return OPAL_SUCCESS;

 error:
    opal_output_verbose(1, opal_btl_base_framework.framework_output, "Error in btl_portals4_init_interface");

    /* tear down every module, not just the one that failed */
    for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
        portals4_btl = mca_btl_portals4_component.btls[interface];
        if (NULL != portals4_btl) mca_btl_portals4_free_module(portals4_btl);
    }
    mca_btl_portals4_component.num_btls = 0;
    if (NULL != mca_btl_portals4_component.btls)  free(mca_btl_portals4_component.btls);
    if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
    mca_btl_portals4_component.btls = NULL;
    mca_btl_portals4_component.eqs_h = NULL;

    return OPAL_ERROR;
}
Beispiel #8
0
/*
 * Register the memory region [base, base + size) so that `endpoint` can read
 * it with Portals get operations.
 *
 * A registration handle is allocated with malloc(), given a fresh key (the
 * BTL's portals_rdma_key counter incremented by one) and a remote_offset of 0.
 * A match entry restricted to the endpoint's process id, matching the key
 * exactly (no ignore bits), is appended to the priority list of the BTL's
 * receive portal table entry.  The handle doubles as the entry's user pointer.
 *
 * `flags` is only used in the verbose trace.
 *
 * Returns the new handle, or NULL if the allocation or PtlMEAppend() fails.
 * On PtlMEAppend() failure the handle is freed before returning.
 */
mca_btl_base_registration_handle_t *
mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
                              mca_btl_base_endpoint_t *endpoint,
                              void *base,
                              size_t size,
                              uint32_t flags)
{
    struct mca_btl_portals4_module_t   *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
    mca_btl_base_registration_handle_t *handle = NULL;
    ptl_me_t me;
    int ret;

    handle = (mca_btl_base_registration_handle_t *)malloc(sizeof(mca_btl_base_registration_handle_t));
    if (!handle) {
        return NULL;
    }

    handle->key = OPAL_THREAD_ADD_FETCH64(&(portals4_btl->portals_rdma_key), 1);
    handle->remote_offset = 0;

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
        "mca_btl_portals4_register_mem NI=%d base=%p size=%ld handle=%p key=%ld flags=%d",
        portals4_btl->interface_num, base, size, (void *)handle, handle->key, flags));

    /* create a match entry */
    me.start = base;
    me.length = size;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_GET |
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_COMM_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE;

    /* only the given endpoint may match, in the configured addressing mode */
    if (mca_btl_portals4_component.use_logical) {
        me.match_id.rank = endpoint->ptl_proc.rank;
    } else {
        me.match_id.phys.nid = endpoint->ptl_proc.phys.nid;
        me.match_id.phys.pid = endpoint->ptl_proc.phys.pid;
    }
    /* the key must match bit-for-bit */
    me.match_bits = handle->key;
    me.ignore_bits = 0;

    ret = PtlMEAppend(portals4_btl->portals_ni_h,
                      portals4_btl->recv_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      handle,
                      &(handle->me_h));
    if (PTL_OK != ret) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        /* NOTE(review): this function never increments portals_outstanding_ops;
           confirm that the decrement below is paired with an increment elsewhere. */
        OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
        /* nothing else references the handle yet, so release it here */
        free(handle);
        return NULL;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
        "PtlMEAppend (mca_btl_portals4_register_mem) handle=%p, me_h=%d start=%p length=%ld rank=%x nid=%x pid=%x match_bits=%lx\n",
        (void *)handle, handle->me_h, me.start, me.length,
        me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits));
    return handle;
}
Beispiel #9
0
/*
 * Post a non-blocking receive: build a use-once match entry over the receive
 * buffer obtained from `convertor` and append it to the priority list.
 *
 * The initiator is a physical nid/pid wildcard for MPI_ANY_SOURCE, otherwise
 * the ptl_proc stored in the peer's endpoint.  Match and ignore bits are
 * produced by MTL_PORTALS4_SET_RECV_BITS from the communicator's context id,
 * `src` and `tag`.  The request records the delivery buffer, the convertor
 * and the progress callback; buffer_ptr is set only when
 * ompi_mtl_datatype_recv_buf() reports the buffer must be freed afterwards.
 *
 * `mtl` is not used in this function.
 *
 * Returns OMPI_SUCCESS, the error from ompi_mtl_datatype_recv_buf(), or the
 * value of ompi_mtl_portals4_get_error() for a failed PtlMEAppend() (in which
 * case the temporary buffer, if any, is freed).
 */
int
ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
                        struct ompi_communicator_t *comm,
                        int src,
                        int tag,
                        struct opal_convertor_t *convertor,
                        mca_mtl_request_t *mtl_request)
{
    ptl_match_bits_t match_bits, ignore_bits;
    int ret = OMPI_SUCCESS;
    ptl_process_t remote_proc;
    mca_mtl_base_endpoint_t *endpoint = NULL;
    ompi_mtl_portals4_recv_request_t *ptl_request = 
        (ompi_mtl_portals4_recv_request_t*) mtl_request;
    void *start;
    size_t length;
    bool free_after;
    ptl_me_t me;

    if  (MPI_ANY_SOURCE == src) {
        remote_proc.phys.nid = PTL_NID_ANY;
        remote_proc.phys.pid = PTL_PID_ANY;
    } else {
        ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
        endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
        remote_proc = endpoint->ptl_proc;
    }

    MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
                               src, tag);

    ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
    if (OMPI_SUCCESS != ret) {
        /* name the call that actually failed; no ME has been appended yet */
        opal_output_verbose(1, ompi_mtl_base_output,
                            "%s:%d: ompi_mtl_datatype_recv_buf failed: %d",
                            __FILE__, __LINE__, ret);
        return ret;
    }

#if OPAL_ENABLE_DEBUG
    ptl_request->opcount = ++ompi_mtl_portals4.recv_opcount;
    ptl_request->hdr_data = 0;
#endif
    ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
    /* remember the buffer only if this request owns it */
    ptl_request->buffer_ptr = (free_after) ? start : NULL;
    ptl_request->convertor = convertor;
    ptl_request->delivery_ptr = start;
    ptl_request->delivery_len = length;
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                         "Recv %d from %x,%x of length %d (0x%lx, 0x%lx)\n",
                         ptl_request->opcount,
                         remote_proc.phys.nid, remote_proc.phys.pid, 
                         (int)length, match_bits, ignore_bits));

    me.start = start;
    me.length = length;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id = remote_proc;
    me.match_bits = match_bits;
    me.ignore_bits = ignore_bits;

    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.send_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      ptl_request,
                      &ptl_request->me_h);
    if (PTL_OK != ret) {
        /* free(NULL) is a no-op, so no guard is needed */
        free(ptl_request->buffer_ptr);
        opal_output_verbose(1, ompi_mtl_base_output,
                            "%s:%d: PtlMEAppend failed: %d",
                            __FILE__, __LINE__, ret);
        return ompi_mtl_portals4_get_error(ret);
    }

    return OMPI_SUCCESS; 
}
Beispiel #10
0
/*
 * Post a non-blocking receive: build a use-once match entry over the receive
 * buffer obtained from `convertor` and append it to the priority list of the
 * receive portal table entry.
 *
 * Initiator selection:
 *   - MPI_ANY_SOURCE: rank or nid/pid wildcard, depending on use_logical;
 *   - logical addressing on MPI_COMM_WORLD: `src` is used as the rank;
 *   - otherwise: the ptl_process_t returned by ompi_mtl_portals4_get_endpoint()
 *     for the peer.
 *
 * For buffers no longer than short_limit the link event is disabled.  For
 * longer buffers the function calls ompi_mtl_portals4_progress() repeatedly
 * after the append until the request's req_started flag becomes true.
 *
 * Returns OMPI_SUCCESS, the error from ompi_mtl_datatype_recv_buf(), or the
 * value of ompi_mtl_portals4_get_error() for a failed PtlMEAppend() (in which
 * case the temporary buffer, if any, is freed).
 */
int
ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
                        struct ompi_communicator_t *comm,
                        int src,
                        int tag,
                        struct opal_convertor_t *convertor,
                        mca_mtl_request_t *mtl_request)
{
    ompi_mtl_portals4_recv_request_t *ptl_request =
        (ompi_mtl_portals4_recv_request_t*) mtl_request;
    ptl_match_bits_t match_bits, ignore_bits;
    ptl_process_t remote_proc;
    ptl_me_t me;
    void *start;
    size_t length;
    bool free_after;
    int ret = OMPI_SUCCESS;

    /* work out who is allowed to match */
    if (MPI_ANY_SOURCE != src) {
        if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
            remote_proc.rank = src;
        } else {
            ompi_proc_t *peer = ompi_comm_peer_lookup( comm, src );
            remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, peer));
        }
    } else if (ompi_mtl_portals4.use_logical) {
        remote_proc.rank = PTL_RANK_ANY;
    } else {
        remote_proc.phys.nid = PTL_NID_ANY;
        remote_proc.phys.pid = PTL_PID_ANY;
    }

    MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
                               src, tag);

    ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    /* fill in the request before it becomes visible to Portals */
    ptl_request->super.type = portals4_req_recv;
    ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
#if OPAL_ENABLE_DEBUG
    ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
    ptl_request->hdr_data = 0;
#endif
    ptl_request->convertor = convertor;
    ptl_request->delivery_ptr = start;
    ptl_request->delivery_len = length;
    /* the request owns the buffer only when the datatype layer says so */
    ptl_request->buffer_ptr = (free_after) ? start : NULL;
    ptl_request->pending_reply = 0;
    ptl_request->req_started = false;
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n",
                         ptl_request->opcount,
                         remote_proc.phys.nid, remote_proc.phys.pid,
                         (int64_t)length, match_bits, ignore_bits, (unsigned long) ptl_request));

    /* describe the match entry */
    me.start = start;
    me.length = length;
    me.min_free = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = ompi_mtl_portals4.uid;
    me.match_id = remote_proc;
    me.match_bits = match_bits;
    me.ignore_bits = ignore_bits;
    me.options = PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE;
    if (length <= ompi_mtl_portals4.short_limit) {
        /* short receives do not request a link event */
        me.options |= PTL_ME_EVENT_LINK_DISABLE;
    }

    ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx,
                      &me, PTL_PRIORITY_LIST,
                      ptl_request, &ptl_request->me_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        free(ptl_request->buffer_ptr);
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d",
                            __FILE__, __LINE__, ret);
        return ompi_mtl_portals4_get_error(ret);
    }

    /* Long message: keep driving progress until a comm event or a link event
       has marked the request as started, which guarantees progress for long
       unexpected messages. */
    if (length > ompi_mtl_portals4.short_limit) {
        while (true != ptl_request->req_started) {
            ompi_mtl_portals4_progress();
        }
    }

    return OMPI_SUCCESS;
}
Beispiel #11
0
/*
 * Prepare the synchronisation resources of a scatter request:
 *   - compute request->u.scatter.sync_match_bits via COLL_PORTALS4_SET_BITS;
 *   - allocate the counting event sync_cth;
 *   - point sync_mdh at the component's zero-length MD;
 *   - append a zero-length ME (sync_meh) on the priority list that accepts
 *     puts from any physical nid/pid matching sync_match_bits exactly and
 *     is attached to sync_cth.
 *
 * `portals4_module` is not used in this function.
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_TEMP_OUT_OF_RESOURCE if PtlCTAlloc() fails,
 * or OMPI_ERROR if PtlMEAppend() fails.  Failures are logged with the line
 * at which they were detected.
 */
static int
setup_sync_handles(struct ompi_communicator_t   *comm,
                   ompi_coll_portals4_request_t *request,
                   mca_coll_portals4_module_t   *portals4_module)
{
    ptl_me_t  sync_me;
    int rc, fail_line;

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank));

    /* match bits that identify this scatter's sync traffic */
    COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm),
                           0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)",
                 request->u.scatter.my_rank, request->u.scatter.sync_match_bits));

    /* counting event the sync ME is attached to */
    rc = PtlCTAlloc(mca_coll_portals4_component.ni_h,
                    &request->u.scatter.sync_cth);
    if (PTL_OK != rc) {
        rc = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        fail_line = __LINE__;
        goto err_hdlr;
    }

    /* the shared zero-length MD serves as the sync MD */
    request->u.scatter.sync_mdh = mca_coll_portals4_component.zero_md_h;

    /* zero-length ME: no payload, attached to the counter allocated above */
    sync_me.start = NULL;
    sync_me.length = 0;
    sync_me.min_free = 0;
    sync_me.ct_handle = request->u.scatter.sync_cth;
    sync_me.uid = mca_coll_portals4_component.uid;
    sync_me.match_id.phys.nid = PTL_NID_ANY;
    sync_me.match_id.phys.pid = PTL_PID_ANY;
    sync_me.match_bits = request->u.scatter.sync_match_bits;
    sync_me.ignore_bits = 0;
    sync_me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE |
                      PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE |
                      PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW;

    rc = PtlMEAppend(mca_coll_portals4_component.ni_h,
                     mca_coll_portals4_component.pt_idx,
                     &sync_me, PTL_PRIORITY_LIST,
                     NULL, &request->u.scatter.sync_meh);
    if (PTL_OK != rc) {
        rc = OMPI_ERROR;
        fail_line = __LINE__;
        goto err_hdlr;
    }

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank));

    return OMPI_SUCCESS;

err_hdlr:
    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, fail_line, rc, request->u.scatter.my_rank);

    return rc;
}
Beispiel #12
0
/*
 * Initialise the flow-control state in ompi_mtl_portals4.flowctl.
 *
 * Steps performed, in order:
 *   - mark flow control inactive and construct the pending_sends list and
 *     the pending_fl free list;
 *   - derive max_send_slots from send_queue_size and start with all slots
 *     available;
 *   - set the type and event callback of the alert and fanout requests;
 *   - allocate the flow-control portal table entry (flowctl_idx);
 *   - allocate four counting-event / match-entry pairs on that entry:
 *     trigger, alert, fan-in and fan-out.  All four are zero-length MEs on
 *     the priority list that accept puts from any initiator and match their
 *     own MTL_PORTALS4_FLOWCTL_* bits exactly;
 *   - reset num_children and backoff_count and record the current time.
 *
 * A single ptl_me_t is reused for all four appends: start, length, min_free,
 * uid, match_id and ignore_bits are set once; only options, ct_handle and
 * match_bits are rewritten before each PtlMEAppend().
 *
 * Returns OMPI_SUCCESS on success.  On failure the return code of the failing
 * Portals call is returned as-is; resources allocated earlier in this
 * function are not released here.
 */
int
ompi_mtl_portals4_flowctl_init(void)
{
    ptl_me_t me;
    int ret;

    ompi_mtl_portals4.flowctl.flowctl_active = false;

    OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_t);

    /* NOTE(review): the return value of opal_free_list_init() is not checked. */
    OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.pending_fl, opal_free_list_t);
    opal_free_list_init(&ompi_mtl_portals4.flowctl.pending_fl,
                        sizeof(ompi_mtl_portals4_pending_request_t),
                        opal_cache_line_size,
                        OBJ_CLASS(ompi_mtl_portals4_pending_request_t),
                        0, 0, 1, -1, 1, NULL, 0, NULL, NULL, NULL);

    /* all send slots start out available */
    ompi_mtl_portals4.flowctl.max_send_slots = (ompi_mtl_portals4.send_queue_size - 3) / 3;
    ompi_mtl_portals4.flowctl.send_slots = ompi_mtl_portals4.flowctl.max_send_slots;

    /* requests used as user pointers for the alert and fan-out MEs below */
    ompi_mtl_portals4.flowctl.alert_req.type = portals4_req_flowctl;
    ompi_mtl_portals4.flowctl.alert_req.event_callback = flowctl_alert_callback;

    ompi_mtl_portals4.flowctl.fanout_req.type = portals4_req_flowctl;
    ompi_mtl_portals4.flowctl.fanout_req.event_callback = flowctl_fanout_callback;

    ompi_mtl_portals4.flowctl.epoch_counter = -1;

    /* portal table entry dedicated to flow-control traffic; events go to send_eq_h */
    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
                     PTL_PT_ONLY_TRUNCATE,
                     ompi_mtl_portals4.send_eq_h,
                     REQ_FLOWCTL_TABLE_ID,
                     &ompi_mtl_portals4.flowctl_idx);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
                     &ompi_mtl_portals4.flowctl.trigger_ct_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* everyone creates the trigger ME, even if the root may be the
       only to use it */
    /* the fields set here are shared by all four MEs appended below */
    me.start = NULL;
    me.length = 0;
    me.min_free = 0;
    me.uid = ompi_mtl_portals4.uid;
    if (ompi_mtl_portals4.use_logical) {
        me.match_id.rank = PTL_RANK_ANY;
    } else {
        me.match_id.phys.nid = PTL_NID_ANY;
        me.match_id.phys.pid = PTL_PID_ANY;
    }
    me.ignore_bits = 0;

    /* trigger ME: counts on trigger_ct_h, full events disabled, no user pointer */
    me.options = PTL_ME_OP_PUT | 
        PTL_ME_ACK_DISABLE | 
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE |
        PTL_ME_EVENT_COMM_DISABLE |
        PTL_ME_EVENT_CT_COMM;
    me.ct_handle = ompi_mtl_portals4.flowctl.trigger_ct_h;
    me.match_bits = MTL_PORTALS4_FLOWCTL_TRIGGER;
    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.flowctl_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &ompi_mtl_portals4.flowctl.trigger_me_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }


    /* Alert CT/ME for broadcasting out alert when root receives a
       trigger */
    ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
                     &ompi_mtl_portals4.flowctl.alert_ct_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    /* unlike the trigger ME, PTL_ME_EVENT_COMM_DISABLE is not set here and
       alert_req is passed as the user pointer */
    me.options = PTL_ME_OP_PUT | 
        PTL_ME_ACK_DISABLE | 
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE |
        PTL_ME_EVENT_CT_COMM;
    me.ct_handle = ompi_mtl_portals4.flowctl.alert_ct_h;
    me.match_bits = MTL_PORTALS4_FLOWCTL_ALERT;
    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.flowctl_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      &ompi_mtl_portals4.flowctl.alert_req,
                      &ompi_mtl_portals4.flowctl.alert_me_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Fanin CT/ME for receiving fan-in for restart */
    ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
                     &ompi_mtl_portals4.flowctl.fanin_ct_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    /* same option set as the trigger ME; no user pointer */
    me.options = PTL_ME_OP_PUT | 
        PTL_ME_ACK_DISABLE | 
        PTL_ME_EVENT_COMM_DISABLE |
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE |
        PTL_ME_EVENT_CT_COMM;
    me.ct_handle = ompi_mtl_portals4.flowctl.fanin_ct_h;
    me.match_bits = MTL_PORTALS4_FLOWCTL_FANIN;
    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.flowctl_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &ompi_mtl_portals4.flowctl.fanin_me_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Fan-out CT/ME for sending restart messages after fan-in */
    ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
                     &ompi_mtl_portals4.flowctl.fanout_ct_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    /* same option set as the alert ME; fanout_req is the user pointer */
    me.options = PTL_ME_OP_PUT | 
        PTL_ME_ACK_DISABLE | 
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE |
        PTL_ME_EVENT_CT_COMM;
    me.ct_handle = ompi_mtl_portals4.flowctl.fanout_ct_h;
    me.match_bits = MTL_PORTALS4_FLOWCTL_FANOUT;
    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.flowctl_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      &ompi_mtl_portals4.flowctl.fanout_req,
                      &ompi_mtl_portals4.flowctl.fanout_me_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    ompi_mtl_portals4.flowctl.num_children = 0;

    gettimeofday(&ompi_mtl_portals4.flowctl.tv, NULL);
    ompi_mtl_portals4.flowctl.backoff_count = 0;

    ret = OMPI_SUCCESS;

    /* reached on success as well as via goto; ret holds the outcome either way */
 error:
    return ret;
}
Beispiel #13
0
/*
 * Start a long-protocol send.
 *
 * A use-once match entry describing the send buffer (start, length) is
 * appended to the read portal table entry with PTL_ME_OP_GET, then a
 * PtlPut is issued to the peer's receive portal table entry.  When the
 * configured protocol is rndv only eager_limit bytes are put; otherwise
 * the whole buffer is put.  If the put cannot be started, the match entry
 * appended above is unlinked again.
 *
 * Returns OMPI_SUCCESS, or ompi_mtl_portals4_get_error() applied to the
 * failing Portals return code.
 */
static inline int
ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
                             int localrank,
                             ptl_process_t ptl_proc,
                             ompi_mtl_portals4_isend_request_t *ptl_request)
{
    ptl_me_t get_me;
    ptl_match_bits_t send_bits;
    ptl_hdr_data_t header;
    ptl_size_t put_length;
    int rc;

    MTL_PORTALS4_SET_SEND_BITS(send_bits, contextid, localrank, tag,
                               MTL_PORTALS4_LONG_MSG);

    MTL_PORTALS4_SET_HDR_DATA(header, ptl_request->opcount, length, 0);

    /* Match entry exposing the send buffer to get operations from ptl_proc;
       it is matched on the header data with no ignore bits. */
    get_me.match_id = ptl_proc;
    get_me.match_bits = header;
    get_me.ignore_bits = 0;
    get_me.options =
        PTL_ME_OP_GET |
        PTL_ME_USE_ONCE |
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE;
    get_me.uid = ompi_mtl_portals4.uid;
    get_me.ct_handle = PTL_CT_NONE;
    get_me.min_free = 0;
    get_me.start = start;
    get_me.length = length;

    rc = PtlMEAppend(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.read_idx,
                     &get_me,
                     PTL_PRIORITY_LIST,
                     ptl_request,
                     &ptl_request->me_h);
    if (OPAL_UNLIKELY(rc != PTL_OK)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d",
                            __FILE__, __LINE__, rc);
        return ompi_mtl_portals4_get_error(rc);
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Send %lu long send with hdr_data 0x%lx (0x%lx)",
                         ptl_request->opcount, header, send_bits));

    /* Under rndv only the eager portion is pushed with the put. */
    if (rndv == ompi_mtl_portals4.protocol) {
        put_length = (ptl_size_t) ompi_mtl_portals4.eager_limit;
    } else {
        put_length = (ptl_size_t) length;
    }

    rc = PtlPut(ompi_mtl_portals4.send_md_h,
                (ptl_size_t) start,
                put_length,
                PTL_ACK_REQ,
                ptl_proc,
                ompi_mtl_portals4.recv_idx,
                send_bits,
                0,
                ptl_request,
                header);
    if (OPAL_UNLIKELY(rc != PTL_OK)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d",
                            __FILE__, __LINE__, rc);
        /* The put never started, so take the match entry back down. */
        PtlMEUnlink(ptl_request->me_h);
        return ompi_mtl_portals4_get_error(rc);
    }

    return OMPI_SUCCESS;
}
Beispiel #14
0
/*
 * Start a short-protocol send.
 *
 * For a synchronous send a zero-length, use-once match entry is appended
 * to the read portal table entry first, matched on the header data.  For
 * every other mode no match entry is posted: the request's event_count is
 * set to 1 and its me_h to PTL_INVALID_HANDLE.  In both cases the payload
 * is then sent with PtlPut to the peer's receive portal table entry.
 *
 * Returns OMPI_SUCCESS, or ompi_mtl_portals4_get_error() applied to the
 * failing Portals return code.  A match entry posted for a synchronous
 * send is unlinked again if the put cannot be started.
 */
static inline int
ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
                              void *start, int length, int contextid, int tag,
                              int localrank,
                              ptl_process_t ptl_proc,
                              ompi_mtl_portals4_isend_request_t *ptl_request)
{
    const int is_sync = (MCA_PML_BASE_SEND_SYNCHRONOUS == mode);
    ptl_match_bits_t send_bits;
    ptl_hdr_data_t header;
    ptl_me_t ack_me;
    int rc;

    MTL_PORTALS4_SET_SEND_BITS(send_bits, contextid, localrank, tag,
                               MTL_PORTALS4_SHORT_MSG);

    MTL_PORTALS4_SET_HDR_DATA(header, ptl_request->opcount, length,
                              is_sync ? 1 : 0);

    if (!is_sync) {
        ptl_request->event_count = 1;
        ptl_request->me_h = PTL_INVALID_HANDLE;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Send %lu short send with hdr_data 0x%lx (0x%lx)",
                             ptl_request->opcount, header, send_bits));
    } else {
        /* Zero-length, use-once entry that accepts a put from ptl_proc. */
        ack_me.match_id = ptl_proc;
        ack_me.match_bits = header;
        ack_me.ignore_bits = 0;
        ack_me.options =
            PTL_ME_OP_PUT |
            PTL_ME_USE_ONCE |
            PTL_ME_EVENT_LINK_DISABLE |
            PTL_ME_EVENT_UNLINK_DISABLE;
        ack_me.uid = ompi_mtl_portals4.uid;
        ack_me.ct_handle = PTL_CT_NONE;
        ack_me.min_free = 0;
        ack_me.start = NULL;
        ack_me.length = 0;

        rc = PtlMEAppend(ompi_mtl_portals4.ni_h,
                         ompi_mtl_portals4.read_idx,
                         &ack_me,
                         PTL_PRIORITY_LIST,
                         ptl_request,
                         &ptl_request->me_h);
        if (OPAL_UNLIKELY(rc != PTL_OK)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PtlMEAppend failed: %d",
                                __FILE__, __LINE__, rc);
            return ompi_mtl_portals4_get_error(rc);
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Send %lu short sync send with hdr_data 0x%lx (0x%lx)",
                             ptl_request->opcount, header, send_bits));
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Send %lu, start: %p",
                         ptl_request->opcount, start));

    rc = PtlPut(ompi_mtl_portals4.send_md_h,
                (ptl_size_t) start,
                length,
                PTL_ACK_REQ,
                ptl_proc,
                ompi_mtl_portals4.recv_idx,
                send_bits,
                0,
                ptl_request,
                header);
    if (OPAL_UNLIKELY(rc != PTL_OK)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d",
                            __FILE__, __LINE__, rc);
        /* Only the synchronous path posted a match entry to take down. */
        if (is_sync) {
            PtlMEUnlink(ptl_request->me_h);
        }
        return ompi_mtl_portals4_get_error(rc);
    }

    return OMPI_SUCCESS;
}
Beispiel #15
0
/*
 * Set up the Portals4 resources used by the MTL on ompi_mtl_portals4.ni_h:
 *   - the send and receive event queues,
 *   - the receive and read portal table entries (which must land on
 *     REQ_RECV_TABLE_ID and REQ_READ_TABLE_ID respectively),
 *   - a zero-length MD and an MD spanning all of memory,
 *   - the long-message overflow match entry,
 *   - the short unexpected receive blocks,
 *   - and, when OMPI_MTL_PORTALS4_FLOW_CONTROL is set, flow control.
 *
 * Returns OMPI_SUCCESS on success.  On any failure everything that looks
 * allocated is released and OMPI_ERROR is returned.
 *
 * NOTE(review): the error path compares handles against PTL_INVALID_HANDLE
 * and table indices against ~0, so it presumably relies on the caller
 * having initialized those fields to these sentinels -- confirm.
 */
static int
portals4_init_interface(void)
{
    /* Signed: holds both Portals return codes and (negative) OMPI error
       codes, and is printed with %d below. */
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* create event queues */
    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.send_queue_size,
                     &ompi_mtl_portals4.send_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.recv_queue_size,
                     &ompi_mtl_portals4.recv_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Create send and long message (read) portal table entries */
    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
                     PTL_PT_ONLY_USE_ONCE |
                     PTL_PT_ONLY_TRUNCATE |
                     PTL_PT_FLOWCTRL,
                     ompi_mtl_portals4.recv_eq_h,
                     REQ_RECV_TABLE_ID,
                     &ompi_mtl_portals4.recv_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    /* The index actually granted must be the one that was requested. */
    if (ompi_mtl_portals4.recv_idx != REQ_RECV_TABLE_ID) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n",
                            __FILE__, __LINE__, ompi_mtl_portals4.recv_idx);
        goto error;
    }

    ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
                     PTL_PT_ONLY_USE_ONCE |
                     PTL_PT_ONLY_TRUNCATE,
                     ompi_mtl_portals4.send_eq_h,
                     REQ_READ_TABLE_ID,
                     &ompi_mtl_portals4.read_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    if (ompi_mtl_portals4.read_idx != REQ_READ_TABLE_ID) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n",
                            __FILE__, __LINE__, ompi_mtl_portals4.read_idx);
        goto error;
    }

    /* bind zero-length md for sending acks */
    md.start     = NULL;
    md.length    = 0;
    md.options   = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &ompi_mtl_portals4.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Bind MD across all memory; its events go to the send event queue */
    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &ompi_mtl_portals4.send_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* Handle long overflows: a zero-length entry on the overflow list that
       matches long messages from any source, context and tag */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = ompi_mtl_portals4.uid;
    me.options = PTL_ME_OP_PUT |
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_COMM_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE;
    if (ompi_mtl_portals4.use_logical) {
        me.match_id.rank = PTL_RANK_ANY;
    } else {
        me.match_id.phys.nid = PTL_NID_ANY;
        me.match_id.phys.pid = PTL_PID_ANY;
    }
    me.match_bits = MTL_PORTALS4_LONG_MSG;
    me.ignore_bits = MTL_PORTALS4_CONTEXT_MASK |
        MTL_PORTALS4_SOURCE_MASK |
        MTL_PORTALS4_TAG_MASK;
    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.recv_idx,
                      &me,
                      PTL_OVERFLOW_LIST,
                      NULL,
                      &ompi_mtl_portals4.long_overflow_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* attach short unex recv blocks */
    ret = ompi_mtl_portals4_recv_short_init();
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: short receive block initialization failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    ompi_mtl_portals4.opcount = 0;
#if OPAL_ENABLE_DEBUG
    ompi_mtl_portals4.recv_opcount = 0;
#endif

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    ret = ompi_mtl_portals4_flowctl_init();
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: ompi_mtl_portals4_flowctl_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
#endif

    return OMPI_SUCCESS;

 error:
    /* Release whatever does not still hold its "unset" sentinel value. */
    if (!PtlHandleIsEqual(ompi_mtl_portals4.long_overflow_me_h, PTL_INVALID_HANDLE)) {
        PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.zero_md_h, PTL_INVALID_HANDLE)) {
        PtlMDRelease(ompi_mtl_portals4.zero_md_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_md_h, PTL_INVALID_HANDLE)) {
        PtlMDRelease(ompi_mtl_portals4.send_md_h);
    }
    if (ompi_mtl_portals4.read_idx != (ptl_pt_index_t) ~0UL) {
        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
    }
    if (ompi_mtl_portals4.recv_idx != (ptl_pt_index_t) ~0UL) {
        PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.send_eq_h, PTL_INVALID_HANDLE)) {
        PtlEQFree(ompi_mtl_portals4.send_eq_h);
    }
    if (!PtlHandleIsEqual(ompi_mtl_portals4.recv_eq_h, PTL_INVALID_HANDLE)) {
        PtlEQFree(ompi_mtl_portals4.recv_eq_h);
    }
    return OMPI_ERROR;
}
/*
    /!\ Called for each processes /!\
 */
static int
portals4_init_query(bool enable_progress_threads,
        bool enable_mpi_threads)
{
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* Initialize Portals and create a physical, matching interface */
    ret = PtlInit();
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlNIInit(PTL_IFACE_DEFAULT,
            PTL_NI_PHYSICAL | PTL_NI_MATCHING,
            PTL_PID_ANY,
            NULL,
            &mca_coll_portals4_component.ni_limits,
            &mca_coll_portals4_component.ni_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlNIInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }


    ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    /* FIX ME: Need to make sure our ID matches with the MTL... */
    ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetUid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlEQAlloc(mca_coll_portals4_component.ni_h,
            MCA_COLL_PORTALS4_EQ_SIZE,
            &mca_coll_portals4_component.eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlEQAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_TABLE_ID,
            &mca_coll_portals4_component.pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_FINISH_TABLE_ID,
            &mca_coll_portals4_component.finish_pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    /* Bind MD/MDs across all memory.  We prefer (for obvious reasons)
       to have a single MD across all of memory */
    memset(&md, 0, sizeof(ptl_md_t));
    md.start = 0;
    md.length = 0;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.data_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length));

    /* setup finish ack ME */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = 0;
    me.ignore_bits = 0;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.finish_pt_idx,
            &me,
            PTL_PRIORITY_LIST,
            NULL,
            &mca_coll_portals4_component.finish_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* This ME is used for RTR exchange only */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;

    /* Note : the RTR bit must be set to match this ME,
     * this allows to discriminate the RTR from data flow
     * (especially for the Barrier operations)
     */
    COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0);
    me.ignore_bits = ~COLL_PORTALS4_RTR_MASK;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.pt_idx,
            &me,
            PTL_OVERFLOW_LIST,
            NULL,
            &mca_coll_portals4_component.unex_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* activate progress callback */
    ret = opal_progress_register(portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: opal_progress_register failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;

    }
    return OMPI_SUCCESS;

}
/*
 * Blocking intra-communicator barrier built from Portals4 triggered puts.
 *
 * A counting event and a zero-length match entry (counting into that
 * event) are created for this barrier instance.  The ranks are arranged
 * as a tree derived from the communicator's hypercube dimension:
 *   - every non-root rank schedules a zero-byte put to its parent that
 *     fires once all of its children have reported in;
 *   - every rank schedules zero-byte puts to its children that fire once
 *     the parent (or, on the root, the last child) has reported in;
 *   - the caller then waits on the counting event for all expected
 *     messages.
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERR_TEMP_OUT_OF_RESOURCE if the
 * counting event cannot be allocated, and OMPI_ERROR for any other
 * Portals failure.  The counting event and match entry are released on
 * every exit path, including failures.
 */
int
ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
                                 mca_coll_base_module_t *module)
{
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;
    int ret, i, dim, hibit, mask, num_msgs;
    int size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);
    ptl_ct_event_t ct;
    ptl_handle_ct_t ct_h;
    ptl_handle_me_t me_h;
    ptl_me_t me;
    size_t count;
    ptl_match_bits_t match_bits;
    ptl_handle_md_t md_h;
    void *base;

    ompi_coll_portals4_get_md(0, &md_h, &base);

    /* Per-module barrier sequence number, folded into the match bits */
    count = opal_atomic_add_size_t(&portals4_module->barrier_count, 1);

    ret = PtlCTAlloc(mca_coll_portals4_component.ni_h,
                     &ct_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 
                          0, COLL_PORTALS4_BARRIER, count);

    /* Build "tree" out of hypercube */
    dim = comm->c_cube_dim;
    hibit = opal_hibit(rank, dim);
    --dim;

    /* receive space */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = ct_h;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE |
        PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE |
        PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = match_bits;
    me.ignore_bits = 0;
    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
                      mca_coll_portals4_component.pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto err_free_ct;
    }

    /* calculate number of children to receive from */
    num_msgs = ompi_coll_portals4_get_nchildren(dim + 1, hibit, rank, size);
    
    /* send to parent when children have sent to us */
    if (rank > 0) {
        int parent = rank & ~(1 << hibit);
        ret = PtlTriggeredPut(md_h,
                              0,
                              0,
                              PTL_NO_ACK_REQ,
                              ompi_coll_portals4_get_peer(comm, parent),
                              mca_coll_portals4_component.pt_idx,
                              match_bits,
                              0,
                              NULL,
                              0,
                              ct_h,
                              num_msgs);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlTriggeredPut failed: %d\n",
                                __FILE__, __LINE__, ret);
            goto err_unlink_me;
        }

        /* we'll need to wait for the parent response before the next set of comms */
        num_msgs++;
    }

    /* send to children when parent (or all children if root) has sent to us */
    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
        int peer = rank | mask;
        if (peer < size) {
            ret = PtlTriggeredPut(md_h,
                                  0,
                                  0,
                                  PTL_NO_ACK_REQ,
                                  ompi_coll_portals4_get_peer(comm, peer),
                                  mca_coll_portals4_component.pt_idx,
                                  match_bits,
                                  0,
                                  NULL,
                                  0,
                                  ct_h,
                                  num_msgs);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                    "%s:%d: PtlTriggeredPut failed: %d\n",
                                    __FILE__, __LINE__, ret);
                goto err_unlink_me;
            }
        }
    }

    /* Wait for all incoming messages */
    ret = PtlCTWait(ct_h, num_msgs, &ct);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTWait failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto err_unlink_me;
    }

    /* cleanup */
    ret = PtlMEUnlink(me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEUnlink failed: %d\n",
                            __FILE__, __LINE__, ret);
        /* still give the counting event back */
        goto err_free_ct;
    }
    ret = PtlCTFree(ct_h); 
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTFree failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

    /* Best-effort release on failure, in the same order as the normal
       cleanup above; secondary failures are deliberately ignored because
       the barrier has already failed. */
 err_unlink_me:
    (void) PtlMEUnlink(me_h);
 err_free_ct:
    (void) PtlCTFree(ct_h);
    return OMPI_ERROR;
}
Beispiel #18
0
static int
component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                 struct ompi_communicator_t *comm, struct ompi_info_t *info,
                 int flavor, int *model)
{
    ompi_osc_portals4_module_t *module = NULL;
    int ret = OMPI_ERROR;
    int tmp;
    ptl_md_t md;
    ptl_me_t me;
    char *name;

    if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED;

    /* create module structure */
    module = (ompi_osc_portals4_module_t*)
        calloc(1, sizeof(ompi_osc_portals4_module_t));
    if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_portals4_module_template,
           sizeof(ompi_osc_base_module_t));

    /* fill in our part */
    if (MPI_WIN_FLAVOR_ALLOCATE == flavor) {
        module->free_after = *base = malloc(size);
        if (NULL == *base) goto error;
    } else {
        module->free_after = NULL;
    }

    ret = ompi_comm_dup(comm, &module->comm);
    if (OMPI_SUCCESS != ret) goto error;

    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                        "portals4 component creating window with id %d",
                        ompi_comm_get_cid(module->comm));

    asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* share everyone's displacement units. Only do an allgather if
       strictly necessary, since it requires O(p) state. */
    tmp = disp_unit;
    ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0,
                                          module->comm,
                                          module->comm->c_coll.coll_bcast_module);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: MPI_Bcast failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    tmp = (tmp == disp_unit) ? 1 : 0;
    ret = module->comm->c_coll.coll_allreduce(MPI_IN_PLACE, &tmp, 1, MPI_INT, MPI_LAND,
                                              module->comm, module->comm->c_coll.coll_allreduce_module);
    if (OMPI_SUCCESS != ret) goto error;
    if (tmp == 1) {
        module->disp_unit = disp_unit;
        module->disp_units = NULL;
    } else {
        module->disp_unit = -1;
        module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm));
        ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT,
                                                  module->disp_units, 1, MPI_INT,
                                                  module->comm,
                                                  module->comm->c_coll.coll_allgather_module);
        if (OMPI_SUCCESS != ret) goto error;
    }

    module->ni_h = mca_osc_portals4_component.matching_ni_h;
    module->pt_idx = mca_osc_portals4_component.matching_pt_idx;

    ret = PtlCTAlloc(module->ni_h, &(module->ct_h));
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
    md.eq_handle = mca_osc_portals4_component.matching_eq_h;
    md.ct_handle = module->ct_h;
    ret = PtlMDBind(module->ni_h, &md, &module->md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
    md.eq_handle = mca_osc_portals4_component.matching_eq_h;
    md.ct_handle = module->ct_h;
    ret = PtlMDBind(module->ni_h, &md, &module->req_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    if (MPI_WIN_FLAVOR_DYNAMIC == flavor) {
        me.start = 0;
        me.length = PTL_SIZE_MAX;
    } else {
        me.start = *base;
        me.length = size;
    }
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = module->comm->c_contextid;
    me.ignore_bits = 0;

    ret = PtlMEAppend(module->ni_h,
                      module->pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &module->data_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    me.start = &module->state;
    me.length = sizeof(module->state);
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = module->comm->c_contextid | OSC_PORTALS4_MB_CONTROL;
    me.ignore_bits = 0;

    ret = PtlMEAppend(module->ni_h,
                      module->pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &module->control_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    module->opcount = 0;
    module->match_bits = module->comm->c_contextid;
    module->atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
        mca_osc_portals4_component.matching_atomic_max :
        MIN(mca_osc_portals4_component.matching_atomic_max,
            mca_osc_portals4_component.matching_atomic_ordered_size);
    module->fetch_atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
        mca_osc_portals4_component.matching_fetch_atomic_max :
        MIN(mca_osc_portals4_component.matching_fetch_atomic_max,
            mca_osc_portals4_component.matching_atomic_ordered_size);

    module->zero = 0;
    module->one = 1;
    module->start_group = NULL;
    module->post_group = NULL;

    module->state.post_count = 0;
    module->state.complete_count = 0;
    if (check_config_value_bool("no_locks", info)) {
        module->state.lock = LOCK_ILLEGAL;
    } else {
        module->state.lock = LOCK_UNLOCKED;
    }

    OBJ_CONSTRUCT(&module->outstanding_locks, opal_list_t);

    module->passive_target_access_epoch = false;

#if OPAL_ASSEMBLY_ARCH == OPAL_AMD64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32
    *model = MPI_WIN_UNIFIED;
#else
    *model = MPI_WIN_SEPARATE;
#endif

    win->w_osc_module = &module->super;

    PtlAtomicSync();

    /* Make sure that everyone's ready to receive. */
    module->comm->c_coll.coll_barrier(module->comm,
                                      module->comm->c_coll.coll_barrier_module);

    return OMPI_SUCCESS;

 error:
    /* BWB: FIX ME: This is all wrong... */
    if (0 != module->ct_h) PtlCTFree(module->ct_h);
    if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h);
    if (0 != module->req_md_h) PtlMDRelease(module->req_md_h);
    if (0 != module->md_h) PtlMDRelease(module->md_h);
    if (NULL != module->comm) ompi_comm_free(&module->comm);
    if (NULL != module) free(module);

    return ret;
}
Beispiel #19
0
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
                    int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;
    MPID_Request *sreq = NULL;
    ptl_me_t me;
    int initial_iov_count, remaining_iov_count;
    ptl_md_t md;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);

    MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
    sreq->dev.match.parts.rank = dest;
    sreq->dev.match.parts.tag = tag;
    sreq->dev.match.parts.context_id = comm->context_id + context_offset;
    sreq->ch.vc = vc;

    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz));

    if (data_sz <= PTL_LARGE_THRESHOLD) {
        /* Small message.  Send all data eagerly */
        if (dt_contig) {
            void *start = (char *)buf + dt_true_lb;
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message");
            REQ_PTL(sreq)->event_handler = handler_send;
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler));
            if (start == NULL)
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            else
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler);
            goto fn_exit;
        }

        /* noncontig data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message");
        sreq->dev.segment_ptr = MPID_Segment_alloc();
        MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        last = sreq->dev.segment_size;
        sreq->dev.iov_count = MPL_IOV_LIMIT;
        MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

        if (last == sreq->dev.segment_size) {
            /* IOV is able to describe entire message */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    entire message fits in IOV");
            md.start = sreq->dev.iov;
            md.length = sreq->dev.iov_count;
            md.options = PTL_IOVEC;
            md.eq_handle = MPIDI_nem_ptl_origin_eq;
            md.ct_handle = PTL_CT_NONE;
            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

            REQ_PTL(sreq)->event_handler = handler_send;
            ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                        NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            goto fn_exit;
        }

        /* IOV is not long enough to describe entire message */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer");
        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        last = data_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]);
        MPIU_Assert(last == sreq->dev.segment_size);
        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ,
                                    vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
        goto fn_exit;
    }

    /* Large message.  Send first chunk of data and let receiver get the rest */
    if (dt_contig) {
        /* create ME for buffer so receiver can issue a GET for the data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message");
        big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                     NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);

        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        goto fn_exit;
    }

    /* Large noncontig data */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message");
    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    last = PTL_LARGE_THRESHOLD;
    sreq->dev.iov_count = MPL_IOV_LIMIT;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

    initial_iov_count = sreq->dev.iov_count;
    sreq->dev.segment_first = last;

    if (last == PTL_LARGE_THRESHOLD) {
        /* first chunk of message fits into IOV */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    first chunk fits in IOV");
        if (initial_iov_count < MPL_IOV_LIMIT) {
            /* There may be space for the rest of the message in this IOV */
            sreq->dev.iov_count = MPL_IOV_LIMIT - sreq->dev.iov_count;
            last = sreq->dev.segment_size;

            MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                     &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count);
            remaining_iov_count = sreq->dev.iov_count;

            if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
                /* Entire message fit in one IOV */
                int was_incomplete;

                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    rest of message fits in one IOV");
                /* Create ME for remaining data */
                me.start = &sreq->dev.iov[initial_iov_count];
                me.length = remaining_iov_count;
                me.ct_handle = PTL_CT_NONE;
                me.uid = PTL_UID_ANY;
                me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                               PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC );
                me.match_id = vc_ptl->id;
                me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
                me.ignore_bits = 0;
                me.min_free = 0;

                MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");

                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                  &REQ_PTL(sreq)->get_me_p[0]);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
                /* increment the cc for the get operation */
                MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
                MPIU_Assert(was_incomplete);

                /* Create MD for first chunk */
                md.start = sreq->dev.iov;
                md.length = initial_iov_count;
                md.options = PTL_IOVEC;
                md.eq_handle = MPIDI_nem_ptl_origin_eq;
                md.ct_handle = PTL_CT_NONE;
                ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

                REQ_PTL(sreq)->event_handler = handler_send;
                ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                goto fn_exit;
            }