Пример #1
0
/*
 * Scan a module's all_endpoints list for the endpoint whose owning
 * proc has an RTE process name that hashes to the requested value.
 *
 * receiver:               module whose all_endpoints list is walked
 * sender_hashed_rte_name: value compared against ompi_rte_hash_name()
 *                         of each endpoint's proc name
 *
 * Returns the first endpoint on the list that matches, or NULL when
 * no endpoint on the list matches.
 */
ompi_btl_usnic_endpoint_t *
ompi_btl_usnic_proc_lookup_endpoint(ompi_btl_usnic_module_t *receiver,
                                    uint64_t sender_hashed_rte_name)
{
    opal_list_item_t *cur;

    MSGDEBUG1_OUT("lookup_endpoint: recvmodule=%p sendhash=0x%" PRIx64,
                  (void *)receiver, sender_hashed_rte_name);

    cur = opal_list_get_first(&receiver->all_endpoints);
    while (cur != opal_list_get_end(&receiver->all_endpoints)) {
        /* Recover the endpoint that embeds this list item */
        ompi_btl_usnic_endpoint_t *candidate =
            container_of(cur, ompi_btl_usnic_endpoint_t,
                         endpoint_endpoint_li);
        ompi_btl_usnic_proc_t *owner = candidate->endpoint_proc;

        if (sender_hashed_rte_name ==
            ompi_rte_hash_name(&owner->proc_ompi->proc_name)) {
            MSGDEBUG1_OUT("lookup_endpoint: matched endpoint=%p",
                          (void *)candidate);
            return candidate;
        }

        cur = opal_list_get_next(cur);
    }

    /* Walked the whole list without a match */
    return NULL;
}
Пример #2
0
/*
 * Look for an existing usnic endpoint on a module based on the
 * sender's process name.
 *
 * receiver:         module whose all_endpoints list is searched
 * sender_proc_name: process name compared directly against
 *                   proc_opal->proc_name of each endpoint's proc
 *
 * The module's all_endpoints_lock is held while the list is walked
 * and is released on every return path.
 *
 * Returns the first matching endpoint, or NULL if none is found.
 */
opal_btl_usnic_endpoint_t *
opal_btl_usnic_proc_lookup_endpoint(opal_btl_usnic_module_t *receiver,
                                    uint64_t sender_proc_name)
{
    opal_btl_usnic_proc_t *proc;
    opal_btl_usnic_endpoint_t *endpoint;
    opal_list_item_t *item;

    /* Pass the actual parameter; the old name sender_hashed_rte_name
       is not declared in this function, which breaks the build when
       the debug macro is enabled. */
    MSGDEBUG1_OUT("lookup_endpoint: recvmodule=%p sendhash=0x%" PRIx64,
                  (void *)receiver, sender_proc_name);

    opal_mutex_lock(&receiver->all_endpoints_lock);
    for (item = opal_list_get_first(&receiver->all_endpoints);
         item != opal_list_get_end(&receiver->all_endpoints);
         item = opal_list_get_next(item)) {
        endpoint = container_of(item, opal_btl_usnic_endpoint_t,
                                endpoint_endpoint_li);
        proc = endpoint->endpoint_proc;
        /* Note that this works today because opal_proc_t->proc_name
           is unique across the universe.  George is potentially
           working to give handles instead of proc names, and then
           have a function pointer to perform comparisons.  This would
           be bad here in the critical path, though... */
        if (proc->proc_opal->proc_name == sender_proc_name) {
            MSGDEBUG1_OUT("lookup_endpoint: matched endpoint=%p",
                          (void *)endpoint);
            /* Drop the lock before handing the endpoint back */
            opal_mutex_unlock(&receiver->all_endpoints_lock);
            return endpoint;
        }
    }
    opal_mutex_unlock(&receiver->all_endpoints_lock);

    /* Didn't find it */
    return NULL;
}
Пример #3
0
/*
 * Create a new endpoint for (module, proc) and claim the modex slot
 * that match_modex() selected for it.
 *
 * module:     module the new endpoint belongs to
 * proc:       peer proc; its modex arrays and endpoint array are updated
 * endpoint_o: receives the new endpoint on success; left untouched on
 *             failure
 *
 * Returns OPAL_SUCCESS on success, the error returned by match_modex()
 * if no modex entry matched, or OPAL_ERR_OUT_OF_RESOURCE if the
 * endpoint object could not be allocated.
 */
int
opal_btl_usnic_create_endpoint(opal_btl_usnic_module_t *module,
                opal_btl_usnic_proc_t *proc,
                opal_btl_usnic_endpoint_t **endpoint_o)
{
    opal_btl_usnic_endpoint_t *ep;
    int slot;
    int rc;

    /* Find the peer's modex entry that pairs with this module */
    rc = match_modex(module, proc, &slot);
    if (rc != OPAL_SUCCESS) {
        opal_output_verbose(5, USNIC_OUT,
                            "btl:usnic:create_endpoint: did not match usnic modex info for peer %s",
                            OPAL_NAME_PRINT(proc->proc_opal->proc_name));
        return rc;
    }

    ep = OBJ_NEW(opal_btl_usnic_endpoint_t);
    if (NULL == ep) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* Basic initialization: owning module and the peer's address */
    ep->endpoint_module = module;
    assert(slot >= 0 && slot < (int)proc->proc_modex_count);
    ep->endpoint_remote_addr = proc->proc_modex[slot];

    /* Send-side sequence state starts from our own initial sequence
       number; nothing has been acked yet, so the acked value sits one
       behind the next value to send. */
    ep->endpoint_next_seq_to_send = module->local_addr.isn;
    ep->endpoint_ack_seq_rcvd = ep->endpoint_next_seq_to_send - 1;

    /* Receive-side sequence state starts from the peer's initial
       sequence number, again with "highest received" one behind. */
    ep->endpoint_next_contig_seq_to_recv = ep->endpoint_remote_addr.isn;
    ep->endpoint_highest_seq_rcvd =
        ep->endpoint_next_contig_seq_to_recv - 1;
    ep->endpoint_rfstart =
        WINDOW_SIZE_MOD(ep->endpoint_next_contig_seq_to_recv);

    /* The ibv_ah is not created here.  ibv_create_ah() may trigger ARP
       resolution, so those calls are batched across endpoints later to
       get some parallelism. */
    ep->endpoint_remote_ah = NULL;

    /* Mark the modex slot as taken */
    proc->proc_modex_claimed[slot] = true;
    MSGDEBUG1_OUT("create_endpoint: module=%p claimed endpoint=%p on proc=%p (hash=0x%" PRIx64 ")\n",
                  (void *)module, (void *)ep, (void *)proc,
                  proc->proc_opal->proc_name);

    /* Link endpoint and proc to each other: the endpoint remembers its
       index in the proc's endpoint array, and holds a reference on the
       proc. */
    ep->endpoint_proc = proc;
    ep->endpoint_proc_index = proc->proc_endpoint_count;
    proc->proc_endpoints[proc->proc_endpoint_count] = ep;
    proc->proc_endpoint_count++;
    OBJ_RETAIN(proc);

    /* Put the endpoint on the module's list of all endpoints.  This is
       done here rather than in the endpoint constructor because the
       module cannot be passed as a constructor argument. */
    opal_mutex_lock(&module->all_endpoints_lock);
    opal_list_append(&(module->all_endpoints),
                     &(ep->endpoint_endpoint_li));
    ep->endpoint_on_all_endpoints = true;
    opal_mutex_unlock(&module->all_endpoints_lock);

    *endpoint_o = ep;
    return OPAL_SUCCESS;
}
Пример #4
0
/*
 * Build the send descriptor for a "large" fragment, i.e. one where
 * reserve + *size > max_frag_payload, in the same manner as
 * btl_prepare_src.
 *
 * module:    module the fragment is allocated from
 * endpoint:  destination endpoint, stored on the returned fragment
 * convertor: source of the payload
 * order:     not referenced by this function
 * reserve:   number of bytes reserved for the upper-layer header
 * size:      in: requested payload bytes; out: bytes this fragment will
 *            actually carry.  The output must be smaller than the
 *            request when the convertor cannot process all of it.
 * flags:     stored in the descriptor's des_flags
 *
 * Returns the new send fragment, or NULL if no large send fragment
 * could be allocated.
 */
static opal_btl_usnic_send_frag_t *
prepare_src_large(
    struct opal_btl_usnic_module_t* module,
    struct mca_btl_base_endpoint_t* endpoint,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    opal_btl_usnic_large_send_frag_t *large;
    opal_btl_usnic_send_frag_t *desc;
    int clone_status;

    /* Allocate the container for this message */
    large = opal_btl_usnic_large_send_frag_alloc(module);
    if (OPAL_UNLIKELY(NULL == large)) {
        return NULL;
    }
    desc = &large->lsf_base;

    /* Two local segments: SG[0] is the header, SG[1] is the payload.
     * When a convertor is in use, SG[1].seg_len is accurate but its
     * seg_addr is NULL. */
    desc->sf_base.uf_base.USNIC_SEND_LOCAL_COUNT = 2;

    /* SG[0] points at the header storage the PML will fill in */
    desc->sf_base.uf_local_seg[0].seg_addr.pval = &large->lsf_ompi_header;
    desc->sf_base.uf_local_seg[0].seg_len = reserve;
    /* the upper-layer header must fit in that storage */
    assert(reserve <= sizeof(large->lsf_ompi_header));

    if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
        /* A negative threshold (-1) means "always pack eagerly";
         * otherwise pack eagerly only below the threshold. */
        if (mca_btl_usnic_component.pack_lazy_threshold < 0 ||
            *size < (size_t)mca_btl_usnic_component.pack_lazy_threshold) {
            /* Eager: pack the convertor's data into a chain of segments
             * right now, keeping room for the PML header in the first
             * segment.  *size is updated through the last argument. */
            desc->sf_base.uf_local_seg[0].seg_addr.pval =
                pack_chunk_seg_chain_with_reserve(module, large, reserve,
                                                  convertor, *size, size);
        }
        else {
            /* Lazy: packing happens later, on the fly */
            MSGDEBUG1_OUT("packing frag %p on the fly", (void *)desc);
            large->lsf_pack_on_the_fly = true;

            /* Report to the PML how much we will absorb while still
             * respecting indivisible element boundaries in the
             * convertor */
            *size = opal_btl_usnic_convertor_pack_peek(convertor, *size);

            /* The BTL does not own the convertor and the PML might
             * mutate it after we return, so work from a private clone */
            clone_status = opal_convertor_clone(convertor,
                                                &desc->sf_convertor,
                                                /*copy_stack=*/true);
            if (OPAL_UNLIKELY(OPAL_SUCCESS != clone_status)) {
                BTL_ERROR(("unexpected convertor clone error"));
                abort(); /* XXX */
            }
        }

        /* SG[1] becomes {NULL, bytes_packed} so that size calculations
         * in both the PML and this BTL come out right; e.g. the PML
         * sums the descriptor segment lengths to decide whether an
         * MPI-level request is complete. */
        desc->sf_base.uf_local_seg[1].seg_addr.pval = NULL;
    } else {
        /* No convertor buffering needed: SG[1] points straight at the
         * convertor's current position.  The pack-on-the-fly flag is
         * set on this path too. */
        large->lsf_pack_on_the_fly = true;
        opal_convertor_get_current_pointer(convertor,
                                           &desc->sf_base.uf_local_seg[1].seg_addr.pval);
    }

    /* In every case SG[1] carries the (possibly reduced) payload size */
    desc->sf_base.uf_local_seg[1].seg_len = *size;

    desc->sf_base.uf_base.des_flags = flags;
    desc->sf_endpoint = endpoint;

    return desc;
}