Exemple #1
0
mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc)
{
    mca_btl_openib_proc_t *ib_proc = NULL, *ib_proc_ret = NULL;
    size_t msg_size;
    uint32_t size;
    int rc, i, j;
    void *message;
    char *offset;
    int modex_message_size;
    mca_btl_openib_modex_message_t dummy;
    bool is_new = false;

    /* Check if we have already created a IB proc
     * structure for this ompi process */
    ib_proc = ibproc_lookup_and_lock(proc);
    if (NULL != ib_proc) {
        /* Gotcha! */
        return ib_proc;
    }

    /* All initialization has to be an atomic operation. we do the following assumption:
     * - we let all concurent threads to try to do the initialization;
     * - when one has finished it locks ib_lock and checks if corresponding
     *   process is still missing;
     * - if so - new proc is added, otherwise - initialized proc struct is released.
     */

    /* First time, gotta create a new IB proc
     * out of the opal_proc ... */
    ib_proc = OBJ_NEW(mca_btl_openib_proc_t);
    if (NULL == ib_proc) {
      return NULL;
    }

    /* Initialize number of peer */
    ib_proc->proc_endpoint_count = 0;
    ib_proc->proc_opal = proc;

    /* query for the peer address info */
    OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
                    &proc->proc_name, &message, &msg_size);
    if (OPAL_SUCCESS != rc) {
        BTL_VERBOSE(("[%s:%d] opal_modex_recv failed for peer %s",
                   __FILE__, __LINE__,
                   OPAL_NAME_PRINT(proc->proc_name)));
        goto no_err_exit;
    }
    if (0 == msg_size) {
        goto no_err_exit;
    }

    /* Message was packed in btl_openib_component.c; the format is
       listed in a comment in that file */
    modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);

    /* Unpack the number of modules in the message */
    offset = (char *) message;
    unpack8(&offset, &(ib_proc->proc_port_count));
    BTL_VERBOSE(("unpack: %d btls", ib_proc->proc_port_count));
    if (ib_proc->proc_port_count > 0) {
        ib_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
            malloc(sizeof(mca_btl_openib_proc_modex_t) *
                   ib_proc->proc_port_count);
    } else {
        ib_proc->proc_ports = NULL;
    }

    /* Loop over unpacking all the ports */
    for (i = 0; i < ib_proc->proc_port_count; i++) {

        /* Unpack the modex comment message struct */
        size = modex_message_size;
        memcpy(&(ib_proc->proc_ports[i].pm_port_info), offset, size);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
        MCA_BTL_OPENIB_MODEX_MSG_NTOH(ib_proc->proc_ports[i].pm_port_info);
#endif
        offset += size;
        BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d",
                     i, (int)(offset-((char*)message))));

        /* Unpack the number of CPCs that follow */
        unpack8(&offset, &(ib_proc->proc_ports[i].pm_cpc_data_count));
        BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
                     i, ib_proc->proc_ports[i].pm_cpc_data_count,
                     (int)(offset-((char*)message))));
        ib_proc->proc_ports[i].pm_cpc_data = (opal_btl_openib_connect_base_module_data_t *)
            calloc(ib_proc->proc_ports[i].pm_cpc_data_count,
                   sizeof(opal_btl_openib_connect_base_module_data_t));
        if (NULL == ib_proc->proc_ports[i].pm_cpc_data) {
            goto err_exit;
        }

        /* Unpack the CPCs */
        for (j = 0; j < ib_proc->proc_ports[i].pm_cpc_data_count; ++j) {
            uint8_t u8;
            opal_btl_openib_connect_base_module_data_t *cpcd;
            cpcd = ib_proc->proc_ports[i].pm_cpc_data + j;
            unpack8(&offset, &u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
                         i, j, u8, (int)(offset-(char*)message)));
            cpcd->cbm_component =
                opal_btl_openib_connect_base_get_cpc_byindex(u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
                         i, j, cpcd->cbm_component->cbc_name));

            unpack8(&offset, &cpcd->cbm_priority);
            unpack8(&offset, &cpcd->cbm_modex_message_len);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
                         i, j, cpcd->cbm_priority,
                         cpcd->cbm_modex_message_len,
                         (int)(offset-(char*)message)));
            if (cpcd->cbm_modex_message_len > 0) {
                cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
                if (NULL == cpcd->cbm_modex_message) {
                    BTL_ERROR(("Failed to malloc"));
                    goto err_exit;
                }
                memcpy(cpcd->cbm_modex_message, offset,
                       cpcd->cbm_modex_message_len);
                offset += cpcd->cbm_modex_message_len;
                BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
                             i, j,
                             ((uint32_t*)cpcd->cbm_modex_message)[0],
                             ((uint32_t*)cpcd->cbm_modex_message)[1],
                             (int)(offset-((char*)message))));
            }
        }
    }

    if (0 == ib_proc->proc_port_count) {
        ib_proc->proc_endpoints = NULL;
    } else {
        ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
            malloc(ib_proc->proc_port_count *
                   sizeof(mca_btl_base_endpoint_t*));
    }
    if (NULL == ib_proc->proc_endpoints) {
        goto err_exit;
    }

    BTL_VERBOSE(("unpacking done!"));

    /* Finally add this process to the initialized procs list */
    opal_mutex_lock(&mca_btl_openib_component.ib_lock);

    ib_proc_ret = ibproc_lookup_no_lock(proc);
    if (NULL == ib_proc_ret) {
        /* if process can't be found in this list - insert it locked
         * it is safe to lock ib_proc here because this thread is
         * the only one who knows about it so far */
        opal_mutex_lock(&ib_proc->proc_lock);
        opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super);
        ib_proc_ret = ib_proc;
        is_new = true;
    } else {
        /* otherwise - release module_proc */
        OBJ_RELEASE(ib_proc);
    }
    opal_mutex_unlock(&mca_btl_openib_component.ib_lock);

    /* if we haven't insert the process - lock it here so we
     * won't lock mca_btl_openib_component.ib_lock */
    if( !is_new ){
        opal_mutex_lock(&ib_proc_ret->proc_lock);
    }

    return ib_proc_ret;

err_exit:

    BTL_ERROR(("%d: error exit from mca_btl_openib_proc_create", OPAL_PROC_MY_NAME.vpid));

no_err_exit:

    OBJ_RELEASE(ib_proc);
    return NULL;
}
Exemple #2
0
mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc)
{
    mca_btl_openib_proc_t* module_proc = NULL;
    size_t msg_size;
    uint32_t size;
    int rc, i, j;
    void *message;
    char *offset;
    int modex_message_size;
    mca_btl_openib_modex_message_t dummy;

    /* Check if we have already created a IB proc
     * structure for this ompi process */
    module_proc = mca_btl_openib_proc_lookup_proc(proc);
    if (NULL != module_proc) {
        /* Gotcha! */
        return module_proc;
    }

    /* Oops! First time, gotta create a new IB proc
     * out of the opal_proc ... */
    module_proc = OBJ_NEW(mca_btl_openib_proc_t);
    /* Initialize number of peer */
    module_proc->proc_endpoint_count = 0;
    module_proc->proc_opal = proc;

    /* query for the peer address info */
    OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
                    proc, &message, &msg_size);
    if (OPAL_SUCCESS != rc) {
        BTL_ERROR(("[%s:%d] opal_modex_recv failed for peer %s",
                   __FILE__, __LINE__,
                   OPAL_NAME_PRINT(proc->proc_name)));
        OBJ_RELEASE(module_proc);
        return NULL;
    }
    if (0 == msg_size) {
        return NULL;
    }

    /* Message was packed in btl_openib_component.c; the format is
       listed in a comment in that file */
    modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);

    /* Unpack the number of modules in the message */
    offset = (char *) message;
    unpack8(&offset, &(module_proc->proc_port_count));
    BTL_VERBOSE(("unpack: %d btls", module_proc->proc_port_count));
    if (module_proc->proc_port_count > 0) {
        module_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
            malloc(sizeof(mca_btl_openib_proc_modex_t) *
                   module_proc->proc_port_count);
    } else {
        module_proc->proc_ports = NULL;
    }

    /* Loop over unpacking all the ports */
    for (i = 0; i < module_proc->proc_port_count; i++) {

        /* Unpack the modex comment message struct */
        size = modex_message_size;
        memcpy(&(module_proc->proc_ports[i].pm_port_info), offset, size);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
        MCA_BTL_OPENIB_MODEX_MSG_NTOH(module_proc->proc_ports[i].pm_port_info);
#endif
        offset += size;
        BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d",
                     i, (int)(offset-((char*)message))));

        /* Unpack the number of CPCs that follow */
        unpack8(&offset, &(module_proc->proc_ports[i].pm_cpc_data_count));
        BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
                     i, module_proc->proc_ports[i].pm_cpc_data_count,
                     (int)(offset-((char*)message))));
        module_proc->proc_ports[i].pm_cpc_data = (opal_btl_openib_connect_base_module_data_t *)
            calloc(module_proc->proc_ports[i].pm_cpc_data_count,
                   sizeof(opal_btl_openib_connect_base_module_data_t));
        if (NULL == module_proc->proc_ports[i].pm_cpc_data) {
            return NULL;
        }

        /* Unpack the CPCs */
        for (j = 0; j < module_proc->proc_ports[i].pm_cpc_data_count; ++j) {
            uint8_t u8;
            opal_btl_openib_connect_base_module_data_t *cpcd;
            cpcd = module_proc->proc_ports[i].pm_cpc_data + j;
            unpack8(&offset, &u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
                         i, j, u8, (int)(offset-(char*)message)));
            cpcd->cbm_component =
                opal_btl_openib_connect_base_get_cpc_byindex(u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
                         i, j, cpcd->cbm_component->cbc_name));

            unpack8(&offset, &cpcd->cbm_priority);
            unpack8(&offset, &cpcd->cbm_modex_message_len);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
                         i, j, cpcd->cbm_priority,
                         cpcd->cbm_modex_message_len,
                         (int)(offset-(char*)message)));
            if (cpcd->cbm_modex_message_len > 0) {
                cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
                if (NULL == cpcd->cbm_modex_message) {
                    BTL_ERROR(("Failed to malloc"));
                    return NULL;
                }
                memcpy(cpcd->cbm_modex_message, offset,
                       cpcd->cbm_modex_message_len);
                offset += cpcd->cbm_modex_message_len;
                BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
                             i, j,
                             ((uint32_t*)cpcd->cbm_modex_message)[0],
                             ((uint32_t*)cpcd->cbm_modex_message)[1],
                             (int)(offset-((char*)message))));
            }
        }
    }

    if (0 == module_proc->proc_port_count) {
        module_proc->proc_endpoints = NULL;
    } else {
        module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
            malloc(module_proc->proc_port_count *
                   sizeof(mca_btl_base_endpoint_t*));
    }
    if (NULL == module_proc->proc_endpoints) {
        OBJ_RELEASE(module_proc);
        return NULL;
    }

    BTL_VERBOSE(("unpacking done!"));
    return module_proc;
}