예제 #1
0
파일: param.c 프로젝트: forzaclaudio/orcm
/**
 * Print every MCA variable registered in a variable group, then recurse
 * into each of the group's subgroups.
 *
 * Variables that cannot be looked up or dumped are skipped.  The dump
 * format follows the orcm_info_pretty flag: readable when it is set,
 * parsable otherwise.  In readable mode the first line of each variable
 * is labelled "MCA <framework>"; all other lines get empty labels.
 *
 * @param group          Group whose variables and subgroups are shown.
 * @param want_internal  When false, variables carrying
 *                       MCA_BASE_VAR_FLAG_INTERNAL are not shown.
 */
static void orcm_info_show_mca_group_params(const mca_base_var_group_t *group, bool want_internal)
{
    const mca_base_var_group_t *subgroup;
    const mca_base_var_t *var;
    const int *variables;
    int ret, i, j, count;
    const int *groups;
    char **strings;

    variables = OPAL_VALUE_ARRAY_GET_BASE(&group->group_vars, const int);
    count = opal_value_array_get_size((opal_value_array_t *)&group->group_vars);

    for (i = 0 ; i < count ; ++i) {
        ret = mca_base_var_get(variables[i], &var);
        if (OPAL_SUCCESS != ret || ((var->mbv_flags & MCA_BASE_VAR_FLAG_INTERNAL) &&
                                    !want_internal)) {
            continue;
        }

        ret = mca_base_var_dump(variables[i], &strings, !orcm_info_pretty ? MCA_BASE_VAR_DUMP_PARSABLE : MCA_BASE_VAR_DUMP_READABLE);
        if (OPAL_SUCCESS != ret) {
            continue;
        }

        /* strings is a NULL-terminated array; each entry and the array
         * itself are freed here once printed. */
        for (j = 0 ; strings[j] ; ++j) {
            char *message = NULL;

            if (0 == j && orcm_info_pretty &&
                0 > asprintf(&message, "MCA %s", group->group_framework)) {
                /* asprintf() leaves the pointer undefined on failure, so
                 * reset it and fall back to the unlabelled output below. */
                message = NULL;
            }

            if (NULL != message) {
                orcm_info_out(message, message, strings[j]);
                free(message);
            } else {
                orcm_info_out("", "", strings[j]);
            }
            free(strings[j]);
        }
        free(strings);
    }

    groups = OPAL_VALUE_ARRAY_GET_BASE(&group->group_subgroups, const int);
    count = opal_value_array_get_size((opal_value_array_t *)&group->group_subgroups);

    /* Use a separate local for the looked-up subgroup so the parameter
     * keeps referring to the group this call was asked to show. */
    for (i = 0 ; i < count ; ++i) {
        ret = mca_base_var_group_get(groups[i], &subgroup);
        if (OPAL_SUCCESS != ret) {
            continue;
        }
        orcm_info_show_mca_group_params(subgroup, want_internal);
    }
}
예제 #2
0
/*
 * Copy the control-variable indices that belong to a category into a
 * caller-supplied buffer.
 *
 * cat_index  index of the category (looked up as an MCA variable group)
 * len        capacity of indices[]; at most this many entries are written
 * indices    output buffer receiving the variable indices
 *
 * Returns MPI_T_ERR_NOT_INITIALIZED when the tool interface is not
 * initialized, MPI_T_ERR_INVALID_INDEX when the group lookup reports
 * OPAL_ERR_NOT_FOUND, MPI_ERR_OTHER for any other negative lookup result,
 * and otherwise the (non-negative) lookup result itself.
 */
int MPI_T_category_get_cvars(int cat_index, int len, int indices[])
{
    const mca_base_var_group_t *category;
    const int *cvar_ids;
    int status;
    int available;
    int limit;
    int k;

    if (!mpit_is_initialized ()) {
        return MPI_T_ERR_NOT_INITIALIZED;
    }

    ompi_mpit_lock ();

    status = mca_base_var_group_get (cat_index, &category);
    if (status < 0) {
        /* translate the lookup failure into an MPI_T error code */
        if (OPAL_ERR_NOT_FOUND == status) {
            status = MPI_T_ERR_INVALID_INDEX;
        } else {
            status = MPI_ERR_OTHER;
        }
    } else {
        available = opal_value_array_get_size((opal_value_array_t *) &category->group_vars);
        cvar_ids = OPAL_VALUE_ARRAY_GET_BASE(&category->group_vars, int);

        /* never write more than the caller's buffer or the group holds */
        limit = (len < available) ? len : available;
        for (k = 0 ; k < limit ; ++k) {
            indices[k] = cvar_ids[k];
        }
    }

    ompi_mpit_unlock ();

    return status;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Runtime query: probe whether the verbs sshmem component can be used.
 *
 * The probe selects an InfiniBand device (the one whose name equals
 * mca_sshmem_verbs_component.hca_name, or the first listed device when no
 * name is configured), opens it, queries its attributes, allocates a
 * protection domain and registers a trial memory region of getpagesize()
 * bytes.  All verbs resources acquired by the probe are released again
 * before returning, whether the probe succeeded or not.
 *
 * @param module    [out] the verbs module on success, NULL otherwise.
 * @param priority  [out] the component priority on success, 0 otherwise.
 * @param hint      not used by this function.
 *
 * @return OSHMEM_SUCCESS if the probe succeeded;
 *         OSHMEM_ERR_NOT_SUPPORTED if no device list or no device exists;
 *         OSHMEM_ERR_NOT_FOUND if no device matched the configured name;
 *         OSHMEM_ERR_RESOURCE_BUSY if opening/querying the device or
 *         allocating the protection domain failed;
 *         OSHMEM_ERR_OUT_OF_RESOURCE if the memory registration failed.
 */
static int
verbs_runtime_query(mca_base_module_t **module,
                    int *priority,
                    const char *hint)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t my_device;
    openib_device_t *device = &my_device;
    int mr_array_constructed = 0;
    int num_devs = 0;
    int i = 0;

    *priority = 0;
    *module = NULL;

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
    #error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        /* With zero devices ibv_get_device_list() still returns a non-NULL
         * (empty) list, which must be released to avoid a leak. */
        if (device->ib_devs) {
            ibv_free_device_list(device->ib_devs);
            device->ib_devs = NULL;
        }
        return OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (NULL != mca_sshmem_verbs_component.hca_name) {
        for (i = 0; i < num_devs; i++) {
            /* ibv_get_device_name() may return NULL; never hand that to strcmp() */
            const char *dev_name = ibv_get_device_name(device->ib_devs[i]);

            if (NULL != dev_name &&
                0 == strcmp(mca_sshmem_verbs_component.hca_name, dev_name)) {
                device->ib_dev = device->ib_devs[i];
                break;
            }
        }
    } else {
        device->ib_dev = device->ib_devs[0];
    }

    if (NULL == device->ib_dev) {
        rc = OSHMEM_ERR_NOT_FOUND;
        goto out;
    }

    if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Obtain device attributes */
    if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate the protection domain for the device */
    device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
    if (NULL == device->ib_pd) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate memory */
    if (!rc) {
        void *addr = NULL;
        size_t size = getpagesize();
        struct ibv_mr *ib_mr = NULL;
        uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ;
        uint64_t exp_access_flag = 0;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
        /* remember the construction so cleanup destructs the array even
         * when no region ever gets appended to it */
        mr_array_constructed = 1;

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR  |
                          IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                          IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

        struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};
        ib_mr = ibv_exp_reg_mr(&in);
        if (NULL == ib_mr) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        } else {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc) {
            struct ibv_exp_reg_shared_mr_in in_smr;

            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ|
                          IBV_EXP_ACCESS_NO_RDMA;

            addr = (void *)mca_sshmem_base_start_address;
            mca_sshmem_verbs_fill_shared_mr(&in_smr, device->ib_pd, device->ib_mr_shared->handle,  addr, access_flag);
            ib_mr = ibv_exp_reg_shared_mr(&in_smr);
            /* a failed shared registration is not an error; it only
             * records that shared MRs are unavailable */
            if (NULL == ib_mr) {
                mca_sshmem_verbs_component.has_shared_mr = 0;
            } else {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
                mca_sshmem_verbs_component.has_shared_mr = 1;
            }
        }
#endif /* MPAGE_ENABLE */
    }

    /* all is well - rainbows and butterflies */
    if (!rc) {
        *priority = mca_sshmem_verbs_component.priority;
        *module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
    }

out:
    /* Tear down everything the probe acquired, in reverse order of setup. */
    if (mr_array_constructed) {
        struct ibv_mr **array;

        array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
        /* always take the front entry: removing item 0 shifts the rest down */
        while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
            ibv_dereg_mr(array[0]);
            opal_value_array_remove_item(&device->ib_mr_array, 0);
        }

        device->ib_mr_shared = NULL;
        OBJ_DESTRUCT(&device->ib_mr_array);
    }

    if (device->ib_pd) {
        ibv_dealloc_pd(device->ib_pd);
        device->ib_pd = NULL;
    }

    if (device->ib_dev_context) {
        ibv_close_device(device->ib_dev_context);
        device->ib_dev_context = NULL;
    }

    if (device->ib_devs) {
        ibv_free_device_list(device->ib_devs);
        device->ib_devs = NULL;
    }

    return rc;
}
예제 #4
0
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Detach a segment: release the verbs resources held by memheap_device
 * and reset the segment descriptor.
 *
 * Memory regions in the device's ib_mr_array are deregistered from the
 * last entry to the first.  The protection domain, device context and
 * device list are released only while no earlier step has failed; after a
 * failure the remaining resources are left untouched.  ds_buf is reset in
 * every case.
 *
 * @param ds_buf  segment descriptor to reset (must not be NULL).
 * @param mkey    not used by this function.
 *
 * @return OSHMEM_SUCCESS, or OSHMEM_ERROR if any verbs call failed.
 */
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t *device = &memheap_device;
    int err;
    int i;

    assert(ds_buf);

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: detaching "
            "(id: %d, addr: %p size: %lu, name: %s)\n",
            mca_sshmem_verbs_component.super.base_version.mca_type_name,
            mca_sshmem_verbs_component.super.base_version.mca_component_name,
            ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    if (0 < (i = opal_value_array_get_size(&device->ib_mr_array))) {
        struct ibv_mr** array;
        struct ibv_mr* ib_mr = NULL;
        array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
        for (i--;i >= 0; i--) {
            ib_mr = array[i];
            /* ibv_dereg_mr() reports failure through its return value
             * (an errno code), so log that rather than the global errno */
            err = ibv_dereg_mr(ib_mr);
            if (0 != err) {
                OPAL_OUTPUT_VERBOSE(
                    (5, oshmem_sshmem_base_framework.framework_output,
                        "error ibv_dereg_mr(): %d: %s",
                        err, strerror(err))
                    );
                rc = OSHMEM_ERROR;
            }
            /* the entry is dropped even when deregistration failed */
            opal_value_array_remove_item(&device->ib_mr_array, i);
        }

        if (!rc && device->ib_mr_shared) {
            device->ib_mr_shared = NULL;
        }
        OBJ_DESTRUCT(&device->ib_mr_array);
    }

    if (!rc && device->ib_pd) {
        /* ibv_dealloc_pd() also returns the errno code on failure */
        err = ibv_dealloc_pd(device->ib_pd);
        if (0 != err) {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                    "error ibv_dealloc_pd(): %d: %s",
                    err, strerror(err))
                );
            rc = OSHMEM_ERROR;
        } else {
            device->ib_pd = NULL;
        }
    }

    if (!rc && device->ib_dev_context) {
        if (ibv_close_device(device->ib_dev_context)) {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                    "error ibv_close_device(): %d: %s",
                    errno, strerror(errno))
                );
            rc = OSHMEM_ERROR;
        } else {
            device->ib_dev_context = NULL;
        }
    }

    if (!rc && device->ib_devs) {
        ibv_free_device_list(device->ib_devs);
        device->ib_devs = NULL;
    }

    /* reset the contents of the map_segment_t associated with this
     * shared memory segment.
     */
    shmem_ds_reset(ds_buf);

    return rc;
}
예제 #5
0
char* orte_regex_encode_maps(orte_job_t *jdata)
{
    orte_node_t *node;
    orte_regex_node_t *ndreg;
    int32_t nodenum, i, n;
    bool found, fullname;
    opal_list_t nodelist;
    int len;
    char prefix[ORTE_MAX_NODE_PREFIX];
    int startnum;
    opal_list_item_t *item;
    char **regexargs = NULL, *tmp, *tmp2;
    int32_t num_nodes, start, cnt, ppn, nppn;
    orte_vpid_t vpid_start, start_vpid, end_vpid, base;
    char *regexp = NULL;
    bool byslot;
    orte_node_rank_t node_rank, nrank;
    char suffix, sfx;
    orte_app_context_t *app;
    
    /* this is only for one app_context */
    if (jdata->num_apps > 1) {
        return NULL;
    }
    
    /* determine the mapping policy */
    byslot = true;
    if (jdata->map->policy & ORTE_MAPPING_BYNODE) {
        byslot = false;
    }
    
    /* setup the list of nodes with same prefixes */
    OBJ_CONSTRUCT(&nodelist, opal_list_t);
    
    /* cycle through the node pool */
    for (n=0; n < orte_node_pool->size; n++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
            continue;
        }
        /* determine this node's prefix by looking for first non-alpha char */
        fullname = false;
        len = strlen(node->name);
        startnum = -1;
        memset(prefix, 0, ORTE_MAX_NODE_PREFIX);
        suffix = '\0';
        for (i=0; i < len; i++) {
            if (!isalpha(node->name[i])) {
                /* found a non-alpha char */
                if (!isdigit(node->name[i])) {
                    /* if it is anything but a digit, we just use
                     * the entire name, which by definition is unique
                     * by the way we created the node pool
                     */
                    fullname = true;
                    break;
                }
                if ('0' == node->name[i]) {
                    /* if the digit is 0, then add it to the prefix */
                    prefix[i] = node->name[i];
                    continue;
                }
                /* okay, this defines end of the prefix */
                startnum = i;
                break;
            }
            prefix[i] = node->name[i];
        }
        if (fullname || startnum < 0) {
            ndreg = OBJ_NEW(orte_regex_node_t);
            ndreg->prefix = strdup(node->name);
            start_sequence(jdata->jobid, node, ndreg, suffix, -1);
            opal_list_append(&nodelist, &ndreg->super);
            continue;
        }
        /* search for a suffix */
        if (isalpha(node->name[len-1])) {
            suffix = node->name[len-1];
        }
        nodenum = strtol(&node->name[startnum], NULL, 10);
        /* is this prefix already on our list? */
        found = false;
        for (item = opal_list_get_first(&nodelist);
             !found && item != opal_list_get_end(&nodelist);
             item = opal_list_get_next(item)) {
            ndreg = (orte_regex_node_t*)item;
            if (0 == strcmp(prefix, ndreg->prefix)) {
                /* yes - flag it */
                found = true;
                /* see if we have a range or a break in the list - we
                 * break the list if one of the following conditions occurs:
                 *
                 * 1. the node number is out of sequence
                 *
                 * 2. the vpid of the first proc on the node is out
                 *    of sequence - i.e., does not equal the vpid of
                 *    the first proc on the first node + step if bynode,
                 *    or the last proc on the prior node + 1 if byslot
                 *
                 * 3. the starting node rank on the node is out of sequence
                 */
                num_nodes = opal_value_array_get_size(&ndreg->nodes)-1;
                start = OPAL_VALUE_ARRAY_GET_ITEM(&ndreg->nodes, int32_t, num_nodes);
                cnt = OPAL_VALUE_ARRAY_GET_ITEM(&ndreg->cnt, int32_t, num_nodes);
                sfx = OPAL_VALUE_ARRAY_GET_ITEM(&ndreg->suffix, char, num_nodes);
                if (suffix != sfx) {
                    /* break in suffix - start new range */
                    start_sequence(jdata->jobid, node, ndreg, suffix, nodenum);
                } else if (nodenum != cnt+start+1) {
                    /* have a break in the node sequence - start new range */
                    start_sequence(jdata->jobid, node, ndreg, suffix, nodenum);
                } else {
                    /* cycle through the procs on this node and see if the vpids
                     * for this jobid break the sequencing
                     */
                    vpid_start = OPAL_VALUE_ARRAY_GET_ITEM(&ndreg->starting_vpid, orte_vpid_t, num_nodes);
                    ppn = OPAL_VALUE_ARRAY_GET_ITEM(&ndreg->ppn, int32_t, num_nodes);
                    nrank = OPAL_VALUE_ARRAY_GET_ITEM(&ndreg->nrank, orte_node_rank_t, num_nodes);
                    compute_vpids(node, jdata->jobid, &start_vpid, &end_vpid, &nppn, &node_rank);
                    /* if the ppn doesn't match, then that breaks the sequence */
                    if (nppn != ppn) {
                        start_sequence(jdata->jobid, node, ndreg, suffix, nodenum);
                        break;
                    }
                    /* if the starting node rank doesn't match, then that breaks the sequence */
                    if (nrank != node_rank) {
                        start_sequence(jdata->jobid, node, ndreg, suffix, nodenum);
                        break;
                    }
                    /* if the vpids don't align correctly, then that breaks the sequence */
                    if (byslot) {
                        base = vpid_start + (ppn * (cnt+1));
                        if (start_vpid != base) {
                            /* break sequence */
                            start_sequence(jdata->jobid, node, ndreg, suffix, nodenum);
                            break;
                        }
                    } else {
                        if (start_vpid != (vpid_start + 1)) {
                            /* break sequence */
                            start_sequence(jdata->jobid, node, ndreg, suffix, nodenum);
                            break;
                        }
                    }
                    /* otherwise, if everything matches, just increment the cnt */
                    OPAL_VALUE_ARRAY_SET_ITEM(&ndreg->cnt, int32_t, num_nodes, cnt+1);
                }
            }
        }
        if (!found) {
            /* need to add it */
            ndreg = OBJ_NEW(orte_regex_node_t);
            ndreg->prefix = strdup(prefix);
            start_sequence(jdata->jobid, node, ndreg, suffix, nodenum);
            opal_list_append(&nodelist, &ndreg->super);
        }
    }
예제 #6
0
/**
 * Release the verbs resources referenced by a segment's device context.
 *
 * The device is taken from s->context; nothing is done when it is NULL.
 * Memory regions in ib_mr_array are deregistered front to back, then the
 * protection domain, device context and device list are released.  Once
 * any verbs call fails, the remaining release steps are skipped and the
 * corresponding resources are left untouched.  Failures are reported via
 * MEMHEAP_ERROR only; the function returns nothing.
 *
 * @param s  segment whose context holds the device (must not be NULL).
 */
static void _ibv_detach(map_segment_t *s)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t *device = NULL;
    int err;

    assert(s);

    device = (openib_device_t *)s->context;

    if (device)
    {
        if(opal_value_array_get_size(&device->ib_mr_array))
        {
            struct ibv_mr** array;
            struct ibv_mr* ib_mr = NULL;
            array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
            /* always take the front entry: removing item 0 shifts the rest down */
            while (opal_value_array_get_size(&device->ib_mr_array) > 0)
            {
                ib_mr = array[0];
                /* ibv_dereg_mr() reports failure through its return value
                 * (an errno code), so log that rather than the global errno */
                err = ibv_dereg_mr(ib_mr);
                if(0 != err)
                {
                    MEMHEAP_ERROR("error ibv_dereg_mr(): %d: %s", err, strerror(err));
                    rc = OSHMEM_ERROR;
                }
                /* the entry is dropped even when deregistration failed */
                opal_value_array_remove_item(&device->ib_mr_array, 0);
            }

            if(!rc && device->ib_mr_shared)
            {
                device->ib_mr_shared = NULL;
            }
            OBJ_DESTRUCT(&device->ib_mr_array);
        }

        if(!rc && device->ib_pd)
        {
            /* ibv_dealloc_pd() also returns the errno code on failure */
            err = ibv_dealloc_pd(device->ib_pd);
            if(0 != err)
            {
                MEMHEAP_ERROR("error ibv_dealloc_pd(): %d: %s", err, strerror(err));
                rc = OSHMEM_ERROR;
            }
            else
            {
                device->ib_pd = NULL;
            }
        }

        if(!rc && device->ib_dev_context)
        {
            if(ibv_close_device(device->ib_dev_context))
            {
                MEMHEAP_ERROR("error ibv_close_device(): %d: %s", errno, strerror(errno));
                rc = OSHMEM_ERROR;
            }
            else
            {
                device->ib_dev_context = NULL;
            }
        }

        if(!rc && device->ib_devs)
        {
            ibv_free_device_list(device->ib_devs);
            device->ib_devs = NULL;
        }
    }
}