/**
 * opal_carto_base_get_nodes_distance - returns the distance of
 * all the nodes from the reference node.
 * 
 * @param graph
 * @param reference_node
 * @param node_type the type of the nodes in the returned array
 * @param dist_array
 * 
 * @return int number of nodes in the returned array.
 */
int opal_carto_base_get_nodes_distance_fn(opal_carto_graph_t *graph, opal_carto_base_node_t *reference_node, 
                                       const char *node_type, opal_value_array_t *dist_array)
{
    opal_value_array_t *distance_array;
    vertex_distance_from_t *vertex_distance;
    opal_carto_base_node_t *node;
    uint32_t i, graph_order;
    int distance_array_size;
    opal_carto_node_distance_t node_distance;


    distance_array = OBJ_NEW(opal_value_array_t);
    opal_value_array_init(distance_array, sizeof(vertex_distance_from_t));
    opal_value_array_reserve(distance_array,50);
    /* use dijkstra algorithm to receive the distance of all the nodes from the referenced node */
    graph_order = opal_graph_dijkstra(graph, reference_node->vertex, distance_array);
    /* for all the nodes in the dijkstra array */
    for (i = 0, distance_array_size = 0; i < graph_order; i++) {
        vertex_distance = opal_value_array_get_item(distance_array, i);
        node = vertex_distance->vertex->vertex_data;
        /* check if the node is in the correct type */
        if (NULL == node_type || 0 == strcmp(node->node_type, node_type)) {
            /* assigne the result distance array */
            node_distance.node = vertex_distance->vertex->vertex_data;
            node_distance.node_distance = vertex_distance->weight;
            opal_value_array_append_item(dist_array, (void *)&node_distance);
        }
    }
    /* return the result distance array */
    return distance_array_size;
}
Example #2
0
/**
 * This graph API returns all the adjacents of a vertex and the
 * distance (weight) of those adjacents and the vertex.
 *
 * @param graph
 * @param vertex The reference vertex
 * @param adjacents An allocated pointer array of vertices and
 *                  their distance from the reference vertex.
 *                  Note that this pointer should be free after
 *                  usage by the user
 *
 * @return int the number of adjacents in the list.
 */
int opal_graph_get_adjacent_vertices(opal_graph_t *graph, opal_graph_vertex_t *vertex, opal_value_array_t *adjacents)
{
    opal_adjacency_list_t *adj_list;
    opal_graph_edge_t *edge;
    int adjacents_number;
    opal_list_item_t *item;
    vertex_distance_from_t distance_from;
    int i;

    /**
     * Verify that the vertex belongs to the graph.
     */
    if (graph != vertex->in_graph) {
        OPAL_OUTPUT((0,"Vertex %p not in the graph %p\n", (void *)vertex, (void *)graph));
        return 0;
    }
    /**
     * find the adjacency list that this vertex belongs to
     */
    adj_list = (opal_adjacency_list_t *) vertex->in_adj_list;
    /* find the number of adjcents of this vertex */
    adjacents_number = opal_list_get_size(adj_list->edges);
    /* Run on all the edges from this vertex */
    for (item = opal_list_get_first(adj_list->edges), i = 0;
         item != opal_list_get_end(adj_list->edges);
         item  = opal_list_get_next(item), i++) {
        edge = (opal_graph_edge_t *)item;
        /* assign vertices and their weight in the adjcents list */
        distance_from.vertex = edge->end;
        distance_from.weight = edge->weight;
        opal_value_array_append_item(adjacents, &distance_from);
    }
    /* return the number of the adjacents in the list */
    return adjacents_number;
}
Example #3
0
static void start_sequence(orte_jobid_t jobid, orte_node_t *node,
                           orte_regex_node_t *ndreg, char suffix, int32_t nodenum)
{
    int32_t j, ppn;
    orte_vpid_t start_vpid, end_vpid;
    orte_node_rank_t nrank;
    
    opal_value_array_append_item(&ndreg->suffix, &suffix);
    opal_value_array_append_item(&ndreg->nodes, &nodenum);
    j = 0;
    opal_value_array_append_item(&ndreg->cnt, &j);
    compute_vpids(node, jobid, &start_vpid, &end_vpid, &ppn, &nrank);
    opal_value_array_append_item(&ndreg->starting_vpid, &start_vpid);
    opal_value_array_append_item(&ndreg->ppn, &ppn);
    opal_value_array_append_item(&ndreg->nrank, &nrank);
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
verbs_runtime_query(mca_base_module_t **module,
                    int *priority,
                    const char *hint)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t my_device;
    openib_device_t *device = &my_device;
    int num_devs = 0;
    int i = 0;

    *priority = 0;
    *module = NULL;

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
    #error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        return OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (NULL != mca_sshmem_verbs_component.hca_name) {
        for (i = 0; i < num_devs; i++) {
            if (0 == strcmp(mca_sshmem_verbs_component.hca_name, ibv_get_device_name(device->ib_devs[i]))) {
                device->ib_dev = device->ib_devs[i];
                break;
            }
        }
    } else {
        device->ib_dev = device->ib_devs[0];
    }

    if (NULL == device->ib_dev) {
        rc = OSHMEM_ERR_NOT_FOUND;
        goto out;
    }

    if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Obtain device attributes */
    if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate the protection domain for the device */
    device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
    if (NULL == device->ib_pd) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate memory */
    if (!rc) {
        void *addr = NULL;
        size_t size = getpagesize();
        struct ibv_mr *ib_mr = NULL;
        uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ; 
        uint64_t exp_access_flag = 0;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR  |
                          IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                          IBV_EXP_ACCESS_SHARED_MR_USER_WRITE; 
#endif /* MPAGE_ENABLE */

        struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};
        ib_mr = ibv_exp_reg_mr(&in);
        if (NULL == ib_mr) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        } else {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc) {
            struct ibv_exp_reg_shared_mr_in in_smr;

            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ|
                          IBV_EXP_ACCESS_NO_RDMA;

            addr = (void *)mca_sshmem_base_start_address;
            mca_sshmem_verbs_fill_shared_mr(&in_smr, device->ib_pd, device->ib_mr_shared->handle,  addr, access_flag);
            ib_mr = ibv_exp_reg_shared_mr(&in_smr);
            if (NULL == ib_mr) {
                mca_sshmem_verbs_component.has_shared_mr = 0;
            } else {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
                mca_sshmem_verbs_component.has_shared_mr = 1;
            }
        }
#endif /* MPAGE_ENABLE */
    }

    /* all is well - rainbows and butterflies */
    if (!rc) {
        *priority = mca_sshmem_verbs_component.priority;
        *module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
    }

out:
    if (device) {
        if (opal_value_array_get_size(&device->ib_mr_array)) {
            struct ibv_mr** array;
            struct ibv_mr* ib_mr = NULL;
            array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
            while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
                ib_mr = array[0];
                ibv_dereg_mr(ib_mr);
                opal_value_array_remove_item(&device->ib_mr_array, 0);
            }

            if (device->ib_mr_shared) {
                device->ib_mr_shared = NULL;
            }
            OBJ_DESTRUCT(&device->ib_mr_array);
        }

        if (device->ib_pd) {
            ibv_dealloc_pd(device->ib_pd);
            device->ib_pd = NULL;
        }

        if(device->ib_dev_context) {
            ibv_close_device(device->ib_dev_context);
            device->ib_dev_context = NULL;
        }

        if(device->ib_devs) {
            ibv_free_device_list(device->ib_devs);
            device->ib_devs = NULL;
        }
    }

    return rc;
}
Example #5
0
/**
 * segment_attach can only be called after a successful call to segment_create
 */
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    openib_device_t *device = &memheap_device;
    static int mr_count = 0;
    void *addr = NULL;

    assert(ds_buf);
    assert(mkey->va_base == 0);

    if (MAP_SEGMENT_SHM_INVALID == (int)(mkey->u.key)) {
        return (mkey->va_base);
    }

    /* workaround mtt problem - request aligned addresses */
    ++mr_count;
    addr = (void *)((uintptr_t)mca_sshmem_base_start_address +
        mca_sshmem_verbs_component.mr_interleave_factor * 1024ULL * 1024ULL * 1024ULL * mr_count);
    {
        struct ibv_mr *ib_mr = NULL;
        uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
            IBV_ACCESS_REMOTE_WRITE |
            IBV_ACCESS_REMOTE_READ |
            IBV_EXP_ACCESS_NO_RDMA;
        struct ibv_exp_reg_shared_mr_in in;

        mca_sshmem_verbs_fill_shared_mr(&in, device->ib_pd, mkey->u.key, addr, access_flag);
        ib_mr = ibv_exp_reg_shared_mr(&in);
        if (NULL == ib_mr) {
            mkey->va_base = (void *)-1;
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                    "error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
                    (unsigned long long)ds_buf->seg_size, errno, strerror(errno))
                );
        } else {
            if (ib_mr->addr != addr) {
                OPAL_OUTPUT_VERBOSE(
                    (5, oshmem_sshmem_base_framework.framework_output,
                        "Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d",
                        addr, ib_mr->addr, mca_sshmem_verbs_component.mr_interleave_factor)
                    );
            }

            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            mkey->va_base = ib_mr->addr;
        }
    }

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: attach successful "
            "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
            mca_sshmem_verbs_component.super.base_version.mca_type_name,
            mca_sshmem_verbs_component.super.base_version.mca_component_name,
            ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
            mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
    );

    /* update returned base pointer with an offset that hides our stuff */
    return (mkey->va_base);
}
Example #6
0
/* ////////////////////////////////////////////////////////////////////////// */
static int
segment_create(map_segment_t *ds_buf,
               const char *file_name,
               size_t size)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t *device = &memheap_device;
    int num_devs = 0;
    int i = 0;

    assert(ds_buf);

    /* init the contents of map_segment_t */
    shmem_ds_reset(ds_buf);

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        return OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (NULL != mca_sshmem_verbs_component.hca_name) {
        for (i = 0; i < num_devs; i++) {
            if (0 == strcmp(mca_sshmem_verbs_component.hca_name, ibv_get_device_name(device->ib_devs[i]))) {
                device->ib_dev = device->ib_devs[i];
                break;
            }
        }
    } else {
        device->ib_dev = device->ib_devs[0];
    }

    if (NULL == device->ib_dev) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error getting device says %d: %s",
            errno, strerror(errno))
            );
        return OSHMEM_ERR_NOT_FOUND;
    }

    if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error obtaining device context for %s errno says %d: %s",
            ibv_get_device_name(device->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Obtain device attributes */
    if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error obtaining device attributes for %s errno says %d: %s",
            ibv_get_device_name(device->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Allocate the protection domain for the device */
    device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
    if (NULL == device->ib_pd) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error allocating protection domain for %s errno says %d: %s",
            ibv_get_device_name(device->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Allocate memory */
    if (!rc) {
        void *addr = NULL;
        struct ibv_mr *ib_mr = NULL;
        uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ;
        uint64_t exp_access_flag = 0;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));

#if (MPAGE_ENABLE > 0)
        exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR |
                          IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                          IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

        struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};

#if MPAGE_HAVE_IBV_EXP_REG_MR_CREATE_FLAGS
        if (0 == mca_sshmem_verbs_component.has_shared_mr) {
            in.addr = (void *)mca_sshmem_base_start_address;
            in.comp_mask    = IBV_EXP_REG_MR_CREATE_FLAGS;
            in.create_flags = IBV_EXP_REG_MR_CREATE_CONTIG;
            in.exp_access   = access_flag;
        }
#endif
        ib_mr = ibv_exp_reg_mr(&in);
        if (NULL == ib_mr) {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                    "error to ibv_exp_reg_mr() %llu bytes errno says %d: %s",
                    (unsigned long long)size, errno, strerror(errno))
                );
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        } else {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if (MPAGE_ENABLE > 0)
        if (!rc && mca_sshmem_verbs_component.has_shared_mr) {
            void *addr = NULL;
            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ|
                          IBV_EXP_ACCESS_NO_RDMA;

            addr = (void *)mca_sshmem_base_start_address;
            struct ibv_exp_reg_shared_mr_in in;
            mca_sshmem_verbs_fill_shared_mr(&in, device->ib_pd, device->ib_mr_shared->handle, addr, access_flag);
            ib_mr = ibv_exp_reg_shared_mr(&in);
            if (NULL == ib_mr) {
                OPAL_OUTPUT_VERBOSE(
                    (5, oshmem_sshmem_base_framework.framework_output,
                        "error to ibv_reg_shared_mr() %llu bytes errno says %d: %s has_shared_mr: %d",
                        (unsigned long long)size, errno, strerror(errno),
                        mca_sshmem_verbs_component.has_shared_mr
                        )
                    );
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            } else {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            }
        }
#endif /* MPAGE_ENABLE */

        if (!rc) {
            OPAL_OUTPUT_VERBOSE(
                (70, oshmem_sshmem_base_framework.framework_output,
                "ibv device %s shared_mr: %d",
                ibv_get_device_name(device->ib_dev),
                mca_sshmem_verbs_component.has_shared_mr)
                );

            if (mca_sshmem_verbs_component.has_shared_mr) {
                assert(size == device->ib_mr_shared->length);
                ds_buf->type = MAP_SEGMENT_ALLOC_IBV;
                ds_buf->seg_id = device->ib_mr_shared->handle;
            } else {
                ds_buf->type = MAP_SEGMENT_ALLOC_IBV_NOSHMR;
                ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
            }
            ds_buf->super.va_base = ib_mr->addr;
            ds_buf->seg_size = size;
            ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
        }
    }

    OPAL_OUTPUT_VERBOSE(
          (70, oshmem_sshmem_base_framework.framework_output,
           "%s: %s: create %s "
           "(id: %d, addr: %p size: %lu, name: %s)\n",
           mca_sshmem_verbs_component.super.base_version.mca_type_name,
           mca_sshmem_verbs_component.super.base_version.mca_component_name,
           (rc ? "failure" : "successful"),
           ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
      );

    return rc;
}
Example #7
0
/**
 * This graph API returns the distance (weight) from a reference
 * vertex to all other vertices in the graph using the Dijkstra
 * algorithm
 *
 * @param graph
 * @param vertex The reference vertex.
 * @param distance_array An array of vertices and
 *         their distance from the reference vertex.
 *
 * @return uint32_t the size of the distance array
 */
uint32_t opal_graph_dijkstra(opal_graph_t *graph, opal_graph_vertex_t *vertex, opal_value_array_t *distance_array)
{
    int graph_order;
    vertex_distance_from_t *Q, *q_start, *current_vertex;
    opal_list_item_t *adj_list_item;
    opal_adjacency_list_t *adj_list;
    int number_of_items_in_q;
    int i;
    uint32_t weight;


    /**
     * Verify that the reference vertex belongs to the graph.
     */
    if (graph != vertex->in_graph) {
        OPAL_OUTPUT((0,"opal:graph:dijkstra: vertex %p not in the graph %p\n",(void *)vertex,(void *)graph));
        return 0;
    }
    /* get the order of the graph and allocate a working queue accordingly */
    graph_order = opal_graph_get_order(graph);
    Q = (vertex_distance_from_t *)malloc(graph_order * sizeof(vertex_distance_from_t));
    /* assign a pointer to the start of the queue */
    q_start = Q;
    /* run on all the vertices of the graph */
    for (adj_list_item = opal_list_get_first(graph->adjacency_list), i=0;
         adj_list_item != opal_list_get_end(graph->adjacency_list);
         adj_list_item  = opal_list_get_next(adj_list_item), i++) {
        adj_list = (opal_adjacency_list_t *)adj_list_item;
        /* insert the vertices pointes to the working queue */
        Q[i].vertex = adj_list->vertex;
        /**
         * assign an infinity distance to all the vertices in the queue
         * except the reference vertex which its distance should be 0.
         */
        if (Q[i].vertex == vertex) {
            Q[i].weight = 0;
        }
        else {
            Q[i].weight = DISTANCE_INFINITY;
        }
    }
    number_of_items_in_q = i;
    /* sort the working queue according the distance from the reference vertex */
    qsort(q_start, number_of_items_in_q, sizeof(vertex_distance_from_t), compare_vertex_distance);
    /* while the working queue is not empty */
    while (number_of_items_in_q > 0) {
        /* start to work with the first vertex in the working queue */
        current_vertex = q_start;
        /* remove the first vertex from the queue */
        q_start++;
        /* decrees the number of vertices in the queue */
        number_of_items_in_q--;
        /* find the distance of all other vertices in the queue from the first vertex in the queue */
        for (i = 0; i < number_of_items_in_q; i++) {
            weight = opal_graph_adjacent(graph, current_vertex->vertex, q_start[i].vertex);
            /**
             * if the distance from the first vertex in the queue to the I
             * vertex in the queue plus the distance of the first vertex in
             * the queue from the referenced vertex is smaller than the
             * distance of the I vertex from the referenced vertex, assign
             * the lower distance to the I vertex.
             */
            if (current_vertex->weight + weight < q_start[i].weight) {
                q_start[i].weight = weight + current_vertex->weight;
            }
        }
        /* sort again the working queue */
        qsort(q_start, number_of_items_in_q, sizeof(vertex_distance_from_t), compare_vertex_distance);
    }
    /* copy the working queue the the returned distance array */
    for (i = 0; i < graph_order-1; i++) {
        opal_value_array_append_item(distance_array, (void *)&(Q[i+1]));
    }
    /* free the working queue */
    free(Q);
    /* assign the distance array size. */
    return graph_order - 1;
}
Example #8
0
static int _ibv_attach(map_segment_t *s, size_t size)
{
    int rc = OSHMEM_SUCCESS;
    static openib_device_t memheap_device;
    openib_device_t *device = &memheap_device;
    int num_devs = 0;

    assert(s);

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs)
    {
        rc = OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (!rc)
    {
        int i = 0;

        if (num_devs > 1)
        {
            if (NULL == mca_memheap_base_param_hca_name)
            {
                MEMHEAP_VERBOSE(5, "found %d HCAs, choosing the first", num_devs);
            }
            else
            {
                MEMHEAP_VERBOSE(5, "found %d HCAs, searching for %s", num_devs, mca_memheap_base_param_hca_name);
            }
        }

        for (i = 0; i < num_devs; i++)
        {
            device->ib_dev = device->ib_devs[i];

            device->ib_dev_context = ibv_open_device(device->ib_dev);
            if (NULL == device->ib_dev_context)
            {
                MEMHEAP_ERROR("error obtaining device context for %s errno says %d: %s",
                        ibv_get_device_name(device->ib_dev), errno, strerror(errno));
                rc = OSHMEM_ERR_RESOURCE_BUSY;
            }
            else
            {
                if (NULL != mca_memheap_base_param_hca_name)
                {
                    if (0 == strcmp(mca_memheap_base_param_hca_name,ibv_get_device_name(device->ib_dev)))
                    {
                        MEMHEAP_VERBOSE(5, "mca_memheap_base_param_hca_name = %s, selected %s as %d of %d", mca_memheap_base_param_hca_name, ibv_get_device_name(device->ib_dev), i, num_devs);
                        rc = OSHMEM_SUCCESS;
                        break;
                    }
                }
                else
                {
                    MEMHEAP_VERBOSE(5, "mca_memheap_base_param_hca_name = %s, selected %s as %d of %d", mca_memheap_base_param_hca_name, ibv_get_device_name(device->ib_dev), i, num_devs);
                    rc = OSHMEM_SUCCESS;
                    break;
                }
            }
        }
    }

    /* Obtain device attributes */
    if (!rc)
    {
        if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr))
        {
            MEMHEAP_ERROR("error obtaining device attributes for %s errno says %d: %s",
                    ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        }
        else
        {
            MEMHEAP_VERBOSE(5, "ibv device %s",
                    ibv_get_device_name(device->ib_dev));
        }
    }

    /* Allocate the protection domain for the device */
    if (!rc)
    {
        device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
        if (NULL == device->ib_pd)
        {
            MEMHEAP_ERROR("error allocating protection domain for %s errno says %d: %s",
                    ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        }
    }

    /* Allocate memory */
    if (!rc)
    {
        void *addr = NULL;
        struct ibv_mr *ib_mr = NULL;
        int access_flag = IBV_ACCESS_LOCAL_WRITE |
        IBV_ACCESS_REMOTE_WRITE |
        IBV_ACCESS_REMOTE_READ;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        access_flag |= IBV_ACCESS_ALLOCATE_MR |
        IBV_ACCESS_SHARED_MR_USER_READ |
        IBV_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

        ib_mr = ibv_reg_mr(device->ib_pd, addr, size, access_flag);
        if (NULL == ib_mr)
        {
            MEMHEAP_ERROR("error to ibv_reg_mr() %llu bytes errno says %d: %s",
                    (unsigned long long)size, errno, strerror(errno));
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        }
        else
        {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc)
        {
            access_flag = IBV_ACCESS_LOCAL_WRITE |
            IBV_ACCESS_REMOTE_WRITE |
            IBV_ACCESS_REMOTE_READ|
            IBV_ACCESS_NO_RDMA;

            addr = (void *)mca_memheap_base_start_address;
            ib_mr = ibv_reg_shared_mr(device->ib_mr_shared->handle,
                    device->ib_pd, addr, access_flag);
            if (NULL == ib_mr)
            {
                MEMHEAP_ERROR("error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
                        (unsigned long long)size, errno, strerror(errno));
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            }
            else
            {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            }
        }
#endif /* MPAGE_ENABLE */

        if (!rc)
        {
            assert(size == device->ib_mr_shared->length);

            s->type = MAP_SEGMENT_ALLOC_IBV;
            s->shmid = device->ib_mr_shared->handle;
            s->start = ib_mr->addr;
            s->size = size;
            s->end = (void*)((uintptr_t)s->start + s->size);
            s->context = &memheap_device;
        }
    }

    return rc;
}
static void memheap_attach_segment(mca_spml_mkey_t *mkey, int tr_id)
{
    /* process special case when va was got using shmget(IPC_PRIVATE)
     * this case is notable for:
     * - key is set as (type|shmid);
     * - va_base is set as 0;
     */
    if (!mkey->va_base
            && ((int) MEMHEAP_SHM_GET_ID(mkey->key) != MEMHEAP_SHM_INVALID)) {
        MEMHEAP_VERBOSE(5,
                        "shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X",
                        tr_id,
                        (unsigned long long)mkey->key,
                        mkey->va_base,
                        MEMHEAP_SHM_GET_TYPE(mkey->key),
                        MEMHEAP_SHM_GET_ID(mkey->key));

        if (MEMHEAP_SHM_GET_TYPE(mkey->key) == MAP_SEGMENT_ALLOC_SHM) {
            mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->key),
                                             0,
                                             0);
        } else if (MEMHEAP_SHM_GET_TYPE(mkey->key) == MAP_SEGMENT_ALLOC_IBV) {
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
            openib_device_t *device = NULL;
            struct ibv_mr *ib_mr;
            void *addr;
            static int mr_count;

            int access_flag = IBV_ACCESS_LOCAL_WRITE |
            IBV_ACCESS_REMOTE_WRITE |
            IBV_ACCESS_REMOTE_READ |
            IBV_ACCESS_NO_RDMA;

            device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context;
            assert(device);

            /* workaround mtt problem - request aligned addresses */
            ++mr_count;
            addr = (void *)(mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count);
            ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->key),
                    device->ib_pd, addr, access_flag);
            if (NULL == ib_mr)
            {
                mkey->va_base = (void*)-1;
                MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s",
                        errno, strerror(errno));
            }
            else
            {
                if (ib_mr->addr != addr) {
                    MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d", addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor);
                }

                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
                mkey->va_base = ib_mr->addr;
            }
#endif /* MPAGE_ENABLE */
        } else {
            MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X",
                          tr_id, 
                          (unsigned long long)mkey->key,
                          MEMHEAP_SHM_GET_TYPE(mkey->key),
                          MEMHEAP_SHM_GET_ID(mkey->key));
            oshmem_shmem_abort(-1);
        }

        if ((void *) -1 == (void *) mkey->va_base) {
            MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
                          tr_id, (unsigned long long)mkey->key, errno);
            oshmem_shmem_abort(-1);
        }
    }
}