/*
 * Print a show_help kind of message for any items still left in the
 * tree
 */
void mca_mpool_base_tree_print(void)
{
    /* If they asked to show 0 leaks, then don't show anything.  */
    if (0 == ompi_debug_show_mpi_alloc_mem_leaks) {
        return;
    }

    num_leaks = 0;
    ompi_rb_tree_traverse(&mca_mpool_base_tree, condition, action);

    if (num_leaks <= ompi_debug_show_mpi_alloc_mem_leaks ||
        ompi_debug_show_mpi_alloc_mem_leaks < 0) {
        opal_show_help("help-mpool-base.txt", "all mem leaks",
                       true, OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                       ompi_process_info.nodename,
                       ompi_process_info.pid, leak_msg);
    } else {
        int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks;
        opal_show_help("help-mpool-base.txt", "some mem leaks",
                       true, OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                       ompi_process_info.nodename, 
                       ompi_process_info.pid, leak_msg, i,
                       (i > 1) ? "s were" : " was",
                       (i > 1) ? "are" : "is");
    }
    free(leak_msg);
    leak_msg = NULL;
}
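
For context, ompi_rb_tree_traverse() above walks the allocation tree with a condition/action callback pair that fills the file-scope num_leaks counter and leak_msg string. The sketch below is a hypothetical illustration of that pattern, not the actual Open MPI callbacks; it assumes glibc asprintf() and generic void* callback signatures.

/* File-scope accumulators referenced by the traversal above (hypothetical). */
static int num_leaks = 0;
static char *leak_msg = NULL;

/* Hypothetical condition callback: visit every item still left in the tree. */
static int condition(void *value)
{
    return 1;
}

/* Hypothetical action callback: count the leak and append one line of
 * description to the leak_msg string consumed by the show_help call above. */
static void action(void *key, void *value)
{
    char *old = leak_msg;

    ++num_leaks;
    if (NULL == old) {
        if (asprintf(&leak_msg, "leaked allocation at %p", value) < 0) {
            leak_msg = NULL;    /* formatting failed; nothing to report */
        }
    } else {
        if (asprintf(&leak_msg, "%s\nleaked allocation at %p", old, value) < 0) {
            leak_msg = old;     /* keep the report we already had */
        } else {
            free(old);
        }
    }
}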
Example #2
mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
{
    mca_btl_udapl_proc_t* udapl_proc = NULL;
    size_t size;
    int rc;

    /* Check if we have already created a uDAPL proc
     * structure for this ompi process */
    udapl_proc = mca_btl_udapl_proc_lookup_ompi(ompi_proc);
    if(udapl_proc != NULL) {
        return udapl_proc;
    }

    /* create a new udapl proc out of the ompi_proc ... */
    udapl_proc = OBJ_NEW(mca_btl_udapl_proc_t);
    udapl_proc->proc_endpoint_count = 0;
    udapl_proc->proc_ompi = ompi_proc;

    /* query for the peer address info */
    rc = ompi_modex_recv(
                 &mca_btl_udapl_component.super.btl_version,
                 ompi_proc,
                 (void*)&udapl_proc->proc_addrs,
                 &size); 
    if(OMPI_SUCCESS != rc) {
        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
            ("ompi_modex_recv failed for peer %s",
            OMPI_NAME_PRINT(&ompi_proc->proc_name)));
        OBJ_RELEASE(udapl_proc);
        return NULL;
    }

    if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
            ("invalid udapl address for peer %s",
            OMPI_NAME_PRINT(&ompi_proc->proc_name)));
        OBJ_RELEASE(udapl_proc);
        return NULL;
    }

    udapl_proc->proc_addr_count = size/sizeof(mca_btl_udapl_addr_t);
    if (0 == udapl_proc->proc_addr_count) {
        udapl_proc->proc_endpoints = NULL;
    } else {
        udapl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
            malloc(udapl_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
    }
    if(NULL == udapl_proc->proc_endpoints) {
        OBJ_RELEASE(udapl_proc);
        return NULL;
    }
    return udapl_proc;
}
Example #3
static char* setup_key(const ompi_process_name_t *name, const char *key)
{
    char *pmi_kvs_key;

    if (pmi_keylen_max <= asprintf(&pmi_kvs_key, "%s-%s",
                                   OMPI_NAME_PRINT(name), key)) {
        free(pmi_kvs_key);
        return NULL;
    }

    return pmi_kvs_key;
}
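
A hypothetical call site for setup_key(), to show the contract: because the asprintf() result is checked against pmi_keylen_max, a NULL return means the composed key would have exceeded the PMI key-length limit. The "local-rank" key name is illustrative.

    char *key = setup_key(OMPI_PROC_MY_NAME, "local-rank");
    if (NULL == key) {
        return OMPI_ERR_VALUE_OUT_OF_BOUNDS;   /* key longer than PMI allows */
    }
    /* ... use the key for a KVS put/get ... */
    free(key);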
Example #4
void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
    mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
    int reg_cnt, i;

    /* Statistics */
    if (true == mca_mpool_grdma_component.print_stats) {
        opal_output(0, "%s grdma: stats "
                "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
                OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                mpool_grdma->stat_cache_hit, mpool_grdma->stat_cache_miss,
                mpool_grdma->stat_cache_found, mpool_grdma->stat_cache_notfound,
                mpool_grdma->stat_evicted);
    }

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    do_unregistration_gc(mpool);

    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
                                                 regs, GRDMA_MPOOL_NREGS);

        for (i = 0 ; i < reg_cnt ; ++i) {
            if (regs[i]->ref_count) {
                regs[i]->ref_count = 0; /* otherwise dereg will fail on assert */
            } else if (mca_mpool_grdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_grdma->pool->lru_list,
                                      (opal_list_item_t *) regs[i]);
            }

            (void) dereg_mem(regs[i]);
        }
    } while (reg_cnt == GRDMA_MPOOL_NREGS);

    OBJ_RELEASE(mpool_grdma->pool);

    OBJ_DESTRUCT(&mpool_grdma->reg_list);
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
}
Example #5
static int mca_bml_r2_add_procs( size_t nprocs, 
                                 struct ompi_proc_t** procs, 
                                 struct opal_bitmap_t* reachable )
{
    size_t p, p_index, n_new_procs = 0;
    struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;  
    struct ompi_proc_t** new_procs = NULL; 
    struct ompi_proc_t *unreach_proc = NULL;
    int rc, ret = OMPI_SUCCESS;

    if(0 == nprocs) {
        return OMPI_SUCCESS;
    }
    
    if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) ) {
        return rc;
    }
    
    /* Select only the procs that don't yet have a BML proc struct. This
     * prevents us from calling btl->add_procs several times for the same
     * destination proc.
     */
    for(p_index = 0; p_index < nprocs; p_index++) { 
        struct ompi_proc_t* proc = procs[p_index]; 

        OBJ_RETAIN(proc); 
        if(NULL !=  proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { 
            continue;  /* go to the next proc */
        }
        /* Allocate the new_procs on demand */
        if( NULL == new_procs ) {
            new_procs = (struct ompi_proc_t **)malloc(nprocs * sizeof(struct ompi_proc_t *));
            if( NULL == new_procs ) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
        }
        new_procs[n_new_procs++] = proc; 
    }

    if ( 0 == n_new_procs ) {
        return OMPI_SUCCESS;
    }

    /* Starting from here we only work on the unregistered procs */
    procs = new_procs; 
    nprocs = n_new_procs; 
    
    /* attempt to add all procs to each r2 */
    btl_endpoints = (struct mca_btl_base_endpoint_t **) 
        malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*)); 
    if (NULL == btl_endpoints) {
        free(new_procs);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
        mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
        int btl_inuse = 0;

        /* if the r2 can reach the destination proc it sets the
         * corresponding bit (proc index) in the reachable bitmap
         * and can return addressing information for each proc
         * that is passed back to the r2 on data transfer calls
         */
        opal_bitmap_clear_all_bits(reachable);
        memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*)); 

        rc = btl->btl_add_procs(btl, n_new_procs, new_procs, btl_endpoints, reachable);
        if(OMPI_SUCCESS != rc) {
            /* This BTL had trouble adding these procs. Continue on; maybe
             * some other BTL can take care of this task.
             */
            continue;
        }

        /* for each proc that is reachable */
        for( p = 0; p < n_new_procs; p++ ) {
            if(opal_bitmap_is_set_bit(reachable, p)) {
                ompi_proc_t *proc = new_procs[p]; 
                mca_bml_base_endpoint_t * bml_endpoint = 
                    (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; 
                mca_bml_base_btl_t* bml_btl; 
                size_t size;
                
                if(NULL == bml_endpoint) { 
                    /* allocate bml specific proc data */
                    bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
                    if (NULL == bml_endpoint) {
                        opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
                        free(btl_endpoints);
                        free(new_procs);
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }
                    
                    /* preallocate space in array for max number of r2s */
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send,  mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma,  mca_bml_r2.num_btl_modules);
                    bml_endpoint->btl_max_send_size = -1;
                    bml_endpoint->btl_proc = proc;
                    proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint; 
                 
                    bml_endpoint->btl_flags_or = 0;
                }

                /* dont allow an additional BTL with a lower exclusivity ranking */
                size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
                if(size > 0) {
                    bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
                    /* skip this btl if the exclusivity is less than the previous */
                    if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity) {
                        btl->btl_del_procs(btl, 1, &proc, &btl_endpoints[p]);
                        continue;
                    }
                }

                /* cache the endpoint on the proc */
                bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
                bml_btl->btl = btl;
                bml_btl->btl_endpoint = btl_endpoints[p];
                bml_btl->btl_weight = 0;
                bml_btl->btl_flags = btl->btl_flags; 
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
                                " the %s BTL without any PUT function attached. Disard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_PUT;
                }
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
                                " the %s BTL without any GET function attached. Discard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_GET;
                }
                if( (bml_btl->btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
                    /**
                     * If no protocol is specified, we have 2 choices: we ignore the BTL
                     * as we don't know which protocol to use, or we assume that all
                     * BTLs support the send protocol.
                     */
                    bml_btl->btl_flags |= MCA_BTL_FLAGS_SEND;
                }
                /**
                 * calculate the bitwise OR of the btl flags 
                 */
                bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
                /* This BTL is in use, allow the progress registration */
                btl_inuse++;
            }
        }
        if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
            size_t p;
            bool found = false;
            for( p = 0; p < mca_bml_r2.num_btl_progress; p++ ) {
                if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
                    found = true;
                    break;
                }
            }
            if(found == false) {
                mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] = 
                    btl->btl_component->btl_progress;
                mca_bml_r2.num_btl_progress++;
                opal_progress_register( btl->btl_component->btl_progress );
            }
        }
    }
    free(btl_endpoints);

    /* iterate back through procs and compute metrics for registered r2s */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];
        mca_bml_base_endpoint_t* bml_endpoint = 
            (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
        double total_bandwidth = 0;
        uint32_t latency = 0xffffffff;
        size_t n_index;
        size_t n_size;

        /* skip over procs w/ no btl's registered */
        if(NULL == bml_endpoint) {
            continue;
        }

        /* (1) determine the total bandwidth available across all btls
         *     note that we need to do this here, as we may already have btls configured
         * (2) determine the highest priority ranking for latency
         * (3) compute the maximum number of bytes that can be sent without any
         *     weighting. Once the leftover is smaller than this number we will
         *     start using the weight to compute the correct amount.
         */
        n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); 
        
        /* sort BTLs in descending order according to bandwidth value */
        qsort(bml_endpoint->btl_send.bml_btls, n_size,
                sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);

        bml_endpoint->btl_rdma_index = 0;
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl = 
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t* btl = bml_btl->btl;
            total_bandwidth += bml_btl->btl->btl_bandwidth;
            if(btl->btl_latency < latency) {
                latency = btl->btl_latency;
            }
        }
        
        /* (1) set the weight of each btl as a percentage of overall bandwidth
         * (2) copy all btl instances at the highest priority ranking into the
         *     list of btls used for first fragments
         */
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl = 
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t *btl = bml_btl->btl;

            /* compute weighting factor for this r2 */
            if(btl->btl_bandwidth > 0) {
                bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
            } else {
                bml_btl->btl_weight = (float)(1.0 / n_size);
            }

            /* check to see if this r2 is already in the array of r2s 
             * used for first fragments - if not add it.
             */
            if(btl->btl_latency == latency) {
                mca_bml_base_btl_t* bml_btl_new = 
                    mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
                *bml_btl_new = *bml_btl;
            }

            /* set endpoint max send size as min of available btls */
            if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
               bml_endpoint->btl_max_send_size = btl->btl_max_send_size;

            /* check flags - is rdma preferred */
            if ((btl->btl_flags & (MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET)) &&
                !((proc->proc_arch != ompi_proc_local_proc->proc_arch) &&
                  (0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
                mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
                mca_btl_base_module_t* btl_rdma = bml_btl->btl;

                *bml_btl_rdma = *bml_btl;
                if(bml_endpoint->btl_pipeline_send_length < btl_rdma->btl_rdma_pipeline_send_length) {
                    bml_endpoint->btl_pipeline_send_length = btl_rdma->btl_rdma_pipeline_send_length;
                }
                if(bml_endpoint->btl_send_limit < btl_rdma->btl_min_rdma_pipeline_size) {
                    bml_endpoint->btl_send_limit = btl_rdma->btl_min_rdma_pipeline_size;
                }
            }
        }
    }

    /* see if we have a connection to everyone else */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];

        if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
            if (NULL == unreach_proc) {
                unreach_proc = proc;
            }
            ret = OMPI_ERR_UNREACH;
        }
    }

    if (mca_bml_r2.show_unreach_errors && 
        OMPI_ERR_UNREACH == ret) {
        opal_show_help("help-mca-bml-r2.txt",
                       "unreachable proc",
                       true, 
                       OMPI_NAME_PRINT(&(ompi_proc_local_proc->proc_name)),
                       (NULL != ompi_proc_local_proc->proc_hostname ?
                        ompi_proc_local_proc->proc_hostname : "unknown!"),
                       OMPI_NAME_PRINT(&(unreach_proc->proc_name)),
                       (NULL != unreach_proc->proc_hostname ?
                        unreach_proc->proc_hostname : "unknown!"),
                       btl_names);
    }

    free(new_procs); 

    return ret;
}
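
The weighting loop near the end of the function reduces to a simple rule: each BTL's weight is its bandwidth as a fraction of the endpoint's total, with an even 1/n fallback when a BTL reports zero bandwidth. A standalone sketch of just that rule, with made-up bandwidth figures:

#include <stdio.h>

int main(void)
{
    /* Illustrative bandwidths (Mbps); not from any real configuration. */
    double bandwidth[] = { 10000.0, 1000.0, 0.0 };
    size_t n = sizeof(bandwidth) / sizeof(bandwidth[0]);
    double total = 0.0;
    size_t i;

    for (i = 0; i < n; i++) {
        total += bandwidth[i];
    }
    for (i = 0; i < n; i++) {
        /* same rule as mca_bml_r2_add_procs: proportional share, or 1/n */
        float weight = (bandwidth[i] > 0.0)
            ? (float)(bandwidth[i] / total)
            : (float)(1.0 / n);
        printf("btl %u: weight %.3f\n", (unsigned)i, weight);
    }
    return 0;
}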
Example #6
int
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
                            size_t nprocs,
                            struct ompi_proc_t** procs)
{
    int ret, me = -1;    /* index of the local proc in procs[], if present */
    size_t i;
    bool new_found = false;

    /* Get the list of ptl_process_id_t from the runtime and copy into structure */
    for (i = 0 ; i < nprocs ; ++i) {
        ptl_process_t *modex_id;
        size_t size;

        if( procs[i] == ompi_proc_local_proc ) {
            me = i;
        }

        if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Portals 4 MTL does not support heterogeneous operations.");
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Proc %s architecture %x, mine %x.",
                                OMPI_NAME_PRINT(&procs[i]->super.proc_name), 
                                procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch);
            return OMPI_ERR_NOT_SUPPORTED;
        }

        OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
                        &procs[i]->super, (char**)&modex_id, &size);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        } else if (sizeof(ptl_process_t) != size) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_BAD_PARAM;
        }

        if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
            ptl_process_t *peer_id;
            peer_id = malloc(sizeof(ptl_process_t));
            if (NULL == peer_id) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: malloc failed: %d\n",
                                    __FILE__, __LINE__, ret);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            *peer_id = *modex_id;
            procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id;

            new_found = true;
        } else {
            ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
            if (proc->phys.nid != modex_id->phys.nid ||
                proc->phys.pid != modex_id->phys.pid) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: existing peer and modex peer don't match\n",
                                    __FILE__, __LINE__);
                return OMPI_ERROR;
            }
        }
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    if (new_found) {
        ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: flowctl_add_procs failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }
    }
#endif

    return OMPI_SUCCESS;
}
Example #7
static int bootstrap_comm(ompi_communicator_t *comm,
                          mca_coll_sm_module_t *module)
{
    int i;
    char *shortpath, *fullpath;
    mca_coll_sm_component_t *c = &mca_coll_sm_component;
    mca_coll_sm_comm_t *data = module->sm_comm_data;
    int comm_size = ompi_comm_size(comm);
    int num_segments = c->sm_comm_num_segments;
    int num_in_use = c->sm_comm_num_in_use_flags;
    int frag_size = c->sm_fragment_size;
    int control_size = c->sm_control_size;
    ompi_process_name_t *lowest_name = NULL;
    size_t size;
    ompi_proc_t *proc;

    /* Make the rendezvous filename for this communicator's shmem data
       segment.  The CID is not guaranteed to be unique among all
       procs on this node, so also pair it with the PID of the proc
       with the lowest ORTE name to form a unique filename. */
    proc = ompi_group_peer_lookup(comm->c_local_group, 0);
    lowest_name = &(proc->proc_name);
    for (i = 1; i < comm_size; ++i) {
        proc = ompi_group_peer_lookup(comm->c_local_group, i);
        if (ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, 
                                          &(proc->proc_name),
                                          lowest_name) < 0) {
            lowest_name = &(proc->proc_name);
        }
    }
    asprintf(&shortpath, "coll-sm-cid-%d-name-%s.mmap", comm->c_contextid,
             OMPI_NAME_PRINT(lowest_name));
    if (NULL == shortpath) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:enable:bootstrap comm (%d/%s): asprintf failed", 
                            comm->c_contextid, comm->c_name);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    fullpath = opal_os_path(false, ompi_process_info.job_session_dir,
                            shortpath, NULL);
    free(shortpath);
    if (NULL == fullpath) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:enable:bootstrap comm (%d/%s): opal_os_path failed", 
                            comm->c_contextid, comm->c_name);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Calculate how much space we need in the per-communicator shmem
       data segment.  There are several values to add:

       - size of the barrier data (2 of these):
           - fan-in data (num_procs * control_size)
           - fan-out data (num_procs * control_size)
       - size of the "in use" buffers:
           - num_in_use_buffers * control_size
       - size of the message fragment area (one for each segment):
           - control (num_procs * control_size)
           - fragment data (num_procs * (frag_size))

       So it's:

           barrier: 2 * control_size + 2 * control_size
           in use:  num_in_use * control_size
           control: num_segments * (num_procs * control_size * 2 +
                                    num_procs * control_size)
           message: num_segments * (num_procs * frag_size)
     */

    size = 4 * control_size +
        (num_in_use * control_size) +
        (num_segments * (comm_size * control_size * 2)) +
        (num_segments * (comm_size * frag_size));
    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                        "coll:sm:enable:bootstrap comm (%d/%s): attaching to %" PRIsize_t " byte mmap: %s",
                        comm->c_contextid, comm->c_name, size, fullpath);
    data->sm_bootstrap_meta =
        mca_common_sm_init_group(comm->c_local_group, size, fullpath,
                                 sizeof(mca_common_sm_seg_header_t),
                                 getpagesize());
    if (NULL == data->sm_bootstrap_meta) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:enable:bootstrap comm (%d/%s): mca_common_sm_init_group failed", 
                            comm->c_contextid, comm->c_name);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* All done */
    return OMPI_SUCCESS;
}
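
To make the size formula concrete, here is a worked instance with assumed values: comm_size = 4, num_segments = 8, num_in_use = 2, control_size = 4096, frag_size = 8192 (the last two are plausible defaults, not taken from the component):

    /*
     * size = 4 * 4096                       barrier:   16384
     *      + 2 * 4096                       in use:     8192
     *      + 8 * (4 * 4096 * 2)             control:  262144
     *      + 8 * (4 * 8192)                 message:  262144
     *                                       total:    548864 bytes
     */

Example #8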
mca_mpool_base_module_t* mca_mpool_base_module_create(
    const char* name, 
    void* user_data,
    struct mca_mpool_base_resources_t* resources) 
{
    mca_mpool_base_component_t* component = NULL; 
    mca_mpool_base_module_t* module = NULL; 
    opal_list_item_t* item;
    mca_mpool_base_selected_module_t *sm;

    for (item = opal_list_get_first(&ompi_mpool_base_framework.framework_components);
         item != opal_list_get_end(&ompi_mpool_base_framework.framework_components);
         item = opal_list_get_next(item)) {
         mca_base_component_list_item_t *cli = 
           (mca_base_component_list_item_t *) item;
         component = 
           (mca_mpool_base_component_t *) cli->cli_component;
         if(0 == strcmp(component->mpool_version.mca_component_name, name)) {
             break;
         }
    }

    if (opal_list_get_end(&ompi_mpool_base_framework.framework_components) == item) {
        return NULL;
    }
    module = component->mpool_init(resources); 
    if ( NULL == module ) {
        return NULL;
    }
    sm = OBJ_NEW(mca_mpool_base_selected_module_t); 
    sm->mpool_component = component; 
    sm->mpool_module = module; 
    sm->user_data = user_data;
    sm->mpool_resources = resources;
    opal_list_append(&mca_mpool_base_modules, (opal_list_item_t*) sm); 
    /* on the very first creation of a module we init the memory
       callback */
    if (opal_list_get_size(&mca_mpool_base_modules) == 1) { 
        /* Default to not using memory hooks */
        int use_mem_hooks = 0;

        /* Use the memory hooks if leave_pinned or
           leave_pinned_pipeline is enabled (note that either of these
           leave_pinned variables may have been set by a user MCA
           param or elsewhere in the code base).  Yes, we could have
           coded this more succinctly, but this is more clear. */
        if (ompi_mpi_leave_pinned > 0 || ompi_mpi_leave_pinned_pipeline) {
            use_mem_hooks = 1;
        }

        if (use_mem_hooks) {
            if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
                ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & 
                 opal_mem_hooks_support_level())) {
                opal_mem_hooks_register_release(mca_mpool_base_mem_cb, NULL);
            } else {
                opal_show_help("help-mpool-base.txt", "leave pinned failed",
                               true, name, OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                               ompi_process_info.nodename);
                return NULL;
            }

            /* Set this to true so that mpool_base_close knows to
               cleanup */
            mca_mpool_base_used_mem_hooks = 1;
        }
    }
    return module; 
}
Example #9
/*
 * Create an endpoint and claim the matched modex slot
 */
int
ompi_btl_usnic_create_endpoint(ompi_btl_usnic_module_t *module,
                ompi_btl_usnic_proc_t *proc,
                ompi_btl_usnic_endpoint_t **endpoint_o)
{
    int err;
    int modex_index;
    ompi_btl_usnic_endpoint_t *endpoint;

    /* look for matching modex info */
    err = match_modex(module, proc, &modex_index);
    if (OMPI_SUCCESS != err) {
        opal_output_verbose(5, USNIC_OUT,
                            "btl:usnic:create_endpoint: did not match usnic modex info for peer %s",
                            OMPI_NAME_PRINT(&proc->proc_ompi->proc_name));
        return err;
    }

    endpoint = OBJ_NEW(ompi_btl_usnic_endpoint_t);
    if (NULL == endpoint) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Initialize the endpoint */
    endpoint->endpoint_module = module;
    assert(modex_index >= 0 && modex_index < (int)proc->proc_modex_count);
    endpoint->endpoint_remote_addr = proc->proc_modex[modex_index];

    /* Initialize endpoint sequence number info */
    endpoint->endpoint_next_seq_to_send = module->local_addr.isn;
    endpoint->endpoint_ack_seq_rcvd = endpoint->endpoint_next_seq_to_send - 1;
    endpoint->endpoint_next_contig_seq_to_recv =
        endpoint->endpoint_remote_addr.isn;
    endpoint->endpoint_highest_seq_rcvd =
        endpoint->endpoint_next_contig_seq_to_recv - 1;
    endpoint->endpoint_rfstart = WINDOW_SIZE_MOD(endpoint->endpoint_next_contig_seq_to_recv);

    /* Defer creating the ibv_ah.  Since calling ibv_create_ah() may
       trigger ARP resolution, it's better to batch all the endpoints'
       calls to ibv_create_ah() together to get some parallelism. */
    endpoint->endpoint_remote_ah = NULL;

    /* Now claim that modex slot */
    proc->proc_modex_claimed[modex_index] = true;
    MSGDEBUG1_OUT("create_endpoint: module=%p claimed endpoint=%p on proc=%p (hash=0x%" PRIx64 ")\n",
                  (void *)module, (void *)endpoint, (void *)proc,
                  ompi_rte_hash_name(&proc->proc_ompi->proc_name));

    /* Save the endpoint on this proc's array of endpoints */
    proc->proc_endpoints[proc->proc_endpoint_count] = endpoint;
    endpoint->endpoint_proc_index = proc->proc_endpoint_count;
    endpoint->endpoint_proc = proc;
    ++proc->proc_endpoint_count;
    OBJ_RETAIN(proc);

    /* also add endpoint to module's list of endpoints */
    opal_list_append(&(module->all_endpoints),
            &(endpoint->endpoint_endpoint_li));

    *endpoint_o = endpoint;
    return OMPI_SUCCESS;
}
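
The receive-window bookkeeping above relies on WINDOW_SIZE_MOD() to map a sequence number to a slot in the window. A plausible definition, assuming a power-of-two window size as the masking idiom suggests (the constant is illustrative, not taken from the usnic source):

#define WINDOW_SIZE          4096
#define WINDOW_SIZE_MOD(a)   ((a) & (WINDOW_SIZE - 1))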
Example #10
/*
 * For a specific module, see if this proc has matching address/modex
 * info.  If so, create an endpoint and return it.
 *
 * Implementation note: This code relies on the order of modules on a local
 * side matching the order of the modex entries that we send around, otherwise
 * both sides may not agree on a bidirectional connection.  It also assumes
 * that add_procs will be invoked on the local modules in that same order, for
 * the same reason.  If those assumptions do not hold, we will need to
 * canonicalize this match ordering somehow, probably by (jobid,vpid) pair or
 * by the interface MAC or IP address.
 */
static int match_modex(ompi_btl_usnic_module_t *module,
                       ompi_btl_usnic_proc_t *proc,
                       int *index_out)
{
    int err = OMPI_SUCCESS;
    size_t i;
    uint32_t num_modules;
    ompi_btl_usnic_graph_t *g = NULL;
    int nme;
    int *me;
    bool proc_is_left;

    if (NULL == index_out) {
        return OMPI_ERR_BAD_PARAM;
    }
    *index_out = -1;

    num_modules = mca_btl_usnic_component.num_modules;

    opal_output_verbose(20, USNIC_OUT, "btl:usnic:%s: module=%p proc=%p with dimensions %d x %d",
                        __func__, (void *)module, (void *)proc,
                        num_modules, (int)proc->proc_modex_count);

    /* We compute an interface match-up table once for each (module,proc) pair
     * and cache it in the proc.  Store per-proc instead of per-module, since
     * MPI dynamic process routines can add procs but not new modules. */
    if (NULL == proc->proc_ep_match_table) {
        proc->proc_ep_match_table = malloc(num_modules *
                                       sizeof(*proc->proc_ep_match_table));
        if (NULL == proc->proc_ep_match_table) {
            OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* initialize to "no matches" */
        for (i = 0; i < num_modules; ++i) {
            proc->proc_ep_match_table[i] = -1;
        }

        /* For graphs where all edges are equal (and even for some other
         * graphs), two peers making matching calculations with "mirror image"
         * graphs might not end up with the same matching.  Ensure that both
         * sides are always setting up the exact same graph by always putting
         * the process with the lower (jobid,vpid) on the "left".
         */
        proc_is_left =
            (ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
                                          &proc->proc_ompi->proc_name,
                                          &(ompi_proc_local()->proc_name)) < 0);

        err = create_proc_module_graph(proc, proc_is_left, &g);
        if (OMPI_SUCCESS != err) {
            goto out_free_table;
        }

        nme = 0;
        err = ompi_btl_usnic_solve_bipartite_assignment(g, &nme, &me);
        if (OMPI_SUCCESS != err) {
            OMPI_ERROR_LOG(err);
            goto out_free_graph;
        }

        edge_pairs_to_match_table(proc, proc_is_left, nme, me);

        err = ompi_btl_usnic_gr_free(g);
        if (OMPI_SUCCESS != err) {
            OMPI_ERROR_LOG(err);
            return err;
        }
    }

    if (!proc->proc_match_exists) {
        opal_output_verbose(5, USNIC_OUT, "btl:usnic:%s: unable to find any valid interface pairs for proc %s",
                            __func__, OMPI_NAME_PRINT(&proc->proc_ompi->proc_name));
        return OMPI_ERR_NOT_FOUND;
    }

    /* assuming no strange failure cases, this should always be present */
    if (NULL != proc->proc_ep_match_table && proc->proc_match_exists) {
        for (i = 0; i < num_modules; ++i) {
            if (module == mca_btl_usnic_component.usnic_active_modules[i]) {
                *index_out = proc->proc_ep_match_table[i];
                break;
            }
        }
    }

    /* If MTU does not match, throw an error */
    /* TODO with UDP, do we still want to enforce this restriction or just take
     * the min of the two MTUs?  Another choice is to disqualify this pairing
     * before running the matching algorithm on it. */
    if (*index_out >= 0 &&
        proc->proc_modex[*index_out].mtu != module->if_mtu) {
        opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch",
                    true,
                    ompi_process_info.nodename,
                    ibv_get_device_name(module->device),
                    module->port_num,
                    module->if_mtu,
                    (NULL == proc->proc_ompi->proc_hostname) ?
                    "unknown" : proc->proc_ompi->proc_hostname,
                    proc->proc_modex[*index_out].mtu);
        *index_out = -1;
        return OMPI_ERR_UNREACH;
    }

    return (*index_out == -1 ? OMPI_ERR_NOT_FOUND : OMPI_SUCCESS);

out_free_graph:
    ompi_btl_usnic_gr_free(g);
out_free_table:
    free(proc->proc_ep_match_table);
    proc->proc_ep_match_table = NULL;
    proc->proc_match_exists = false;
    return err;
}
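
The TODO above floats taking the min of the two MTUs instead of failing. A minimal sketch of that alternative, reusing the fields from the check above (in practice the negotiated value would need per-endpoint storage rather than mutating the module):

    if (*index_out >= 0 &&
        proc->proc_modex[*index_out].mtu < module->if_mtu) {
        /* negotiate down to the smaller MTU instead of disqualifying the pair */
        module->if_mtu = proc->proc_modex[*index_out].mtu;
    }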
Example #11
/**
 * Create a MX process structure. There is a one-to-one correspondence
 * between a ompi_proc_t and a mca_btl_mx_proc_t instance. We cache
 * additional data (specifically the list of mca_btl_mx_endpoint_t instances, 
 * and published addresses) associated w/ a given destination on this
 * datastructure.
 */
mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
{
    mca_btl_mx_proc_t* module_proc = NULL;
    mca_btl_mx_addr_t  *mx_peers;
    int i, j, rc, mx_peers_count, *mx_routing;
    bool at_least_one_route = false;
    size_t size;

    /* Check if we have already created a MX proc
     * structure for this ompi process */
    module_proc = mca_btl_mx_proc_lookup_ompi(ompi_proc);
    if( module_proc != NULL ) {
        return module_proc;  /* Gotcha! */
    }

    /* query for the peer address info */
    rc = ompi_modex_recv( &mca_btl_mx_component.super.btl_version,
                          ompi_proc, (void*)&mx_peers, &size );
    if( OMPI_SUCCESS != rc ) {
        opal_output( 0, "mca_pml_base_modex_recv failed for peer %s",
                     OMPI_NAME_PRINT(&ompi_proc->proc_name) );
        return NULL;
    }

    if( size < sizeof(mca_btl_mx_addr_t) ) {  /* no available connection */
        return NULL;
    }
    if( (size % sizeof(mca_btl_mx_addr_t)) != 0 ) {
        opal_output( 0, "invalid mx address for peer %s",
		     OMPI_NAME_PRINT(&ompi_proc->proc_name) );
	return NULL;
    }
    /* Let's see if we have a way to connect to the remote proc using MX.
     * Without the routing information from the mapper, it is pretty hard
     * to do this. Right now, we base this connection detection on the last
     * 6 digits of the mapper MAC.
     */
    mx_peers_count = size / sizeof(mca_btl_mx_addr_t);
    mx_routing = (int*)malloc( mx_peers_count * sizeof(int) );
    if( NULL == mx_routing ) {
        return NULL;
    }
    for( i = 0; i < mx_peers_count; mx_routing[i++] = -1 );

    for( i = 0; i < mx_peers_count; i++ ) {
        mca_btl_mx_module_t* mx_btl;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
        BTL_MX_ADDR_NTOH(mx_peers[i]);
#endif
        for( j = 0; j < mca_btl_mx_component.mx_num_btls; j++ ) {
            mx_btl = mca_btl_mx_component.mx_btls[j];
            if( mx_btl->mx_unique_network_id == mx_peers[i].unique_network_id ) {
                /* There is at least one connection between these two nodes */
                if( -1 == mx_routing[j] ) {
                    /* First connection */
                    mx_routing[j] = i;
                    at_least_one_route = true;
                    break;
                }
                /* If multiple remote endpoints match mine, we keep going. As a
                 * result we will match them in order, i.e. remote endpoint 0
                 * will be connected to local endpoint 0.
                 */
            }
        }
    }
    if( false == at_least_one_route ) {
        free(mx_routing);
        return NULL;
    }

    module_proc = OBJ_NEW(mca_btl_mx_proc_t);
    module_proc->proc_ompi      = ompi_proc;
    module_proc->mx_peers_count = mx_peers_count;
    module_proc->mx_peers       = mx_peers;
    module_proc->mx_routing     = mx_routing;
    return module_proc;
}
Example #12
const char *usnic_compat_proc_name_print(opal_process_name_t *pname)
{
    return OMPI_NAME_PRINT(pname);
}
Example #13
/* Info keys:
 *
 * - crs:
 *   none    = (Default) No CRS Service
 *   default = Whatever CRS service MPI chooses
 *   blcr    = BLCR
 *   self    = app level callbacks
 *
 * - cmdline:
 *   Command line to restart the process with.
 *   If empty, the user must manually enter it
 *
 * - target:
 *   Absolute path to the target directory.
 *
 * - handle:
 *   first   = Earliest checkpoint directory available
 *   last    = Most recent checkpoint directory available
 *   [global:local] = handle provided by the MPI library
 *
 * - restarting:
 *   0 = not restarting
 *   1 = restarting
 *
 * - checkpointing:
 *   0 = No need to prepare for checkpointing
 *   1 = MPI should prepare for checkpointing
 *
 * - inflight:
 *   default  = message
 *   message  = Drain inflight messages at the message level
 *   network  = Drain inflight messages at the network level (if possible)
 *
 * - user_space_mem:
 *   0 = Memory does not need to be managed
 *   1 = Memory must be in user space (i.e., not on the network card)
 *
 */
static int extract_info_into_datum(opal_info_t *info, orte_snapc_base_quiesce_t *datum)
{
    int info_flag = false;
    int max_crs_len = 32;
    bool info_bool = false;
    char *info_char = NULL;

    info_char = (char *) malloc(sizeof(char) * (OPAL_PATH_MAX+1));
    if (NULL == info_char) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /*
     * Key: crs
     */
    opal_info_get(info, "crs", max_crs_len, info_char, &info_flag);
    if( info_flag) {
        datum->crs_name = strdup(info_char);
    }

    /*
     * Key: cmdline
     */
    opal_info_get(info, "cmdline", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->cmdline = strdup(info_char);
    }

    /*
     * Key: handle
     */
    opal_info_get(info, "handle", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->handle = strdup(info_char);
    }

    /*
     * Key: target
     */
    opal_info_get(info, "target", OPAL_PATH_MAX, info_char, &info_flag);
    if( info_flag) {
        datum->target_dir = strdup(info_char);
    }

    /*
     * Key: restarting
     */
    opal_info_get_bool(info, "restarting", &info_bool, &info_flag);
    if( info_flag ) {
        datum->restarting = info_bool;
    } else {
        datum->restarting = false;
    }

    /*
     * Key: checkpointing
     */
    opal_info_get_bool(info, "checkpointing", &info_bool, &info_flag);
    if( info_flag ) {
        datum->checkpointing = info_bool;
    } else {
        datum->checkpointing = false;
    }

    /*
     * Display all values
     */
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('crs' = '%s')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (NULL == datum->crs_name ? "Default (none)" : datum->crs_name)));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('cmdline' = '%s')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (NULL == datum->cmdline ? "Default ()" : datum->cmdline)));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('checkpointing' = '%c')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (datum->checkpointing ? 'T' : 'F')));
    OPAL_OUTPUT_VERBOSE((3, mca_crcp_bkmrk_component.super.output_handle,
                         "crcp:bkmrk: %s extract_info: Info('restarting' = '%c')",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                         (datum->restarting ? 'T' : 'F')));

    if( NULL != info_char ) {
        free(info_char);
        info_char = NULL;
    }

    return OMPI_SUCCESS;
}
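
For reference, a hypothetical caller would populate these keys on an info object before entering the checkpoint/restart path; the key names come from the comment block above, and the values are examples:

    MPI_Info info;

    MPI_Info_create(&info);
    MPI_Info_set(info, "crs", "self");           /* app-level callbacks */
    MPI_Info_set(info, "checkpointing", "1");    /* prepare for a checkpoint */
    MPI_Info_set(info, "restarting", "0");       /* this is not a restart */
    /* ... hand the info object to the quiesce/checkpoint entry point ... */
    MPI_Info_free(&info);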
Example #14
/**
 * Static functions used to configure the interactions between the OPAL and
 * the runtime.
 */
static char*
_process_name_print_for_opal(const opal_process_name_t procname)
{
    ompi_process_name_t* rte_name = (ompi_process_name_t*)&procname;
    return OMPI_NAME_PRINT(rte_name);
}
Example #15
/*
 * Create an ompi_btl_usnic_proc_t and initialize it with modex info
 * and an empty array of endpoints.
 *
 * Returns OMPI_ERR_UNREACH if we can't reach the peer (i.e., we can't
 * find their modex data).
 */
static int create_proc(ompi_proc_t *ompi_proc, 
                       ompi_btl_usnic_proc_t **usnic_proc)
{
    ompi_btl_usnic_proc_t *proc = NULL;
    size_t size;
    int rc;

    *usnic_proc = NULL;

    /* Create the proc if it doesn't already exist */
    proc = OBJ_NEW(ompi_btl_usnic_proc_t);
    if (NULL == proc) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Initialize number of peers */
    proc->proc_endpoint_count = 0;
    proc->proc_ompi = ompi_proc;

    /* query for the peer address info */
    rc = ompi_modex_recv(&mca_btl_usnic_component.super.btl_version,
                         ompi_proc, (void*)&proc->proc_modex,
                         &size);

    /* If this proc simply doesn't have this key, then they're not
       running the usnic BTL -- just ignore them.  Otherwise, show an
       error message. */
    if (OPAL_ERR_DATA_VALUE_NOT_FOUND == rc) {
        OBJ_RELEASE(proc);
        return OMPI_ERR_UNREACH;
    } else if (OMPI_SUCCESS != rc) {
        opal_show_help("help-mpi-btl-usnic.txt",
                       "internal error during init",
                       true,
                       ompi_process_info.nodename,
                       "<none>", 0,
                       "ompi_modex_recv() failed", __FILE__, __LINE__,
                       opal_strerror(rc));
        OBJ_RELEASE(proc);
        return OMPI_ERROR;
    }

    if ((size % sizeof(ompi_btl_usnic_addr_t)) != 0) {
        char msg[1024];

        snprintf(msg, sizeof(msg), 
                 "sizeof(modex for peer %s data) == %d, expected multiple of %d",
                 OMPI_NAME_PRINT(&ompi_proc->proc_name),
                 (int) size, (int) sizeof(ompi_btl_usnic_addr_t));
        opal_show_help("help-mpi-btl-usnic.txt", "internal error during init",
                       true,
                       ompi_process_info.nodename,
                       "<none>", 0,
                       "invalid modex data", __FILE__, __LINE__,
                       msg);

        OBJ_RELEASE(proc);
        return OMPI_ERR_VALUE_OUT_OF_BOUNDS;
    }

    proc->proc_modex_count = size / sizeof(ompi_btl_usnic_addr_t);
    if (0 == proc->proc_modex_count) {
        proc->proc_endpoints = NULL;
        OBJ_RELEASE(proc);
        return OMPI_ERR_UNREACH;
    }

    proc->proc_modex_claimed = (bool*) 
        calloc(proc->proc_modex_count, sizeof(bool));
    if (NULL == proc->proc_modex_claimed) {
        OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(proc);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        calloc(proc->proc_modex_count, sizeof(mca_btl_base_endpoint_t*));
    if (NULL == proc->proc_endpoints) {
        OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(proc);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    *usnic_proc = proc;
    return OMPI_SUCCESS;
}
Example #16
/*
 * Create an ompi_btl_usnic_proc_t and initialize it with modex info
 * and an empty array of endpoints.
 */
static ompi_btl_usnic_proc_t *create_proc(ompi_proc_t *ompi_proc)
{
    ompi_btl_usnic_proc_t *proc = NULL;
    size_t size;
    int rc;

    /* Create the proc if it doesn't already exist */
    proc = OBJ_NEW(ompi_btl_usnic_proc_t);
    if (NULL == proc) {
        return NULL;
    }

    /* Initialize number of peers */
    proc->proc_endpoint_count = 0;
    proc->proc_ompi = ompi_proc;

    /* query for the peer address info */
    rc = ompi_modex_recv(&mca_btl_usnic_component.super.btl_version,
                         ompi_proc, (void*)&proc->proc_modex,
                         &size);

    if (OMPI_SUCCESS != rc) {
        opal_show_help("help-mpi-btl-usnic.txt", "internal error during init",
                       true,
                       ompi_process_info.nodename,
                       "<none>", 0,
                       "ompi_modex_recv() failed", __FILE__, __LINE__,
                       opal_strerror(rc));
        OBJ_RELEASE(proc);
        return NULL;
    }

    if ((size % sizeof(ompi_btl_usnic_addr_t)) != 0) {
        char msg[1024];

        snprintf(msg, sizeof(msg), 
                 "sizeof(modex for peer %s data) == %d, expected multiple of %d",
                 OMPI_NAME_PRINT(&ompi_proc->proc_name),
                 (int) size, (int) sizeof(ompi_btl_usnic_addr_t));
        opal_show_help("help-mpi-btl-usnic.txt", "internal error during init",
                       true,
                       ompi_process_info.nodename,
                       "<none>", 0,
                       "invalid modex data", __FILE__, __LINE__,
                       msg);

        OBJ_RELEASE(proc);
        return NULL;
    }

    proc->proc_modex_count = size / sizeof(ompi_btl_usnic_addr_t);
    if (0 == proc->proc_modex_count) {
        proc->proc_endpoints = NULL;
        OBJ_RELEASE(proc);
        return NULL;
    }

    proc->proc_modex_claimed = (bool*) 
        calloc(proc->proc_modex_count, sizeof(bool));
    if (NULL == proc->proc_modex_claimed) {
        OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(proc);
        return NULL;
    }

    proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        calloc(proc->proc_modex_count, sizeof(mca_btl_base_endpoint_t*));
    if (NULL == proc->proc_endpoints) {
        OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(proc);
        return NULL;
    }

    return proc;
}
Example #17
void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
    mca_mpool_base_registration_t *reg;
    mca_mpool_base_registration_t *regs[RGPUSM_MPOOL_NREGS];
    int reg_cnt, i;
    int rc;

    /* Statistics */
    if(true == mca_mpool_rgpusm_component.print_stats) {
        opal_output(0, "%s rgpusm: stats "
                "(hit/valid/invalid/miss/evicted): %d/%d/%d/%d/%d\n",
                OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
                mpool_rgpusm->stat_cache_hit, mpool_rgpusm->stat_cache_valid, 
                mpool_rgpusm->stat_cache_invalid, mpool_rgpusm->stat_cache_miss,
                mpool_rgpusm->stat_evicted);
    }

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
                regs, RGPUSM_MPOOL_NREGS);
        opal_output(-1, "Registration size at finalize = %d", reg_cnt);

        for(i = 0; i < reg_cnt; i++) {
            reg = regs[i];

            if(reg->ref_count) {
                reg->ref_count = 0; /* otherwise dereg will fail on assert */
            } else if (mca_mpool_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&mpool_rgpusm->lru_list,
                        (opal_list_item_t*)reg);
            }

            /* Remove from rcache first */
            mpool->rcache->rcache_delete(mpool->rcache, reg);

            /* Drop lock before deregistering memory */
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            assert(reg->ref_count == 0);
            rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
                                                   reg);
            OPAL_THREAD_LOCK(&mpool->rcache->lock);

            if(rc != OMPI_SUCCESS) {
                /* Potentially lose track of registrations
                   do we have to put it back? */
                continue;
            }

            OMPI_FREE_LIST_RETURN_MT(&mpool_rgpusm->reg_list,
                                  (ompi_free_list_item_t*)reg);
        }
    } while(reg_cnt == RGPUSM_MPOOL_NREGS);

    OBJ_DESTRUCT(&mpool_rgpusm->lru_list);
    OBJ_DESTRUCT(&mpool_rgpusm->reg_list);
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
}
Example #18
int
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
                            size_t nprocs,
                            struct ompi_proc_t** procs)
{
    int ret, me = -1;    /* index of the local proc in procs[], if present */
    size_t i;
    bool new_found = false;
    ptl_process_t *maptable;

    if (ompi_mtl_portals4.use_logical) {
        maptable = malloc(sizeof(ptl_process_t) * nprocs);
        if (NULL == maptable) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: malloc failed\n",
                                __FILE__, __LINE__);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* Get the list of ptl_process_id_t from the runtime and copy into structure */
    for (i = 0 ; i < nprocs ; ++i) {
        ptl_process_t *modex_id;
        size_t size;

        if( procs[i] == ompi_proc_local_proc ) {
            me = i;
        }

        if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Portals 4 MTL does not support heterogeneous operations.");
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Proc %s architecture %x, mine %x.",
                                OMPI_NAME_PRINT(&procs[i]->super.proc_name),
                                procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch);
            return OMPI_ERR_NOT_SUPPORTED;
        }

        OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
                        &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        } else if (sizeof(ptl_process_t) != size) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_BAD_PARAM;
        }

        if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
            ptl_process_t *peer_id;
            peer_id = malloc(sizeof(ptl_process_t));
            if (NULL == peer_id) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: malloc failed: %d\n",
                                    __FILE__, __LINE__, ret);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            if (ompi_mtl_portals4.use_logical) {
                peer_id->rank = i;
                maptable[i].phys.pid = modex_id->phys.pid;
                maptable[i].phys.nid = modex_id->phys.nid;
                opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
                    "logical: global rank=%d pid=%d nid=%d\n",
                    (int)i, maptable[i].phys.pid, maptable[i].phys.nid);
            } else {
                *peer_id = *modex_id;
            }

            procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id;

            new_found = true;
        } else {
            ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
            if (ompi_mtl_portals4.use_logical) {
                if ((size_t)proc->rank != i) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: existing peer and rank don't match\n",
                                    __FILE__, __LINE__);
                    return OMPI_ERROR;
                }
                maptable[i].phys.pid = modex_id->phys.pid;
                maptable[i].phys.nid = modex_id->phys.nid;
            }
            else if (proc->phys.nid != modex_id->phys.nid ||
                     proc->phys.pid != modex_id->phys.pid) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: existing peer and modex peer don't match\n",
                                    __FILE__, __LINE__);
                return OMPI_ERROR;
            }
        }
    }

    if (ompi_mtl_portals4.use_logical) {
        ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: logical mapping failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "logical mapping OK\n");
        free(maptable);
    }

    portals4_init_interface();

    /* activate progress callback */
    ret = opal_progress_register(ompi_mtl_portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    if (new_found) {
        ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: flowctl_add_procs failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }
    }
#endif

    return OMPI_SUCCESS;
}