Exemple #1
0
/*
 * Emit a show_help message describing the items that are still left
 * in the mpool tree.
 *
 * show_up_to_mem_leaks:
 *   0   -> print nothing at all
 *   < 0 -> use the "all mem leaks" message regardless of the count
 *   > 0 -> use the "all mem leaks" message when the number of leaks
 *          found does not exceed this limit; otherwise use the
 *          "some mem leaks" message, which also states how many
 *          leaks went unreported.
 *
 * The file-level leak_msg buffer is released and reset once a message
 * has been shown.
 */
void mca_mpool_base_tree_print(int show_up_to_mem_leaks)
{
    int unreported;

    /* A limit of zero means the caller wants no leak report */
    if (0 == show_up_to_mem_leaks) {
        return;
    }

    /* Reset the file-level counters, then walk the tree */
    max_mem_leaks = show_up_to_mem_leaks;
    num_leaks = 0;
    opal_rb_tree_traverse(&mca_mpool_base_tree, condition, action);

    /* Nothing was counted, so there is nothing to report */
    if (0 == num_leaks) {
        return;
    }

    if (show_up_to_mem_leaks >= 0 && num_leaks > show_up_to_mem_leaks) {
        /* More leaks than the caller asked to see: mention how many
           were left out of the message */
        unreported = num_leaks - show_up_to_mem_leaks;
        opal_show_help("help-mpool-base.txt", "some mem leaks",
                       true, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                       opal_proc_local_get()->proc_hostname,
                       getpid(), leak_msg, unreported,
                       (unreported > 1) ? "s were" : " was",
                       (unreported > 1) ? "are" : "is");
    } else {
        /* Either no limit (negative) or everything fit under it */
        opal_show_help("help-mpool-base.txt", "all mem leaks",
                       true, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                       opal_proc_local_get()->proc_hostname,
                       getpid(), leak_msg);
    }

    free(leak_msg);
    leak_msg = NULL;
}
Exemple #2
0
/*
 * Build the logical-to-physical process map for a Portals 4 BTL module
 * and install it with PtlSetMap().
 *
 * portals4_btl: module whose interface_num and portals_ni_h are used
 * nprocs:       number of entries in procs[] and endpoint[]
 * procs:        peer processes, in map order
 * endpoint:     endpoint[i] is handed to create_peer_and_endpoint()
 *               together with the map slot for procs[i]
 *
 * Returns OPAL_SUCCESS on success.  On failure returns
 * OPAL_ERR_OUT_OF_RESOURCE (allocation failed), OPAL_ERR_NOT_SUPPORTED
 * (a peer has a different architecture), or the code returned by
 * create_peer_and_endpoint() / PtlSetMap().
 *
 * The temporary map table is released on every exit path.
 */
static int
create_maptable(struct mca_btl_portals4_module_t *portals4_btl,
                size_t                            nprocs,
                opal_proc_t                     **procs,
                mca_btl_base_endpoint_t         **endpoint)
{
    int ret = OPAL_SUCCESS;
    ptl_process_t *maptable;

    maptable = malloc(sizeof(*maptable) * nprocs);
    if (NULL == maptable) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: malloc failed\n",
                            __FILE__, __LINE__);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* size_t index: matches the type of nprocs, so the comparison
       cannot wrap for very large process counts */
    for (size_t i = 0 ; i < nprocs ; i++) {
        struct opal_proc_t *curr_proc = procs[i];

        /* portals doesn't support heterogeneous yet... */
        if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "Portals 4 BTL does not support heterogeneous operations.");
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                "Proc %s architecture %x, mine %x.",
                OPAL_NAME_PRINT(curr_proc->proc_name),
                curr_proc->proc_arch, opal_proc_local_get()->proc_arch);
            ret = OPAL_ERR_NOT_SUPPORTED;
            goto cleanup;
        }

        ret = create_peer_and_endpoint(portals4_btl->interface_num,
                                       curr_proc,
                                       &maptable[i],
                                       &endpoint[i]);
        if (OPAL_SUCCESS != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: create_maptable::create_peer_and_endpoint failed: %d\n",
                                __FILE__, __LINE__, ret);
            /* NOTE(review): endpoints created in earlier iterations are
               not torn down here -- confirm the caller handles that. */
            goto cleanup;
        }
    }

    /* NOTE(review): the PtlSetMap() result is compared against
       OPAL_SUCCESS, as in the original code -- presumably both
       "success" constants are 0; confirm. */
    ret = PtlSetMap(portals4_btl->portals_ni_h,
                    nprocs,
                    maptable);
    if (OPAL_SUCCESS != ret) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: logical mapping failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto cleanup;
    }
    opal_output_verbose(90, opal_btl_base_framework.framework_output,
        "logical mapping OK\n");

cleanup:
    /* Single release point: previously every error return after the
       malloc leaked the table */
    free(maptable);

    return ret;
}
Exemple #3
0
/*
 *  Memory hook callback, called when memory is free'd out from under
 *  us.  Be wary of the from_alloc flag -- if you're called with
 *  from_alloc==true, then you cannot call malloc (or any of its
 *  friends)!
 *
 *  base, size:  the region being released; a zero size is ignored
 *  cbdata:      not used by this function
 *  from_alloc:  true when invoked from inside the allocator; in that
 *               case the error message is formatted into the
 *               file-level msg buffer and emitted with write(2)
 *               instead of opal_show_help()
 *
 *  Every module on mca_mpool_base_modules that provides an
 *  mpool_release_memory function is asked to release the region.  If
 *  any of them reports a failure, a message is printed and the process
 *  terminates via _exit(1).
 */
void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata,
                           bool from_alloc)
{
    mca_mpool_base_selected_module_t* current;
    int rc;
    opal_list_item_t* item;

    /* Only do anything meaningful if the OPAL layer is up and running
       and size != 0 */
    if ((from_alloc && (!opal_initialized)) ||
        size == 0) {
        return;
    }

    for(item = opal_list_get_first(&mca_mpool_base_modules);
        item != opal_list_get_end(&mca_mpool_base_modules);
        item = opal_list_get_next(item)) {

        current = (mca_mpool_base_selected_module_t*) item;

        if (NULL == current->mpool_module->mpool_release_memory) {
            continue;
        }

        rc = current->mpool_module->mpool_release_memory(current->mpool_module,
                base, size);
        if (OPAL_SUCCESS == rc) {
            continue;
        }

        if (from_alloc) {
            int len;
            len = snprintf(msg, sizeof(msg), "[%s:%d] Attempt to free memory that is still in use by an ongoing MPI communication (buffer %p, size %lu).  MPI job will now abort.\n",
                     opal_proc_local_get()->proc_hostname,
                     getpid(),
                     base, (unsigned long) size);
            msg[sizeof(msg) - 1] = '\0';

            /* snprintf() returns the length the full message would
               have had, which can exceed the buffer when the output
               was truncated (and is negative on error).  Clamp it so
               write() never reads past the end of msg. */
            if (len < 0) {
                len = 0;
            } else if ((size_t) len >= sizeof(msg)) {
                len = (int) (sizeof(msg) - 1);
            }
            if (len > 0) {
                /* Best effort: we are about to _exit() anyway */
                (void) write(2, msg, (size_t) len);
            }
        } else {
            opal_show_help("help-mpool-base.txt",
                           "cannot deregister in-use memory", true,
                           current->mpool_component->mpool_version.mca_component_name,
                           opal_proc_local_get()->proc_hostname,
                           base, (unsigned long) size);
        }

        /* We're in a callback from somewhere; we can't do
           anything meaningful to pass an error back up.  :-(
           So just exit.  Call _exit() so that we don't try to
           call anything on the way out -- just exit!
           (remember that we're in a callback, and state may
           be very undefined at this point...) */
        _exit(1);
    }
}
Exemple #4
0
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Attach to an already-created shared memory segment and wrap it in a
 * newly allocated mca_common_sm_module_t.
 *
 * shmem_bufp:          descriptor of the segment to attach to
 * size:                not referenced in the portion of the function
 *                      visible here
 * size_ctl_structure:  byte offset from the start of the segment to the
 *                      end of the control structure
 * data_seg_alignment:  if non-zero, the address following the control
 *                      structure is rounded up to this alignment
 * first_call:          not referenced in the portion of the function
 *                      visible here
 *
 * Every failure path shown here detaches from the segment (when it was
 * attached) and returns NULL.
 *
 * NOTE(review): this listing ends before the function body is closed,
 * so the success path and the return value are not visible.
 */
static mca_common_sm_module_t *
attach_and_init(opal_shmem_ds_t *shmem_bufp,
                size_t size,
                size_t size_ctl_structure,
                size_t data_seg_alignment,
                bool first_call)
{
    mca_common_sm_module_t *map = NULL;
    mca_common_sm_seg_header_t *seg = NULL;
    unsigned char *addr = NULL;

    /* attach to the specified segment. note that at this point, the contents of
     * *shmem_bufp have already been initialized via opal_shmem_segment_create.
     */
    if (NULL == (seg = (mca_common_sm_seg_header_t *)
                       opal_shmem_segment_attach(shmem_bufp))) {
        return NULL;
    }
    /* read barrier issued right after the attach and before the segment
     * pointer is used below */
    opal_atomic_rmb();

    if (NULL == (map = OBJ_NEW(mca_common_sm_module_t))) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        (void)opal_shmem_segment_detach(shmem_bufp);
        return NULL;
    }

    /* copy meta information into common sm module
     *                                     from ====> to                */
    if (OPAL_SUCCESS != opal_shmem_ds_copy(shmem_bufp, &map->shmem_ds)) {
        (void)opal_shmem_segment_detach(shmem_bufp);
        /* NOTE(review): map was created with OBJ_NEW but is released
         * with plain free() here and below -- presumably OBJ_RELEASE
         * would be the matching call; confirm. */
        free(map);
        return NULL;
    }

    /* the first entry in the file is the control structure. the first
     * entry in the control structure is an mca_common_sm_seg_header_t
     * element.
     */
    map->module_seg = seg;

    /* addr starts out pointing just past the control structure */
    addr = ((unsigned char *)seg) + size_ctl_structure;
    /* if we have a data segment (i.e., if 0 != data_seg_alignment),
     * then make it the first aligned address after the control
     * structure.
     */
    if (0 != data_seg_alignment) {
        addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *);
        /* is addr past end of the shared memory segment?  IF THIS
         * HAPPENS, THIS IS A PROGRAMMING ERROR IN OPEN MPI! */
        if ((unsigned char *)seg + shmem_bufp->seg_size < addr) {
            opal_show_help("help-mpi-common-sm.txt", "mmap too small", 1,
                           opal_proc_local_get()->proc_hostname,
                           (unsigned long)shmem_bufp->seg_size,
                           (unsigned long)size_ctl_structure,
                           (unsigned long)data_seg_alignment);
            (void)opal_shmem_segment_detach(shmem_bufp);
            free(map);
            return NULL;
        }
    }
Exemple #5
0
/*
 * Append a BTL endpoint to a process's endpoint array and configure
 * the endpoint from the peer's architecture flags.
 *
 * The caller must already hold the lock on the process.
 *
 * module_proc:     process that receives the endpoint
 * module_endpoint: endpoint to record; its nbo, use_eager_rdma and
 *                  endpoint_proc fields may be updated
 *
 * Always returns OPAL_SUCCESS.
 */
int mca_btl_openib_proc_insert(mca_btl_openib_proc_t* module_proc,
        mca_btl_base_endpoint_t* module_endpoint)
{
#ifndef WORDS_BIGENDIAN
    /* We are little endian.  If the peer is big endian, everything
       sent to it must be converted to Network Byte Order, and
       everything received from it is expected in NBO.  (Big endian
       hosts always use NBO, so that build needs no flag here.) */
    if (0 != (module_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
        module_endpoint->nbo = true;
    }
#endif

    /* Eager RDMA is only permitted when both sides agree on the size
       of a long */
    if ((opal_proc_local_get()->proc_arch & OPAL_ARCH_LONGISxx) !=
        (module_proc->proc_opal->proc_arch & OPAL_ARCH_LONGISxx)) {
        module_endpoint->use_eager_rdma = false;
    }

    /* Link the endpoint and the process to each other */
    module_endpoint->endpoint_proc = module_proc;
    module_proc->proc_endpoints[module_proc->proc_endpoint_count] = module_endpoint;
    module_proc->proc_endpoint_count++;

    return OPAL_SUCCESS;
}
Exemple #6
0
int mca_mpool_sm_ft_event(int state) {
    mca_mpool_base_module_t *self_module = NULL;
    mca_mpool_sm_module_t   *self_sm_module = NULL;
    char * file_name = NULL;

    if(OPAL_CRS_CHECKPOINT == state) {
        /* Record the shared memory filename */
        asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
                  opal_process_info.job_session_dir,
                  opal_proc_local_get()->proc_hostname );
        /* Disabled to get FT code compiled again
         * TODO: FIXIT soon
        orte_sstore.set_attr(orte_sstore_handle_current, SSTORE_METADATA_LOCAL_TOUCH, file_name);
         */
        free(file_name);
        file_name = NULL;
    }
    else if(OPAL_CRS_CONTINUE == state) {
        if (opal_cr_continue_like_restart) {
            /* Find the sm module */
            self_module = mca_mpool_base_module_lookup("sm");
            self_sm_module = (mca_mpool_sm_module_t*) self_module;

            /* Mark the old sm file for eventual removal via CRS */
            if (NULL != self_sm_module->sm_common_module) {
                opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false);
            }

            /* Remove self from the list of all modules */
            mca_mpool_base_module_destroy(self_module);
        }
    }
    else if(OPAL_CRS_RESTART == state ||
            OPAL_CRS_RESTART_PRE == state) {
        /* Find the sm module */
        self_module = mca_mpool_base_module_lookup("sm");
        self_sm_module = (mca_mpool_sm_module_t*) self_module;

        /* Mark the old sm file for eventual removal via CRS */
        if (NULL != self_sm_module->sm_common_module) {
            opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false);
        }

        /* Remove self from the list of all modules */
        mca_mpool_base_module_destroy(self_module);
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

    return OPAL_SUCCESS;
}
Exemple #7
0
/*
 * Encode a key/value pair into the file-level packed buffer and commit
 * every completely filled "meta key" (a chunk of pmix_vallen_max
 * bytes) through kvs_put.  Bytes that do not yet fill a whole chunk
 * stay in the packed buffer for a later call.
 *
 * scope: not used by this function
 * kv:    value to encode; its key, data and type are passed to
 *        opal_pmix_base_store_encoded()
 *
 * Returns OPAL_SUCCESS, the error from opal_pmix_base_store_encoded(),
 * or OPAL_ERR_OUT_OF_RESOURCE when the staging buffer cannot be
 * allocated.
 */
static int cray_put(opal_pmix_scope_t scope,
                  opal_value_t *kv)
{
    int rc;
    char* buffer_to_put;
    int rem_offset = 0;
    int data_to_put = 0;

    /* NOTE(review): proc_name is printed with %ld -- confirm that its
       type really matches that conversion. */
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray cray_put my name is %ld\n",
                         OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), opal_proc_local_get()->proc_name);

    if (OPAL_SUCCESS != (rc = opal_pmix_base_store_encoded (kv->key, (void*)&kv->data, kv->type,
                                                            &pmix_packed_data, &pmix_packed_data_offset))) {
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    if (pmix_packed_data_offset == 0) {
        /* nothing to write */
        return OPAL_SUCCESS;
    }

    if (pmix_packed_data_offset < pmix_vallen_max) {
        /* this meta-key is still being filled,
         * nothing to put yet
         */
        return OPAL_SUCCESS;
    }

    /* encode only full filled meta keys */
    rem_offset = pmix_packed_data_offset % pmix_vallen_max;
    data_to_put = pmix_packed_data_offset - rem_offset;
    buffer_to_put = (char*)malloc(data_to_put);
    if (NULL == buffer_to_put) {
        /* The packed data is left untouched so nothing is lost */
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    memcpy(buffer_to_put, pmix_packed_data, data_to_put);

    opal_pmix_base_commit_packed (buffer_to_put, data_to_put, pmix_vallen_max, &pmix_pack_key, kvs_put);

    free(buffer_to_put);
    pmix_packed_data_offset = rem_offset;
    if (0 == pmix_packed_data_offset) {
        free(pmix_packed_data);
        pmix_packed_data = NULL;
    } else {
        void *shrunk;

        /* Slide the leftover bytes to the front of the buffer */
        memmove (pmix_packed_data, pmix_packed_data + data_to_put, pmix_packed_data_offset);

        /* Shrinking is only an optimisation.  If realloc() fails the
           original block is still valid and still holds the leftover
           bytes, so keep using it instead of overwriting the pointer
           with NULL (which would leak it and drop the data). */
        shrunk = realloc (pmix_packed_data, pmix_packed_data_offset);
        if (NULL != shrunk) {
            pmix_packed_data = shrunk;
        }
    }

    return rc;
}
/*
 * Show the "btl:no-nics" help message for a transport that found no
 * usable NIC.  Nothing is printed unless
 * mca_btl_base_warn_component_unused is set.
 *
 * transport: transport name inserted into the message
 * nic_name:  NIC name inserted into the message
 */
void mca_btl_base_error_no_nics(const char* transport,
                                const char* nic_name)
{
    char *procid = NULL;

    /* print out no-nic warning only if user told us to */
    if (!mca_btl_base_warn_component_unused) {
        return;
    }

    /* asprintf() leaves procid unspecified on failure, so it must not
       be used or freed in that case; without it the message cannot be
       built, so give up quietly. */
    if (asprintf(&procid, "%s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)) < 0) {
        return;
    }

    opal_show_help("help-mpi-btl-base.txt", "btl:no-nics",
                   true, procid, transport, opal_proc_local_get()->proc_hostname,
                   nic_name);
    free(procid);
}
Exemple #9
0
/*
 * Enable libibverbs fork support when requested and, where
 * configured, register the fake usnic verbs drivers.
 *
 * opal_common_verbs_want_fork_support:
 *   0   -> ibv_fork_init() is not called
 *   > 0 -> ibv_fork_init() is called; a failure is reported through
 *          opal_show_help() and makes this function return OPAL_ERROR
 *   < 0 -> ibv_fork_init() is called; a failure is silently ignored
 *
 * Returns OPAL_SUCCESS or OPAL_ERROR.
 */
int opal_common_verbs_fork_test(void)
{
    int ret = OPAL_SUCCESS;

    /* Make sure that ibv_fork_init() is the first ibv_* function to
       be invoked in this process. */
#ifdef HAVE_IBV_FORK_INIT
    if (0 != opal_common_verbs_want_fork_support) {
        /* ibv_fork_init() returns 0 on success and the error code
           itself on failure.  Report that value rather than the global
           errno, which the call is not guaranteed to have set and
           which other calls made while building the message could
           overwrite. */
        int fork_rc = ibv_fork_init();

        /* Only a positive setting turns the failure into an error */
        if (0 != fork_rc && opal_common_verbs_want_fork_support > 0) {
            opal_show_help("help-opal-common-verbs.txt",
                           "ibv_fork_init fail", true,
                           opal_proc_local_get()->proc_hostname, fork_rc,
                           strerror(fork_rc));
            ret = OPAL_ERROR;
        }
    }
#endif

#if OPAL_COMMON_VERBS_USNIC_HAPPY
    /* Now register any necessary fake libibverbs drivers.  We
       piggyback loading these fake drivers on the fork test because
       they must be loaded before ibv_get_device_list() is invoked.
       Note that this routine is in a different common component (see
       comments over there for an explanation why).  */
    opal_common_verbs_usnic_register_fake_drivers();
#endif

    return ret;
}
/*
 * Abort the job on behalf of the usnic BTL.  This function does not
 * return.
 *
 * module: module whose PML error callback should be invoked.  When
 *         NULL, the first active module that has an error callback is
 *         used; if there is none, a message is printed to stderr and
 *         the process exits with status 1.
 *
 * If the chosen module has a PML error callback it is invoked with
 * MCA_BTL_ERROR_FLAGS_FATAL; should that callback return, the process
 * exits with status 1.
 */
void opal_btl_usnic_exit(opal_btl_usnic_module_t *module)
{
    if (NULL == module) {
        /* Search the active modules for one with an error callback */
        for (int idx = 0; idx < mca_btl_usnic_component.num_modules; ++idx) {
            opal_btl_usnic_module_t *candidate;

            if (NULL == mca_btl_usnic_component.usnic_active_modules) {
                continue;
            }
            candidate = mca_btl_usnic_component.usnic_active_modules[idx];
            if (NULL == candidate || NULL == candidate->pml_error_callback) {
                continue;
            }

            module = candidate;
            break;
        }

        /* No PML error callback anywhere: all we can do is exit */
        if (NULL == module) {
            fprintf(stderr, "*** The Open MPI usnic BTL is aborting the MPI job (via exit(3)).\n");
            fflush(stderr);
            exit(1);
        }
    }

    /* After discussion with George, we decided that it was safe to
       cast away the const from opal_proc_local_get() -- the error
       function needs to be smart enough to not take certain actions
       if the passed proc is yourself (e.g., don't call del_procs() on
       yourself). */
    if (NULL != module->pml_error_callback) {
        module->pml_error_callback(&module->super,
                                   MCA_BTL_ERROR_FLAGS_FATAL,
                                   (opal_proc_t*) opal_proc_local_get(),
                                   "The usnic BTL is aborting the MPI job (via PML error callback).");
    }

    /* Either there was no callback or it came back to us; exit
       regardless. */
    exit(1);
}
Exemple #11
0
/*
 * Add a set of processes to the smcuda BTL.
 *
 * btl:          the smcuda BTL module
 * nprocs:       number of entries in procs[] and peers[]
 * procs:        candidate peer processes
 * peers:        output; peers[i] is set to NULL for processes in a
 *               different job or not on the local node, and to a newly
 *               created endpoint for reachable peers.  The entry for
 *               the calling process itself is not written.
 * reachability: bitmap in which the bit for every reachable peer is set
 *
 * Returns OPAL_SUCCESS (also when no peer is reachable), or an error
 * code from one of the setup steps.
 *
 * NOTE(review): the CLEANUP label only returns; endpoints created
 * before a failure are not released in this function -- confirm the
 * caller takes care of them.
 */
int mca_btl_smcuda_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct opal_proc_t **procs,
    struct mca_btl_base_endpoint_t **peers,
    opal_bitmap_t* reachability)
{
    int return_code = OPAL_SUCCESS;
    int32_t n_local_procs = 0, proc, j, my_smp_rank = -1;
    const opal_proc_t* my_proc; /* pointer to caller's proc structure */
    mca_btl_smcuda_t *smcuda_btl;
    bool have_connected_peer = false;
    char **bases;
    /* for easy access to the mpool_sm_module */
    mca_mpool_sm_module_t *sm_mpool_modp = NULL;

    /* initialization */

    smcuda_btl = (mca_btl_smcuda_t *)btl;

    /* get pointer to my proc structure */
    if(NULL == (my_proc = opal_proc_local_get()))
        return OPAL_ERR_OUT_OF_RESOURCE;

    /* Get unique host identifier for each process in the list,
     * and identify procs that are on this host.  Add procs on this
     * host to shared memory reachability list.  Also, get number
     * of local procs in the procs list. */
    for (proc = 0; proc < (int32_t)nprocs; proc++) {
        /* check to see if this proc can be reached via shmem (i.e.,
           if they're on my local host and in my job) */
        if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
            !OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) {
            peers[proc] = NULL;
            continue;
        }
        /* check to see if this is me; my smp rank is my position among
           the local procs seen so far */
        if(my_proc == procs[proc]) {
            my_smp_rank = mca_btl_smcuda_component.my_smp_rank = n_local_procs++;
            continue;
        }

        /* we have someone to talk to */
        have_connected_peer = true;

        if(!(peers[proc] = create_sm_endpoint(n_local_procs, procs[proc]))) {
            return_code = OPAL_ERROR;
            goto CLEANUP;
        }
#if OPAL_CUDA_SUPPORT
        peers[proc]->proc_opal = procs[proc];
        peers[proc]->ipcstate = IPC_INIT;
        peers[proc]->ipctries = 0;
#endif /* OPAL_CUDA_SUPPORT */
        n_local_procs++;

        /* add this proc to shared memory accessibility list */
        return_code = opal_bitmap_set_bit(reachability, proc);
        if(OPAL_SUCCESS != return_code)
            goto CLEANUP;
    }

    /* jump out if there's not someone we can talk to (return_code is
     * still OPAL_SUCCESS at this point) */
    if (!have_connected_peer)
        goto CLEANUP;

    /* make sure that my_smp_rank has been defined */
    if (-1 == my_smp_rank) {
        return_code = OPAL_ERROR;
        goto CLEANUP;
    }

    /* one-time BTL initialization, skipped when btl_inited is set */
    if (!smcuda_btl->btl_inited) {
        return_code =
            smcuda_btl_first_time_init(smcuda_btl, my_smp_rank,
                                   mca_btl_smcuda_component.sm_max_procs);
        if (return_code != OPAL_SUCCESS) {
            goto CLEANUP;
        }
    }

    /* set local proc's smp rank in the peers structure for
     * rapid access and calculate reachability */
    for(proc = 0; proc < (int32_t)nprocs; proc++) {
        if(NULL == peers[proc])
            continue;
        mca_btl_smcuda_component.sm_peers[peers[proc]->peer_smp_rank] = peers[proc];
        peers[proc]->my_smp_rank = my_smp_rank;
    }

    bases = mca_btl_smcuda_component.shm_bases;
    sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_smcuda_component.sm_mpool;

    /* initialize own FIFOs */
    /*
     * The receiver initializes all its FIFOs.  All components will
     * be allocated near the receiver.  Nothing will be local to
     * "the sender" since there will be many senders.
     */
    for(j = mca_btl_smcuda_component.num_smp_procs;
        j < mca_btl_smcuda_component.num_smp_procs + FIFO_MAP_NUM(n_local_procs); j++) {

        return_code = sm_fifo_init( mca_btl_smcuda_component.fifo_size,
                                    mca_btl_smcuda_component.sm_mpool,
                                   &mca_btl_smcuda_component.fifo[my_smp_rank][j],
                                    mca_btl_smcuda_component.fifo_lazy_free);
        if(return_code != OPAL_SUCCESS)
            goto CLEANUP;
    }

    /* write barrier between the FIFO setup above and the seg_inited
     * increment below */
    opal_atomic_wmb();

    /* Sync with other local procs. Force the FIFO initialization to always
     * happen before the readers access it.
     */
    (void)opal_atomic_add_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
    while( n_local_procs >
           mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) {
        opal_progress();
        opal_atomic_rmb();
    }

    /* it is now safe to unlink the shared memory segment. only one process
     * needs to do this, so just let smp rank zero take care of it. */
    if (0 == my_smp_rank) {
        if (OPAL_SUCCESS !=
            mca_common_sm_module_unlink(mca_btl_smcuda_component.sm_seg)) {
            /* it is "okay" if this fails at this point. we have gone this far,
             * so just warn about the failure and continue. this is probably
             * only triggered by a programming error. */
            opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
        }
        /* SKG - another abstraction violation here, but I don't want to add
         * extra code in the sm mpool for further synchronization. */

        /* at this point, all processes have attached to the mpool segment. so
         * it is safe to unlink it here. */
        if (OPAL_SUCCESS !=
            mca_common_sm_module_unlink(sm_mpool_modp->sm_common_module)) {
            opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
        }
        if (-1 == unlink(mca_btl_smcuda_component.sm_mpool_rndv_file_name)) {
            opal_output(0, "WARNING: %s unlink failed.\n",
                        mca_btl_smcuda_component.sm_mpool_rndv_file_name);
        }
        if (-1 == unlink(mca_btl_smcuda_component.sm_rndv_file_name)) {
            opal_output(0, "WARNING: %s unlink failed.\n",
                        mca_btl_smcuda_component.sm_rndv_file_name);
        }
    }

    /* free up some space used by the name buffers */
    /* NOTE(review): these pointers are not reset to NULL after being
     * freed.  If this function can run more than once, the unlink()
     * and free() calls above/below would operate on freed memory --
     * confirm whether repeated calls are possible. */
    free(mca_btl_smcuda_component.sm_mpool_ctl_file_name);
    free(mca_btl_smcuda_component.sm_mpool_rndv_file_name);
    free(mca_btl_smcuda_component.sm_ctl_file_name);
    free(mca_btl_smcuda_component.sm_rndv_file_name);

    /* coordinate with other processes */
    for(j = mca_btl_smcuda_component.num_smp_procs;
        j < mca_btl_smcuda_component.num_smp_procs + n_local_procs; j++) {
        ptrdiff_t diff;

        /* spin until this element is allocated */
        /* doesn't really wait for that process... FIFO might be allocated, but not initialized */
        opal_atomic_rmb();
        while(NULL == mca_btl_smcuda_component.shm_fifo[j]) {
            opal_progress();
            opal_atomic_rmb();
        }

        /* Calculate the difference as (my_base - their_base) */
        diff = ADDR2OFFSET(bases[my_smp_rank], bases[j]);

        /* store local address of remote fifos */
        mca_btl_smcuda_component.fifo[j] =
            (sm_fifo_t*)OFFSET2ADDR(diff, mca_btl_smcuda_component.shm_fifo[j]);

        /* cache local copy of peer memory node number */
        mca_btl_smcuda_component.mem_nodes[j] = mca_btl_smcuda_component.shm_mem_nodes[j];
    }

    /* update the local smp process count */
    mca_btl_smcuda_component.num_smp_procs += n_local_procs;

    /* make sure we have enough eager fragments for each process */
    return_code = opal_free_list_resize_mt (&mca_btl_smcuda_component.sm_frags_eager,
                                            mca_btl_smcuda_component.num_smp_procs * 2);
    if (OPAL_SUCCESS != return_code)
        goto CLEANUP;

    /* common exit point for the success path and all "goto" failures */
CLEANUP:
    return return_code;
}
Exemple #12
0
/*
 * Find all the CPCs that are eligible for a single local port (i.e.,
 * openib module).
 *
 * dev:      device descriptor handed to each CPC's cbc_query()
 * cpcs:     output; on success receives a heap-allocated array of the
 *           modules returned by the CPCs that accepted the port
 * num_cpcs: output; on success receives the number of entries stored
 *           in *cpcs
 *
 * Returns OPAL_SUCCESS when at least one CPC accepted the port,
 * OPAL_ERR_NOT_SUPPORTED when none did (a help message listing the
 * CPCs that were tried is shown), OPAL_ERR_OUT_OF_RESOURCE on
 * allocation failure, or the error code of a CPC query that failed
 * with anything other than "not supported" / "unreachable".  The
 * outputs are only written on success.
 */
int opal_common_ofacm_base_select_for_local_port(opal_common_ofacm_base_dev_desc_t *dev,
        opal_common_ofacm_base_module_t ***cpcs, int *num_cpcs)
{
    char *msg = NULL;
    int i, rc, cpc_index, len;
    opal_common_ofacm_base_module_t **tmp_cpcs;

    tmp_cpcs = calloc(num_available,
                  sizeof(opal_common_ofacm_base_module_t *));
    if (NULL == tmp_cpcs) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* Size a buffer for a comma-separated list of every available CPC
       name: each name plus ", ", plus the terminating NUL. */
    for (len = 1, i = 0; NULL != available[i]; ++i) {
        len += strlen(available[i]->cbc_name) + 2;
    }
    msg = malloc(len);
    if (NULL == msg) {
        /* Do not leak the module array allocated above */
        free(tmp_cpcs);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    msg[0] = '\0';

    /* Go through all available CPCs and query them to see if they
       want to run on this module.  If they do, save them to a running
       array.  The name list is built along the way so that it can be
       shown if no CPC turns out to be eligible. */
    for (cpc_index = i = 0; NULL != available[i]; ++i) {
        if (i > 0) {
            strcat(msg, ", ");
        }
        strcat(msg, available[i]->cbc_name);

        rc = available[i]->cbc_query(dev, &tmp_cpcs[cpc_index]);
        if (OPAL_ERR_NOT_SUPPORTED == rc || OPAL_ERR_UNREACH == rc) {
            /* This CPC declined; its slot is reused by the next one */
            continue;
        } else if (OPAL_SUCCESS != rc) {
            free(tmp_cpcs);
            free(msg);
            return rc;
        }
        OFACM_VERBOSE(("match cpc for local port: %s",
                    available[i]->cbc_name));

        /* If the CPC wants to use the CTS protocol, check to ensure
           that QP 0 is PP; if it's not, we can't use this CPC (or the
           CTS protocol) */
        /* Pasha: Wrong place to check qp type, should be moved to CMs
        if (cpcs[cpc_index]->cbm_uses_cts &&
            !BTL_OPENIB_QP_TYPE_PP(0)) {
            OFACM_VERBOSE(("this CPC only supports when the first btl_openib_receive_queues QP is a PP QP"));
            continue;
        }
        */

        /* This CPC has indicated that it wants to run on this openib
           BTL module.  Woo hoo! */
        ++cpc_index;
    }

    /* If we got an empty array, then no CPCs were eligible.  Doh! */
    if (0 == cpc_index) {
        opal_show_help("help-mpi-common-ofacm-cpc-base.txt",
                       "no cpcs for port", true,
                       opal_proc_local_get()->proc_hostname,
                       ibv_get_device_name(dev->ib_dev),
                       msg);
        free(tmp_cpcs);
        free(msg);
        return OPAL_ERR_NOT_SUPPORTED;
    }
    free(msg);

    /* We got at least one eligible CPC; save the array into the
       module's port_info */
    *num_cpcs = cpc_index;
    *cpcs = tmp_cpcs;

    return OPAL_SUCCESS;
}
Exemple #13
0
/*
 * Register MCA parameters
 *
 * Registers the ofacm_cpc_include / ofacm_cpc_exclude selection
 * parameters, the three_dim_torus file name and the base_verbose
 * level, then builds the file-level available[] list from all[]
 * according to the include/exclude setting and calls cbc_register()
 * on each selected CPC.
 *
 * base: component under which the include/exclude parameters are
 *       registered
 *
 * Only the first call does any work; later calls return OPAL_SUCCESS
 * immediately.
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_NOT_FOUND when an include/exclude
 * list names an unknown CPC (a help message is shown), or
 * OPAL_ERR_OUT_OF_RESOURCE on allocation failure.
 */
int opal_common_ofacm_base_register(mca_base_component_t *base)
{
    int i, j, save;
    char **temp = NULL, *string = NULL, *all_cpc_names = NULL;

    if (opal_common_ofacm_base_register_was_called) {
        return OPAL_SUCCESS;
    }

    opal_common_ofacm_base_register_was_called = true;

    /* Make an MCA parameter to select which connect module to use */
    for (i = 0; NULL != all[i]; ++i) {
        /* The CPC name "empty" is reserved for "fake" CPC modules */
        if (0 != strcmp(all[i]->cbc_name, "empty")) {
            opal_argv_append_nosize(&temp, all[i]->cbc_name);
        }
    }
    all_cpc_names = opal_argv_join(temp, ',');
    opal_argv_free(temp);
    temp = NULL;
    if (NULL == all_cpc_names) {
        /* The name list is formatted with %s below; never pass NULL */
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* asprintf() leaves its output pointer unspecified on failure, so
       the result must be checked before the string is used or freed */
    if (asprintf(&string,
                 "Method used to select OpenFabrics connections (valid values: %s)",
                 all_cpc_names) < 0) {
        free(all_cpc_names);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    opal_common_ofacm_cpc_include = NULL;
    (void) mca_base_component_var_register(base, "ofacm_cpc_include", string,
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &opal_common_ofacm_cpc_include);
    free(string);
    string = NULL;

    if (asprintf(&string,
                 "Method used to exclude OpenFabrics connections (valid values: %s)",
                 all_cpc_names) < 0) {
        free(all_cpc_names);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    opal_common_ofacm_cpc_exclude = NULL;
    (void) mca_base_component_var_register(base, "ofacm_cpc_exclude", string,
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &opal_common_ofacm_cpc_exclude);
    free(string);
    string = NULL;

    /* Register the name of the file containing the fabric's Service Levels (SL) */
    opal_common_ofacm_three_dim_torus = NULL;
    (void) mca_base_var_register("ompi", "common", "ofacm", "three_dim_torus",
                                 "The name of the file contating Service Level (SL) data for 3D-Torus cluster",
                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &opal_common_ofacm_three_dim_torus);

    opal_common_ofacm_base_verbose = 0;
    (void) mca_base_var_register("ompi", "common", "ofacm", "base_verbose",
                                 "Verbosity level of the OFACM framework",
                                 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &opal_common_ofacm_base_verbose);


    /* Parse the if_[in|ex]clude parameters to come up with a list of
       CPCs that are available.  available[] gets the same size as
       all[] and starts out zeroed. */
    available = calloc(1, sizeof(all));
    if (NULL == available) {
        free(all_cpc_names);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* If we have an "include" list, then find all those CPCs and put
       them in available[] */
    if (NULL != opal_common_ofacm_cpc_include) {
        cpc_explicitly_defined = true;
        temp = opal_argv_split(opal_common_ofacm_cpc_include, ',');
        /* Defensive: a NULL split result is treated as an empty list */
        for (save = j = 0; NULL != temp && NULL != temp[j]; ++j) {
            for (i = 0; NULL != all[i]; ++i) {
                if (0 == strcmp(temp[j], all[i]->cbc_name)) {
                    OFACM_VERBOSE(("include: saving %s", all[i]->cbc_name));
                    available[save++] = all[i];
                    ++num_available;
                    break;
                }
            }
            /* The inner loop ran off the end: the name is unknown */
            if (NULL == all[i]) {
                opal_show_help("help-mpi-common-ofacm-cpc-base.txt",
                               "cpc name not found", true,
                               "include", opal_proc_local_get()->proc_hostname,
                               "include", opal_common_ofacm_cpc_include, temp[j],
                               all_cpc_names);
                opal_argv_free(temp);
                free(all_cpc_names);
                return OPAL_ERR_NOT_FOUND;
            }
        }
        opal_argv_free(temp);
    }

    /* Otherwise, if we have an "exclude" list, take all the CPCs that
       are not in that list and put them in available[] */
    else if (NULL != opal_common_ofacm_cpc_exclude) {
        cpc_explicitly_defined = true;
        temp = opal_argv_split(opal_common_ofacm_cpc_exclude, ',');
        /* First: error check -- ensure that all the names are valid */
        for (j = 0; NULL != temp && NULL != temp[j]; ++j) {
            for (i = 0; NULL != all[i]; ++i) {
                if (0 == strcmp(temp[j], all[i]->cbc_name)) {
                    break;
                }
            }
            if (NULL == all[i]) {
                opal_show_help("help-mpi-common-ofacm-cpc-base.txt",
                               "cpc name not found", true,
                               "exclude", opal_proc_local_get()->proc_hostname,
                               "exclude", opal_common_ofacm_cpc_exclude, temp[j],
                               all_cpc_names);
                opal_argv_free(temp);
                free(all_cpc_names);
                return OPAL_ERR_NOT_FOUND;
            }
        }

        /* Now do the exclude */
        for (save = i = 0; NULL != all[i]; ++i) {
            for (j = 0; NULL != temp && NULL != temp[j]; ++j) {
                if (0 == strcmp(temp[j], all[i]->cbc_name)) {
                    break;
                }
            }
            /* Not found in the exclude list: keep this CPC */
            if (NULL == temp || NULL == temp[j]) {
                OFACM_VERBOSE(("exclude: saving %s", all[i]->cbc_name));
                available[save++] = all[i];
                ++num_available;
            }
        }
        opal_argv_free(temp);
    }

    /* If there's no include/exclude list, copy all[] into available[] */
    else {
        OFACM_VERBOSE(("no include or exclude: saving all"));
        memcpy(available, all, sizeof(all));
        /* all[] is NULL-terminated, hence the "- 1" */
        num_available = (sizeof(all) /
                         sizeof(opal_common_ofacm_base_module_t *)) - 1;
    }

    /* The joined name list was only needed for the parameter
       descriptions and the error messages above; release it so the
       success path does not leak it */
    free(all_cpc_names);

    /* Call the register function on all the CPCs so that they may
       setup any MCA params specific to the connection type */
    for (i = 0; NULL != available[i]; ++i) {
        if (NULL != available[i]->cbc_register) {
            available[i]->cbc_register();
        }
    }

    return OPAL_SUCCESS;
}
Exemple #14
0
static int init_ud_qp(struct ibv_context *context_arg,
                      struct mca_btl_openib_sa_qp_cache *cache)
{
    struct ibv_qp_init_attr iattr;
    struct ibv_qp_attr mattr;
    int rc;

    /* create cq */
    cache->cq = ibv_create_cq(cache->context, 4, NULL, NULL, 0);
    if (NULL == cache->cq) {
        BTL_ERROR(("error creating cq, errno says %s", strerror(errno)));
        opal_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
                true, opal_proc_local_get()->proc_hostname,
                __FILE__, __LINE__, "ibv_create_cq",
                strerror(errno), errno,
                ibv_get_device_name(context_arg->device));
        return OPAL_ERROR;
    }

    /* create qp */
    memset(&iattr, 0, sizeof(iattr));
    iattr.send_cq = cache->cq;
    iattr.recv_cq = cache->cq;
    iattr.cap.max_send_wr = 2;
    iattr.cap.max_recv_wr = 2;
    iattr.cap.max_send_sge = 1;
    iattr.cap.max_recv_sge = 1;
    iattr.qp_type = IBV_QPT_UD;
    cache->qp = ibv_create_qp(cache->pd, &iattr);
    if (NULL == cache->qp) {
        BTL_ERROR(("error creating qp %s (%d)", strerror(errno), errno));
        return OPAL_ERROR;
    }

    /* modify qp to IBV_QPS_INIT */
    memset(&mattr, 0, sizeof(mattr));
    mattr.qp_state = IBV_QPS_INIT;
    mattr.port_num = cache->port_num;
    mattr.qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY);
    rc = ibv_modify_qp(cache->qp, &mattr,
            IBV_QP_STATE              |
            IBV_QP_PKEY_INDEX         |
            IBV_QP_PORT               |
            IBV_QP_QKEY);
    if (rc) {
        BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_INIT errno says: %s [%d]",
                    cache->qp->qp_num, strerror(errno), errno));
        return OPAL_ERROR;
    }

    /* modify qp to IBV_QPS_RTR */
    memset(&mattr, 0, sizeof(mattr));
    mattr.qp_state = IBV_QPS_RTR;
    rc = ibv_modify_qp(cache->qp, &mattr, IBV_QP_STATE);
    if (rc) {
        BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_RTR errno says: %s [%d]",
                    cache->qp->qp_num, strerror(errno), errno));
        return OPAL_ERROR;
    }

    /* modify qp to IBV_QPS_RTS */
    mattr.qp_state = IBV_QPS_RTS;
    rc = ibv_modify_qp(cache->qp, &mattr, IBV_QP_STATE | IBV_QP_SQ_PSN);
    if (rc) {
        BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_RTR errno says: %s [%d]",
                    cache->qp->qp_num, strerror(errno), errno));
        return OPAL_ERROR;
    }

    return OPAL_SUCCESS;
}
Exemple #15
0
/*
 * Start an RDMA read of `size` bytes from `remote_address` on the peer `ep`
 * into `local_address`.
 *
 * A get fragment is allocated and filled in with the local scatter-gather
 * entry, the completion callback and the remote address/rkey, then handed to
 * mca_btl_openib_get_internal().
 *
 * Returns:
 *   OPAL_ERR_BAD_PARAM        size exceeds btl->btl_get_limit
 *   OPAL_ERR_OUT_OF_RESOURCE  no fragment could be allocated
 *   OPAL_SUCCESS              the read was posted, or the fragment was queued
 *                             (check_endpoint_state reported
 *                             OPAL_ERR_RESOURCE_BUSY, or the internal post
 *                             ran out of resources)
 *   any other code            returned by check_endpoint_state() or
 *                             mca_btl_openib_get_internal(); the fragment is
 *                             returned before the code is propagated
 */
int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, void *local_address,
                        uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                        mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                        int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    mca_btl_openib_get_frag_t* frag;
    int qp_index;
    int status;

    if (OPAL_UNLIKELY(btl->btl_get_limit < size)) {
        return OPAL_ERR_BAD_PARAM;
    }

    frag = to_get_frag(alloc_recv_user_frag());
    if (OPAL_UNLIKELY(NULL == frag)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* callers that do not care about ordering get the component's RDMA QP */
    qp_index = (MCA_BTL_NO_ORDER == order) ? mca_btl_openib_component.rdma_qp
                                           : order;

    /* base descriptor: remember the QP; the BTL owns the descriptor, so it
       is freed when the operation completes */
    to_base_frag(frag)->base.order = qp_index;
    to_base_frag(frag)->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;

    /* completion callback */
    frag->cb.func = cbfunc;
    frag->cb.context = cbcontext;
    frag->cb.data = cbdata;
    frag->cb.local_handle = local_handle;

    /* local scatter-gather entry */
    to_com_frag(frag)->endpoint = ep;
    to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t) local_address;
    to_com_frag(frag)->sg_entry.length = size;
    to_com_frag(frag)->sg_entry.lkey = local_handle->lkey;

    /* send descriptor; the opcode is reset because an atomic operation may
       have changed it */
    frag->sr_desc.opcode = IBV_WR_RDMA_READ;
    frag->sr_desc.wr.rdma.remote_addr = remote_address;
    frag->sr_desc.wr.rdma.rkey = remote_handle->rkey;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    /* the peer has the opposite endianness: byte-swap the remote key */
    if ((ep->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
            != (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
        frag->sr_desc.wr.rdma.rkey = opal_swap_bytes4 (remote_handle->rkey);
    }
#endif

#if HAVE_XRC
    if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp_index)) {
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
        frag->sr_desc.qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp_index].rem_srq_num;
#else
        frag->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp_index].rem_srq_num;
#endif
    }
#endif

    if (MCA_BTL_IB_CONNECTED != ep->endpoint_state) {
        OPAL_THREAD_LOCK(&ep->endpoint_lock);
        status = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_get_frags);
        OPAL_THREAD_UNLOCK(&ep->endpoint_lock);

        if (OPAL_ERR_RESOURCE_BUSY == status) {
            /* reported to the caller as success; the fragment is not
               returned on this path */
            return OPAL_SUCCESS;
        }
        if (OPAL_SUCCESS != status) {
            MCA_BTL_IB_FRAG_RETURN (frag);
            return status;
        }
    }

    status = mca_btl_openib_get_internal (btl, ep, frag);
    if (OPAL_LIKELY(OPAL_SUCCESS == status)) {
        return OPAL_SUCCESS;
    }

    if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == status)) {
        /* out of resources right now: queue the fragment on the endpoint's
           pending list and report success */
        OPAL_THREAD_LOCK(&ep->endpoint_lock);
        opal_list_append(&ep->pending_get_frags, (opal_list_item_t*)frag);
        OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
        return OPAL_SUCCESS;
    }

    MCA_BTL_IB_FRAG_RETURN (frag);
    return status;
}
/*
 * For a specific module, see if this proc has matching address/modex
 * info.  If so, store the index of the matching modex entry in *index_out.
 *
 * The (module, modex entry) match-up table is computed on the first call for
 * a given proc and cached in proc->proc_ep_match_table; later calls only
 * look the module up in that table.
 *
 * module     local usNIC module to find a peer interface for
 * proc       remote proc whose modex entries are matched
 * index_out  receives the index into proc->proc_modex, or -1 on any failure
 *
 * Returns:
 *   OPAL_SUCCESS              *index_out holds a valid index
 *   OPAL_ERR_BAD_PARAM        index_out is NULL
 *   OPAL_ERR_OUT_OF_RESOURCE  the match table could not be allocated
 *   OPAL_ERR_NOT_FOUND        the proc has no match at all, or this module
 *                             has no entry in the table
 *   OPAL_ERR_UNREACH          the matched entry's MTU differs from the
 *                             module's MTU
 *   other                     error from building, solving or freeing the
 *                             assignment graph
 *
 * Implementation note: This code relies on the order of modules on a local
 * side matching the order of the modex entries that we send around, otherwise
 * both sides may not agree on a bidirectional connection.  It also assumes
 * that add_procs will be invoked on the local modules in that same order, for
 * the same reason.  If those assumptions do not hold, we will need to
 * canonicalize this match ordering somehow, probably by (jobid,vpid) pair or
 * by the interface MAC or IP address.
 */
static int match_modex(opal_btl_usnic_module_t *module,
                       opal_btl_usnic_proc_t *proc,
                       int *index_out)
{
    int err = OPAL_SUCCESS;
    size_t i;
    uint32_t num_modules;
    opal_btl_usnic_graph_t *g = NULL;
    int nme;
    int *me;
    bool proc_is_left;

    if (NULL == index_out) {
        return OPAL_ERR_BAD_PARAM;
    }
    *index_out = -1;

    num_modules = mca_btl_usnic_component.num_modules;

    /* both dimensions are printed with %d, so both are converted to int */
    opal_output_verbose(20, USNIC_OUT, "btl:usnic:%s: module=%p proc=%p with dimensions %d x %d",
                        __func__, (void *)module, (void *)proc,
                        (int)num_modules, (int)proc->proc_modex_count);

    /* We compute an interface match-up table once for each (module,proc) pair
     * and cache it in the proc.  Store per-proc instead of per-module, since
     * MPI dynamic process routines can add procs but not new modules. */
    if (NULL == proc->proc_ep_match_table) {
        proc->proc_ep_match_table = malloc(num_modules *
                                       sizeof(*proc->proc_ep_match_table));
        if (NULL == proc->proc_ep_match_table) {
            OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        /* initialize to "no matches" */
        for (i = 0; i < num_modules; ++i) {
            proc->proc_ep_match_table[i] = -1;
        }

        /* For graphs where all edges are equal (and even for some other
         * graphs), two peers making matching calculations with "mirror image"
         * graphs might not end up with the same matching.  Ensure that both
         * sides are always setting up the exact same graph by always putting
         * the process with the lower (jobid,vpid) on the "left".
         */
        proc_is_left = (proc->proc_opal->proc_name <
                        opal_proc_local_get()->proc_name);

        err = create_proc_module_graph(proc, proc_is_left, &g);
        if (OPAL_SUCCESS != err) {
            goto out_free_table;
        }

        nme = 0;
        err = opal_btl_usnic_solve_bipartite_assignment(g, &nme, &me);
        if (OPAL_SUCCESS != err) {
            OPAL_ERROR_LOG(err);
            goto out_free_graph;
        }

        /* NOTE(review): `me` is filled in by the solver and is not freed in
         * this function; confirm whether edge_pairs_to_match_table() takes
         * ownership or whether it should be freed here. */
        edge_pairs_to_match_table(proc, proc_is_left, nme, me);

        err = opal_btl_usnic_gr_free(g);
        if (OPAL_SUCCESS != err) {
            OPAL_ERROR_LOG(err);
            return err;
        }
    }


    if (!proc->proc_match_exists) {
        opal_output_verbose(5, USNIC_OUT, "btl:usnic:%s: unable to find any valid interface pairs for proc %s",
                            __func__, OPAL_NAME_PRINT(proc->proc_opal->proc_name));
        return OPAL_ERR_NOT_FOUND;
    }

    /* assuming no strange failure cases, this should always be present */
    if (NULL != proc->proc_ep_match_table && proc->proc_match_exists) {
        /* the table is indexed by the module's position in the component's
         * active-module array */
        for (i = 0; i < num_modules; ++i) {
            if (module == mca_btl_usnic_component.usnic_active_modules[i]) {
                *index_out = proc->proc_ep_match_table[i];
                break;
            }
        }
    }

    /* If MTU does not match, throw an error */
    /* TODO with UDP, do we still want to enforce this restriction or just take
     * the min of the two MTUs?  Another choice is to disqualify this pairing
     * before running the matching algorithm on it. */
    if (*index_out >= 0 &&
        proc->proc_modex[*index_out].mtu != (uint16_t) module->if_mtu) {
        opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch",
                    true,
                    opal_process_info.nodename,
                    ibv_get_device_name(module->device),
                    module->if_name,
                    module->if_mtu,
                    (NULL == proc->proc_opal->proc_hostname) ?
                    "unknown" : proc->proc_opal->proc_hostname,
                    proc->proc_modex[*index_out].mtu);
        *index_out = -1;
        return OPAL_ERR_UNREACH;
    }

    return (*index_out == -1 ? OPAL_ERR_NOT_FOUND : OPAL_SUCCESS);

    /* error exits: undo the partially built cache so that a later call
     * starts from scratch */
out_free_graph:
    opal_btl_usnic_gr_free(g);
out_free_table:
    free(proc->proc_ep_match_table);
    proc->proc_ep_match_table = NULL;
    proc->proc_match_exists = false;
    return err;
}
/*
 * Find a list of ibv_ports matching a set of criteria.
 *
 * if_include  comma-separated entries to include, or NULL
 * if_exclude  comma-separated entries to exclude, or NULL
 *             (supplying both is rejected)
 * flags       bitmask of OPAL_COMMON_VERBS_FLAGS_* selection criteria
 * stream      output stream used for the verbose diagnostics
 *
 * Every verbs device is opened and queried.  A device is kept only if at
 * least one of the transport-type or QP-type checks selected by `flags`
 * succeeds; each of its ports is then filtered by the include/exclude lists,
 * by port state (ACTIVE only) and by the link-layer flags.
 *
 * Returns a newly allocated list of opal_common_verbs_port_item_t (possibly
 * empty), or NULL if both lists were supplied, no devices were found, or an
 * allocation or verbs query failed.
 */
opal_list_t *opal_common_verbs_find_ports(const char *if_include,
                                          const char *if_exclude,
                                          int flags,
                                          int stream)
{
    int32_t num_devs = 0;
    struct ibv_device **devices;
    struct ibv_device *device;
    struct ibv_context *device_context;
    struct ibv_device_attr device_attr;
    struct ibv_port_attr port_attr;
    char **if_include_list = NULL, **if_exclude_list = NULL, **if_sanity_list = NULL;
    opal_common_verbs_device_item_t *di;
    opal_common_verbs_port_item_t *pi;
    int rc;
    uint32_t j;
    opal_list_t *port_list = NULL;
    bool want;

    /* Sanity check the include/exclude params */
    if (NULL != if_include && NULL != if_exclude) {
        return NULL;
    }

    /* Query all the IBV devices on the machine.  Use an ompi
       compatibility function, because how to get this list changed
       over the history of the IBV API. */
    devices = opal_ibv_get_device_list(&num_devs);
    if (NULL == devices || 0 == num_devs) {
        opal_output_verbose(5, stream, "no verbs interfaces found");
        /* an empty (but allocated) device list must still be given back */
        if (NULL != devices) {
            opal_ibv_free_device_list(devices);
        }
        return NULL;
    }

    opal_output_verbose(5, stream, "found %d verbs interface%s",
                        num_devs, (num_devs != 1) ? "s" : "");

    /* Allocate a list to fill */
    port_list = OBJ_NEW(opal_list_t);
    if (NULL == port_list) {
        opal_ibv_free_device_list(devices);
        return NULL;
    }

    /* The sanity list starts as a copy of whichever list was given; entries
       are removed from it as the matching devices/ports are seen. */
    if (NULL != if_include) {
        opal_output_verbose(5, stream, "finding verbs interfaces, including %s",
                            if_include);
        if_include_list = opal_argv_split(if_include, ',');
        if_sanity_list = opal_argv_copy(if_include_list);
    } else if (NULL != if_exclude) {
        opal_output_verbose(5, stream, "finding verbs interfaces, excluding %s",
                            if_exclude);
        if_exclude_list = opal_argv_split(if_exclude, ',');
        if_sanity_list = opal_argv_copy(if_exclude_list);
    }

    /* Now loop through all the devices.  Get the attributes for each
       port on each device to see if they match our selection
       criteria. */
    for (int32_t i = 0; i < num_devs; ++i) {
        /* See if this device is on the include/exclude sanity check
           list.  If it is, remove it from the sanity check list
           (i.e., we should end up with an empty list at the end if
           all entries in the sanity check list exist) */
        device = devices[i];
        check_sanity(&if_sanity_list, ibv_get_device_name(device), -1);

        opal_output_verbose(5, stream, "examining verbs interface: %s",
                            ibv_get_device_name(device));

        device_context = ibv_open_device(device);
        if (NULL == device_context) {
            opal_show_help("help-opal-common-verbs.txt",
                           "ibv_open_device fail", true,
                           opal_proc_local_get()->proc_hostname,
                           ibv_get_device_name(device),
                           errno, strerror(errno));
            goto err_free_port_list;
        }

        if (ibv_query_device(device_context, &device_attr)){
            opal_show_help("help-opal-common-verbs.txt",
                           "ibv_query_device fail", true,
                           opal_proc_local_get()->proc_hostname,
                           ibv_get_device_name(device),
                           errno, strerror(errno));
            /* NOTE(review): device_context is not closed on this path; this
               looks like a leaked context -- confirm. */
            goto err_free_port_list;
        }

        /* Now that we have the attributes of this device, remove all
           ports of this device from the sanity check list.  Note that
           IBV ports are indexed from 1, not 0. */
        for (j = 1; j <= device_attr.phys_port_cnt; j++) {
            check_sanity(&if_sanity_list, ibv_get_device_name(device), j);
        }

        /* Check the device-specific flags to see if we want this
           device */
        want = false;

        if (flags & OPAL_COMMON_VERBS_FLAGS_TRANSPORT_IB &&
            IBV_TRANSPORT_IB == device->transport_type) {
            opal_output_verbose(5, stream, "verbs interface %s has right type (IB)",
                                ibv_get_device_name(device));
            want = true;
        }
        if (flags & OPAL_COMMON_VERBS_FLAGS_TRANSPORT_IWARP &&
            IBV_TRANSPORT_IWARP == device->transport_type) {
            opal_output_verbose(5, stream, "verbs interface %s has right type (IWARP)",
                                ibv_get_device_name(device));
            want = true;
        }

        /* Check for RC or UD QP support */
        if (flags & OPAL_COMMON_VERBS_FLAGS_RC) {
            rc = opal_common_verbs_qp_test(device_context, flags);
            if (OPAL_SUCCESS == rc) {
                want = true;
                opal_output_verbose(5, stream,
                                    "verbs interface %s supports RC QPs",
                                    ibv_get_device_name(device));
            } else {
                opal_output_verbose(5, stream,
                                    "verbs interface %s failed to make RC QP",
                                    ibv_get_device_name(device));
            }
        }
        if (flags & OPAL_COMMON_VERBS_FLAGS_UD) {
            rc = opal_common_verbs_qp_test(device_context, flags);
            if (OPAL_SUCCESS == rc) {
                want = true;
                opal_output_verbose(5, stream,
                                    "verbs interface %s supports UD QPs",
                                    ibv_get_device_name(device));
            } else if (OPAL_ERR_TYPE_MISMATCH == rc) {
                opal_output_verbose(5, stream,
                                    "verbs interface %s made an RC QP! we don't want RC-capable devices",
                                    ibv_get_device_name(device));
            } else {
                opal_output_verbose(5, stream,
                                    "verbs interface %s failed to make UD QP",
                                    ibv_get_device_name(device));
            }
        }

        /* If we didn't want it, go to the next device.
           NOTE(review): device_context stays open when the device is
           skipped here; this looks like a leaked context -- confirm. */
        if (!want) {
            continue;
        }

        /* Make a device_item_t to hold the device information */
        di = OBJ_NEW(opal_common_verbs_device_item_t);
        if (NULL == di) {
            goto err_free_port_list;
        }
        di->device = device;
        di->context = device_context;
        di->device_attr = device_attr;
        di->device_name = strdup(ibv_get_device_name(device));

        /* Note IBV ports are 1 based (not 0 based) */
        for (j = 1; j <= device_attr.phys_port_cnt; j++) {

            /* If we don't want this port (based on if_include /
               if_exclude lists), skip it */
            if (!want_this_port(if_include_list, if_exclude_list, di, j)) {
                opal_output_verbose(5, stream, "verbs interface %s:%d: rejected by include/exclude",
                                    ibv_get_device_name(device), (int) j);
                continue;
            }

            /* Query the port */
            if (ibv_query_port(device_context, (uint8_t) j, &port_attr)) {
                opal_show_help("help-opal-common-verbs.txt",
                               "ibv_query_port fail", true,
                               opal_proc_local_get()->proc_hostname,
                               ibv_get_device_name(device),
                               errno, strerror(errno));
                goto err_release_device;
            }

            /* We definitely only want ACTIVE ports */
            if (IBV_PORT_ACTIVE != port_attr.state) {
                opal_output_verbose(5, stream, "verbs interface %s:%d: not ACTIVE",
                                    ibv_get_device_name(device), (int) j);
                continue;
            }

            /* Check the port-specific flags to see if we want this
               port */
            want = false;
            if (0 == flags) {
                want = true;
            }

            if ((flags & (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                          OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) ==
                 (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                  OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) {
                /* If they specified both link layers, then we want this port */
                want = true;
            } else if ((flags & (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                                 OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) == 0) {
                /* If they specified neither link layer, then we want this port */
                want = true;
            }
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
            else if (flags & OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB) {
                if (IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) {
                    want = true;
                } else {
                    opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want IB)",
                                        ibv_get_device_name(device), (int) j,
                                        link_layer_to_str(port_attr.link_layer));
                }
            } else if (flags & OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET) {
                if (IBV_LINK_LAYER_ETHERNET == port_attr.link_layer) {
                    want = true;
                } else {
                    opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want Ethernet)",
                                        ibv_get_device_name(device), (int) j,
                                        link_layer_to_str(port_attr.link_layer));
                }
            }
#endif

            if (!want) {
                continue;
            }

            /* If we got this far, we want the port.  Make an item for it. */
            pi = OBJ_NEW(opal_common_verbs_port_item_t);
            if (NULL == pi) {
                goto err_release_device;
            }
            pi->device = di;
            pi->port_num = j;
            pi->port_attr = port_attr;
            OBJ_RETAIN(di);

            /* Add the port item to the list */
            opal_list_append(port_list, &pi->super);
            opal_output_verbose(5, stream, "found acceptable verbs interface %s:%d",
                                ibv_get_device_name(device), (int) j);
        }

        /* We're done with the device; if some ports are using it, its
           ref count will be > 0, and therefore the device won't be
           deleted here. */
        OBJ_RELEASE(di);
    }

    /* Sanity check that the devices specified in the if_include /
       if_exclude lists actually existed.  If this is true, then the
       sanity list will now be empty.  If there are still items left
       on the list, then they didn't exist.  Bad.  Print a warning (if
       the warning is not disabled). */
    if (0 != opal_argv_count(if_sanity_list)) {
        if (opal_common_verbs_warn_nonexistent_if) {
            char *str = opal_argv_join(if_sanity_list, ',');
            opal_show_help("help-opal-common-verbs.txt", "nonexistent port",
                           true, opal_proc_local_get()->proc_hostname,
                           ((NULL != if_include) ? "in" : "ex"), str);
            free(str);

            /* Only warn once per process */
            opal_common_verbs_warn_nonexistent_if = false;
        }
    }
    if (NULL != if_sanity_list) {
        opal_argv_free(if_sanity_list);
    }

    opal_argv_free(if_include_list);
    opal_argv_free(if_exclude_list);

    /* All done! */
    opal_ibv_free_device_list(devices);
    return port_list;

 err_release_device:
    /* Drop the reference taken when the current device item was created;
       references held by port items already on the list are dropped when
       the list is released below. */
    OBJ_RELEASE(di);
 err_free_port_list:
    OPAL_LIST_RELEASE(port_list);
    opal_ibv_free_device_list(devices);

    if (NULL != if_sanity_list) {
        opal_argv_free(if_sanity_list);
    }

    opal_argv_free(if_include_list);
    opal_argv_free(if_exclude_list);

    return NULL;
}
/*
 * Handle one asynchronous verbs event for the device whose async fd is
 * devices_poll->async_pollfd[index].fd.
 *
 * devices_poll        poll set holding the async fds
 * index               slot in devices_poll->async_pollfd that is being served
 * ignore_qp_err_list  list of mca_btl_openib_qp_list items; an
 *                     IBV_EVENT_QP_FATAL on one of these QPs is not reported
 *
 * Returns OPAL_SUCCESS when the event was handled, or when no event was
 * pending (EWOULDBLOCK).  Returns OPAL_ERROR if no device owns the fd, if
 * the event could not be fetched, or if SRQ-limit handling failed.  Every
 * event that was fetched is acknowledged before returning.
 */
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index,
                                    opal_list_t *ignore_qp_err_list)
{
    int j;
    mca_btl_openib_device_t *device = NULL;
    struct ibv_async_event event;
    bool xrc_event = false;
    int event_type;

    /* We need to find correct device and process this event */
    for (j=0; j < mca_btl_openib_component.ib_num_btls; j++) {
        if (mca_btl_openib_component.openib_btls[j]->device->ib_dev_context->async_fd ==
                devices_poll->async_pollfd[index].fd ) {
            device = mca_btl_openib_component.openib_btls[j]->device;
            break;
        }
    }
    if (NULL != device) {
        if (ibv_get_async_event((struct ibv_context *)device->ib_dev_context,&event) < 0) {
            if (EWOULDBLOCK == errno) {
                /* No event found?
                 * It was already handled by somebody else */
                return OPAL_SUCCESS;
            } else {
                BTL_ERROR(("Failed to get async event"));
                return OPAL_ERROR;
            }
        }

        event_type = event.event_type;
#if HAVE_XRC
        /* is it XRC event ?*/
        if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
            xrc_event = true;
            /* Clear the XRC flag bit and handle the event as usual */
            event_type ^= IBV_XRC_QP_EVENT_FLAG;
        }
#endif
        switch(event_type) {
            case IBV_EVENT_PATH_MIG:
                BTL_ERROR(("Alternative path migration event reported"));
                if (APM_ENABLED) {
                    BTL_ERROR(("Trying to find additional path..."));
                    if (!xrc_event)
                        mca_btl_openib_load_apm(event.element.qp,
                                qp2endpoint(event.element.qp, device));
#if HAVE_XRC
                    else
                        mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
                                xrc_qp2endpoint(event.element.xrc_qp_num, device));
#endif
                }
                break;
            case IBV_EVENT_DEVICE_FATAL:
                /* Set the flag to fatal */
                device->got_fatal_event = true;
                /* It is not critical to protect the counter */
                OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
                /* fall through: a fatal device event is also reported below */
            case IBV_EVENT_CQ_ERR:
            case IBV_EVENT_QP_FATAL:
              if (event_type == IBV_EVENT_QP_FATAL) {
                  opal_list_item_t *item;
                  mca_btl_openib_qp_list *qp_item;
                  bool in_ignore_list = false;

                  BTL_VERBOSE(("QP is in err state %p", (void *)event.element.qp));

                  /* look through ignore list */
                  for (item = opal_list_get_first(ignore_qp_err_list);
                       item != opal_list_get_end(ignore_qp_err_list);
                       item = opal_list_get_next(item)) {
                      qp_item = (mca_btl_openib_qp_list *)item;
                      if (qp_item->qp == event.element.qp) {
                          BTL_VERBOSE(("QP %p is in error ignore list",
                                       (void *)event.element.qp));
                          in_ignore_list = true;
                          break;
                      }
                  }
                  /* this break leaves the switch: the error is not reported,
                     but the event is still acked below */
                  if (in_ignore_list)
                      break;
              }
                /* fall through: report the error */
            case IBV_EVENT_QP_REQ_ERR:
            case IBV_EVENT_QP_ACCESS_ERR:
            case IBV_EVENT_PATH_MIG_ERR:
            case IBV_EVENT_SRQ_ERR:
                opal_show_help("help-mpi-btl-openib.txt", "of error event",
                    true,opal_proc_local_get()->proc_hostname, (int)getpid(),
                    event_type,
                    openib_event_to_str((enum ibv_event_type)event_type),
                    xrc_event ? "true" : "false");
                break;
            case IBV_EVENT_PORT_ERR:
                opal_show_help("help-mpi-btl-openib.txt", "of error event",
                    true,opal_proc_local_get()->proc_hostname, (int)getpid(),
                    event_type,
                    openib_event_to_str((enum ibv_event_type)event_type),
                    xrc_event ? "true" : "false");
                /* Set the flag to indicate port error */
                device->got_port_event = true;
                OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
                break;
            /* events that need no action here */
            case IBV_EVENT_COMM_EST:
            case IBV_EVENT_PORT_ACTIVE:
            case IBV_EVENT_SQ_DRAINED:
            case IBV_EVENT_LID_CHANGE:
            case IBV_EVENT_PKEY_CHANGE:
            case IBV_EVENT_SM_CHANGE:
            case IBV_EVENT_QP_LAST_WQE_REACHED:
#if HAVE_DECL_IBV_EVENT_CLIENT_REREGISTER
            case IBV_EVENT_CLIENT_REREGISTER:
#endif
                break;
            /* The event is signaled when the number of preposted receive
               WQEs drops below the predefined threshold - srq_limit */
            case IBV_EVENT_SRQ_LIMIT_REACHED:
                if(OPAL_SUCCESS !=
                         btl_openib_async_srq_limit_event(event.element.srq)) {
                    /* the event was fetched, so it must be acked even though
                       handling it failed */
                    ibv_ack_async_event(&event);
                    return OPAL_ERROR;
                }

                break;
            default:
                opal_show_help("help-mpi-btl-openib.txt", "of unknown event",
                        true,opal_proc_local_get()->proc_hostname, (int)getpid(),
                        event_type, xrc_event ? "true" : "false");
                break;
        }
        ibv_ack_async_event(&event);
    } else {
        /* if (device == NULL), then failed to locate the device!
           This should never happen... */
        BTL_ERROR(("Failed to find device with FD %d.  "
                   "Fatal error, stoping asynch event thread",
                   devices_poll->async_pollfd[index].fd));
        return OPAL_ERROR;
    }
    return OPAL_SUCCESS;
}
/*
 * Return the TCP BTL proc object for `proc`, creating it on first use.
 *
 * The component's tcp_procs table is consulted first; a hit is returned
 * as-is.  Otherwise a new object is built from the addresses the peer
 * exported through the modex and stored in the table.  The whole operation
 * runs under mca_btl_tcp_component.tcp_lock.
 *
 * Returns the proc object, or NULL if allocation failed, the modex lookup
 * failed, or the modex payload is not a whole number of mca_btl_tcp_addr_t
 * entries.
 */
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
{
    mca_btl_tcp_proc_t* btl_proc;
    size_t modex_bytes;
    int status;

    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);

    /* fast path: already known */
    status = opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
                                       proc->proc_name, (void**)&btl_proc);
    if (OPAL_SUCCESS == status) {
        goto unlock;
    }

    btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
    if (NULL == btl_proc) {
        goto discard;
    }

    btl_proc->proc_opal = proc;
    OBJ_RETAIN(btl_proc->proc_opal);

    /* fetch the TCP addresses this proc exported */
    OPAL_MODEX_RECV(status, &mca_btl_tcp_component.super.btl_version,
                    &proc->proc_name, (uint8_t**)&btl_proc->proc_addrs, &modex_bytes);
    if (OPAL_SUCCESS != status) {
        /* a missing entry is not worth an error message */
        if (OPAL_ERR_NOT_FOUND != status) {
            BTL_ERROR(("opal_modex_recv: failed with return value=%d", status));
        }
        goto discard;
    }

    /* the payload must be an array of complete address records */
    if (0 != (modex_bytes % sizeof(mca_btl_tcp_addr_t))) {
        BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n",
                   (unsigned long) modex_bytes, (unsigned long)sizeof(mca_btl_tcp_addr_t)));
        goto discard;
    }
    btl_proc->proc_addr_count = modex_bytes / sizeof(mca_btl_tcp_addr_t);

    /* endpoint array: one slot per exported address, plus one spare */
    btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        malloc((1 + btl_proc->proc_addr_count) *
               sizeof(mca_btl_base_endpoint_t*));
    if (NULL == btl_proc->proc_endpoints) {
        goto discard;
    }

    /* remember the object that describes the local process */
    if (NULL == mca_btl_tcp_component.tcp_local && (proc == opal_proc_local_get())) {
        mca_btl_tcp_component.tcp_local = btl_proc;
    }

    /* Translate the OPAL address-family codes into the OS constants so that
     * later code can test for AF_INET (or AF_INET6) directly and no longer
     * has to care about byte ordering.
     */
    for (unsigned int idx = 0; idx < btl_proc->proc_addr_count; ++idx) {
        if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[idx].addr_family) {
            btl_proc->proc_addrs[idx].addr_family = AF_INET;
        }
#if OPAL_ENABLE_IPV6
        if (MCA_BTL_TCP_AF_INET6 == btl_proc->proc_addrs[idx].addr_family) {
            btl_proc->proc_addrs[idx].addr_family = AF_INET6;
        }
#endif
    }

    /* fully built: publish it in the table of all proc instances */
    opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs,
                              proc->proc_name, btl_proc);
    goto unlock;

discard:
    /* construction failed part-way: drop whatever was created */
    if (btl_proc) {
        OBJ_RELEASE(btl_proc);
        btl_proc = NULL;
    }

unlock:
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);

    return btl_proc;
}
Exemple #20
0
/*
 * Associate this TCP BTL module with each remote proc listed in procs[].
 *
 * For every entry other than the local proc, the tcp_proc object is obtained
 * through mca_btl_tcp_proc_create().  An endpoint bound to this module is
 * then either found among that proc's endpoints or freshly created, inserted
 * into the proc and appended to the module's endpoint list.  For each such
 * proc the endpoint is stored in peers[i], bit i of "reachable" is set (when
 * reachable is non-NULL) and the progress event-user count is incremented.
 *
 * Entries that are skipped (the local proc, a failed proc create, or a
 * failed insert) leave peers[i] and the bitmap untouched.
 *
 * Returns OPAL_ERR_OUT_OF_RESOURCE when the local proc is unavailable or an
 * endpoint cannot be allocated, OPAL_SUCCESS otherwise.
 */
int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
                           size_t nprocs,
                           struct opal_proc_t **procs,
                           struct mca_btl_base_endpoint_t** peers,
                           opal_bitmap_t* reachable )
{
    mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl;
    const opal_proc_t* const local_proc = opal_proc_local_get();
    const int proc_count = (int) nprocs;

    if( NULL == local_proc ) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    for(int idx = 0; idx < proc_count; idx++) {
        struct opal_proc_t* const remote = procs[idx];
        mca_btl_tcp_proc_t* tcp_proc;
        mca_btl_base_endpoint_t* endpoint = NULL;

        /* Do not create loopback TCP connections */
        if( local_proc == remote ) {
            continue;
        }

        tcp_proc = mca_btl_tcp_proc_create(remote);
        if( NULL == tcp_proc ) {
            continue;
        }

        OPAL_THREAD_LOCK(&tcp_proc->proc_lock);

        /* Reuse the endpoint this module already owns on the proc, if any.
         * "endpoint" stays NULL when the search comes up empty. */
        for (uint32_t j = 0 ; j < (uint32_t)tcp_proc->proc_endpoint_count ; ++j) {
            mca_btl_base_endpoint_t* const candidate = tcp_proc->proc_endpoints[j];
            if (candidate->endpoint_btl == tcp_btl) {
                endpoint = candidate;
                break;
            }
        }

        if (NULL == endpoint) {
            /* The tcp_proc is shared by all TCP BTL instances that are
             * trying to reach this destination, so the new endpoint is
             * cached on it. */
            endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
            if( NULL == endpoint ) {
                OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
                return OPAL_ERR_OUT_OF_RESOURCE;
            }

            endpoint->endpoint_btl = tcp_btl;
            if( OPAL_SUCCESS != mca_btl_tcp_proc_insert(tcp_proc, endpoint) ) {
                OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
                OBJ_RELEASE(endpoint);
                continue;
            }

            OPAL_THREAD_LOCK(&tcp_btl->tcp_endpoints_mutex);
            opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)endpoint);
            OPAL_THREAD_UNLOCK(&tcp_btl->tcp_endpoints_mutex);
        }

        OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);

        if (NULL != reachable) {
            opal_bitmap_set_bit(reachable, idx);
        }

        peers[idx] = endpoint;

        /* The event library user count goes up once per peer so that the
           library stays in use until we are no longer connected to it. */
        opal_progress_event_users_increment();
    }

    return OPAL_SUCCESS;
}
Exemple #21
0
/**
 * Set up scif endpoints for the procs in procs[] that this BTL can reach.
 *
 * A proc is usable when it is not the local proc and
 * OPAL_PROC_ON_LOCAL_HOST() is true for its flags.  The usable procs are
 * counted first; if the module has no endpoint array yet, one is allocated
 * with that many entries and the mpools are set up.  Each usable proc then
 * gets an endpoint initialized with mca_btl_scif_ep_init(), its bit set in
 * "reachable" and a pointer to the endpoint stored in peers[i].  Finally
 * the listening thread is started if it is not already running.
 *
 * peers[i] is set to NULL for procs that are not on the local host and is
 * left untouched for the local proc itself.
 *
 * @param btl        scif BTL module
 * @param nprocs     number of entries in procs[] and peers[]
 * @param procs      procs to examine
 * @param peers      output array of endpoint pointers
 * @param reachable  bitmap in which reachable procs are flagged
 *
 * @return OPAL_SUCCESS on success; OPAL_ERR_OUT_OF_RESOURCE if the endpoint
 *         array cannot be allocated; OPAL_ERROR if the listening thread
 *         cannot be created; otherwise the error reported by
 *         mca_btl_scif_setup_mpools() or mca_btl_scif_ep_init().
 */
int mca_btl_scif_add_procs(struct mca_btl_base_module_t* btl,
                           size_t nprocs,
                           struct opal_proc_t **procs,
                           struct mca_btl_base_endpoint_t **peers,
                           opal_bitmap_t *reachable) {
    mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl;
    size_t procs_on_board, i, board_proc;
    opal_proc_t *my_proc = opal_proc_local_get();
    int rc;

    /* determine how many procs are on this board */
    for (i = 0, procs_on_board = 0 ; i < nprocs ; ++i) {
        struct opal_proc_t *opal_proc = procs[i];

        /* skip ourselves; scif can only be used with procs on this board */
        if (my_proc == opal_proc ||
            !OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags)) {
            continue;
        }

        procs_on_board++;
    }

    /* allocate space for the detected peers and setup the mpool */
    if (NULL == scif_module->endpoints) {
        scif_module->endpoints = calloc (procs_on_board, sizeof (mca_btl_base_endpoint_t));
        if (OPAL_UNLIKELY(NULL == scif_module->endpoints)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        rc = mca_btl_scif_setup_mpools (scif_module);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/scif error setting up mpools/free lists"));
            return rc;
        }
    }

    for (i = 0, board_proc = 0 ; i < nprocs ; ++i) {
        struct opal_proc_t *opal_proc = procs[i];

        /* the local proc is skipped without touching peers[i] */
        if (my_proc == opal_proc) {
            continue;
        }

        if (!OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags)) {
            peers[i] = NULL;
            /* scif can only be used with procs on this board */
            continue;
        }

        /* Initialize endpoints */
        rc = mca_btl_scif_ep_init (scif_module->endpoints + board_proc, scif_module, opal_proc);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/scif error initializing endpoint"));
            return rc;
        }

        scif_module->endpoints[board_proc].id = board_proc;

        /* Set the reachable bit; as before, the result is not checked */
        (void) opal_bitmap_set_bit (reachable, i);

        /* Store a reference to this peer */
        peers[i] = scif_module->endpoints + board_proc;

        board_proc++;
    }

    BTL_VERBOSE(("%lu procs on board\n", (unsigned long) procs_on_board));

    scif_module->endpoint_count = procs_on_board;

    if (!mca_btl_scif_module.listening) {
        /* start listening thread */
        rc = pthread_create (&mca_btl_scif_module.listen_thread, NULL, mca_btl_scif_connect_accept, NULL);
        /* pthread_create() reports failure with a positive error number
           (it never returns a negative value), so test against zero */
        if (0 != rc) {
            return OPAL_ERROR;
        }
        mca_btl_scif_module.listening = true;
    }

    return OPAL_SUCCESS;
}
Exemple #22
0
/*
 * Create a Portals4 endpoint for every proc in procs[] and, once at least
 * one proc is known, perform the one-time interface initialization.
 *
 * For each proc the architecture is compared against the local proc's
 * (heterogeneous operation is rejected), an endpoint is created into
 * btl_peer_data[i], the module's proc counter is incremented and bit i of
 * "reachable" is set.  If the component still needs initialization, the
 * map table is created (logical addressing only) and
 * btl_portals4_init_interface() is called.
 *
 * Returns OPAL_SUCCESS on success, OPAL_ERR_NOT_SUPPORTED on an
 * architecture mismatch, or the error reported by create_endpoint(),
 * create_maptable() or btl_portals4_init_interface().
 */
int
mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
                          size_t nprocs,
                          struct opal_proc_t **procs,
                          struct mca_btl_base_endpoint_t** btl_peer_data,
                          opal_bitmap_t* reachable)
{
    struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
    int ret;
    size_t i;

    opal_output_verbose(50, opal_btl_base_framework.framework_output,
                        "mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d",
                        (int) nprocs,
                        (int) portals4_btl->portals_num_procs,
                        portals4_btl->interface_num);

    /*
     * The PML handed us a list of procs that need Portals4
     * peer info.  Complete those procs here.
     */
    for (i = 0 ; i < nprocs ; ++i) {
        struct opal_proc_t *curr_proc = procs[i];

        /* portals doesn't support heterogeneous yet... */
        if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "Portals 4 BTL does not support heterogeneous operations.");
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                "Proc %s architecture %x, mine %x.",
                OPAL_NAME_PRINT(curr_proc->proc_name),
                curr_proc->proc_arch, opal_proc_local_get()->proc_arch);
            return OPAL_ERR_NOT_SUPPORTED;
        }

        ret = create_endpoint(portals4_btl->interface_num,
                              curr_proc,
                              &btl_peer_data[i]);
        /* Do not count the proc, mark it reachable, or dereference
           btl_peer_data[i] below when no endpoint was created. */
        if (OPAL_SUCCESS != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: mca_btl_portals4_add_procs::create_endpoint() failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }

        OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_num_procs, 1);
        /* and here we can reach */
        opal_bitmap_set_bit(reachable, i);

        /* i is a size_t; cast so the argument matches the %lx conversion */
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "add_procs: rank=%lx nid=%x pid=%x for NI %d",
            (unsigned long) i,
            btl_peer_data[i]->ptl_proc.phys.nid,
            btl_peer_data[i]->ptl_proc.phys.pid,
            portals4_btl->interface_num));
    }

    /* One-time interface setup, deferred until at least one proc is known */
    if (mca_btl_portals4_component.need_init && portals4_btl->portals_num_procs > 0) {
        if (mca_btl_portals4_component.use_logical) {
            ret = create_maptable(portals4_btl, nprocs, procs, btl_peer_data);
            if (OPAL_SUCCESS != ret) {
                opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                    "%s:%d: mca_btl_portals4_add_procs::create_maptable() failed: %d\n",
                                    __FILE__, __LINE__, ret);
                return ret;
            }
        }

        ret = btl_portals4_init_interface();
        if (OPAL_SUCCESS != ret) {
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "%s:%d: portals4 interface initialization failed: %d",
                                __FILE__, __LINE__, ret);
            return ret;
        }
        mca_btl_portals4_component.need_init = 0;
    }

    return OPAL_SUCCESS;
}
Exemple #23
0
/* Emit a terse one-line statistics summary through opal_output(0, ...).
 *
 * The line starts with "prefix" and carries the send, receive, ACK and CRC
 * counters of "module".  A PML section is appended: when both PML counters
 * are zero it shows the min/max un-ACKed send count and receive window
 * depth over all endpoints, otherwise the PML send/callback counts and
 * their difference.  If "reset_stats" is true, the statistics are reset
 * after printing.
 *
 * NOTE: nothing in this routine looks at the stats_enable setting, so it
 * can be called from debugging code even when normal stats reporting is
 * not enabled.
 */
void opal_btl_usnic_print_stats(
    opal_btl_usnic_module_t *module,
    const char *prefix,
    bool reset_stats)
{
    char tmp[128], str[2048];

    /* 'g' when every receive was reposted, 'B' otherwise */
    const char repost_flag =
        ((module->stats.num_total_recvs -
          module->stats.num_recv_reposts) == 0) ? 'g' : 'B';

    /* 'g' when the per-category receive counters add up to the total,
       'B' otherwise */
    const char accounting_flag =
        ((module->stats.num_total_recvs -
          module->stats.num_frag_recvs -
          module->stats.num_chunk_recvs -
          module->stats.num_badfrag_recvs -
          module->stats.num_oow_low_recvs -
          module->stats.num_oow_high_recvs -
          module->stats.num_dup_recvs -
          module->stats.num_ack_recvs -
          module->stats.num_unk_recvs) == 0) ? 'g' : 'B';

    /* The usuals */
    snprintf(str, sizeof(str), "%s:MCW:%3u, ST(P+D)/F/C/R(T+F)/A:%8lu(%8u+%8u)/%8lu/%8lu/%4lu(%4lu+%4lu)/%8lu, RcvTot/Chk/F/C/L/H/D/BF/A:%8lu/%c%c/%8lu/%8lu/%4lu+%2lu/%4lu/%4lu/%6lu OA/DA %4lu/%4lu CRC:%4lu ",
             prefix,
             opal_proc_local_get()->proc_name.vpid,

             /* send side */
             module->stats.num_total_sends,
             module->mod_channels[USNIC_PRIORITY_CHANNEL].num_channel_sends,
             module->mod_channels[USNIC_DATA_CHANNEL].num_channel_sends,
             module->stats.num_frag_sends,
             module->stats.num_chunk_sends,
             module->stats.num_resends,
             module->stats.num_timeout_retrans,
             module->stats.num_fast_retrans,
             module->stats.num_ack_sends,

             /* receive side */
             module->stats.num_total_recvs,
             repost_flag,
             accounting_flag,
             module->stats.num_frag_recvs,
             module->stats.num_chunk_recvs,
             module->stats.num_oow_low_recvs,
             module->stats.num_oow_high_recvs,
             module->stats.num_dup_recvs,
             module->stats.num_badfrag_recvs,
             module->stats.num_ack_recvs,

             /* old / duplicate ACKs */
             module->stats.num_old_dup_acks,
             module->stats.num_dup_acks,

             module->stats.num_crc_errors);

    if (module->stats.pml_module_sends +
        module->stats.pml_send_callbacks == 0) {
        /* No PML traffic was counted: report the extents of the send and
           receive windows across all endpoints instead */
        int64_t su_min = WINDOW_SIZE * 2, su_max = 0;
        int64_t rd_min = WINDOW_SIZE * 2, rd_max = 0;
        opal_list_item_t *item;

        opal_mutex_lock(&module->all_endpoints_lock);
        for (item = opal_list_get_first(&module->all_endpoints);
             item != opal_list_get_end(&(module->all_endpoints));
             item = opal_list_get_next(item)) {
            opal_btl_usnic_endpoint_t *endpoint =
                container_of(item, mca_btl_base_endpoint_t,
                             endpoint_endpoint_li);

            /* Sends for which we are still waiting for an ACK */
            const int64_t send_unacked =
                endpoint->endpoint_next_seq_to_send -
                endpoint->endpoint_ack_seq_rcvd - 1;

            /* Distance between the highest sequence number received and
               the next contiguous one expected */
            const int64_t recv_depth =
                endpoint->endpoint_highest_seq_rcvd -
                endpoint->endpoint_next_contig_seq_to_recv;

            su_max = (send_unacked > su_max) ? send_unacked : su_max;
            su_min = (send_unacked < su_min) ? send_unacked : su_min;
            rd_max = (recv_depth > rd_max) ? recv_depth : rd_max;
            rd_min = (recv_depth < rd_min) ? recv_depth : rd_min;
        }
        opal_mutex_unlock(&module->all_endpoints_lock);

        snprintf(tmp, sizeof(tmp), "PML S:%1ld, Win!A/R:%4ld/%4ld %4ld/%4ld",
                 module->stats.pml_module_sends,
                 su_min, su_max,
                 rd_min, rd_max);
    } else {
        snprintf(tmp, sizeof(tmp), "PML S/CB/Diff:%4lu/%4lu=%4ld",
                 module->stats.pml_module_sends,
                 module->stats.pml_send_callbacks,
                 module->stats.pml_module_sends -
                 module->stats.pml_send_callbacks);
    }

    /* Append the PML section, leaving room for the terminating NUL */
    strncat(str, tmp, sizeof(str) - strlen(str) - 1);
    opal_output(0, "%s", str);

    if (reset_stats) {
        usnic_stats_reset(module);
    }
}
Exemple #24
0
/*
 * Set up vader endpoints for the procs in procs[] that share this node.
 *
 * A proc qualifies when its jobid matches the local proc's and
 * OPAL_PROC_ON_LOCAL_NODE() is true for its flags.  Each qualifying proc
 * (the local proc included) is given the next slot of the component's
 * endpoint array, stored in peers[i] and initialized with
 * init_vader_endpoint().  Every qualifying proc except the local one is
 * also flagged in "reachability".  peers[i] is set to NULL for procs that
 * do not qualify.
 *
 * Returns OPAL_SUCCESS on success (including the case where there are no
 * local peers), OPAL_ERR_OUT_OF_RESOURCE if the local proc is unavailable,
 * OPAL_ERROR if the local rank is undefined, or the first error reported
 * by vader_btl_first_time_init(), opal_bitmap_set_bit() or
 * init_vader_endpoint().
 */
static int vader_add_procs (struct mca_btl_base_module_t* btl,
                            size_t nprocs, struct opal_proc_t **procs,
                            struct mca_btl_base_endpoint_t **peers,
                            opal_bitmap_t *reachability)
{
    mca_btl_vader_t *vader_btl = (mca_btl_vader_t *) btl;
    const opal_proc_t *self = opal_proc_local_get();
    const int32_t proc_count = (int32_t) nprocs;
    int32_t next_slot = 0;
    int rc;

    /* the local proc structure is required */
    if (NULL == self) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* nothing to do if there is nobody to talk to */
    if (1 > MCA_BTL_VADER_NUM_LOCAL_PEERS) {
        return OPAL_SUCCESS;
    }

    /* the local rank must have been defined */
    if (0 > MCA_BTL_VADER_LOCAL_RANK) {
        return OPAL_ERROR;
    }

    /* one-time initialization of the module */
    if (!vader_btl->btl_inited) {
        rc = vader_btl_first_time_init (vader_btl, 1 + MCA_BTL_VADER_NUM_LOCAL_PEERS);
        if (OPAL_SUCCESS != rc) {
            return rc;
        }
    }

    for (int32_t idx = 0 ; idx < proc_count ; ++idx) {
        struct opal_proc_t *candidate = procs[idx];

        /* only procs in my job and on my node can be reached via shmem */
        if (candidate->proc_name.jobid != self->proc_name.jobid ||
            !OPAL_PROC_ON_LOCAL_NODE(candidate->proc_flags)) {
            peers[idx] = NULL;
            continue;
        }

        /* flag every qualifying proc but ourselves as reachable */
        if (self != candidate) {
            rc = opal_bitmap_set_bit (reachability, idx);
            if (OPAL_SUCCESS != rc) {
                return rc;
            }
        }

        /* hand out the next endpoint slot and initialize it */
        peers[idx] = mca_btl_vader_component.endpoints + next_slot;
        rc = init_vader_endpoint (peers[idx], candidate, next_slot);
        next_slot++;
        if (OPAL_SUCCESS != rc) {
            return rc;
        }
    }

    return OPAL_SUCCESS;
}