예제 #1
0
/*
 * Select the BTL used to reach PE `dst`.
 *
 * The first entry of the endpoint's btl_rdma array is used; when that array
 * is empty the first entry of btl_eager is used instead.
 *
 * dst     - destination PE
 * btl_id  - [out] receives proc->transport_ids[0] once a BTL has been chosen;
 *           not written when the function fails
 *
 * Returns the chosen BML BTL, or NULL when the proc, its BML endpoint or a
 * usable BTL array entry cannot be found.
 */
static inline mca_bml_base_btl_t *get_next_btl(int dst, int *btl_id)
{
    mca_bml_base_endpoint_t* endpoint;
    mca_bml_base_btl_t* bml_btl;
    oshmem_proc_t *proc;
    mca_bml_base_btl_array_t *btl_array = NULL;
    int size = 0;

    /* get endpoint and btl */
    proc = oshmem_proc_group_all(dst);
    if (!proc) {
        SPML_ERROR("Can not find destination proc for pe=%d", dst);
        return NULL ;
    }

    endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
    if (!endpoint) {
        SPML_ERROR("pe=%d proc has no endpoint", dst);
        return NULL ;
    }

    /* At the moment always return first transport */
    btl_array = &endpoint->btl_rdma;
    size = mca_bml_base_btl_array_get_size(btl_array);

    if (0 >= size) {
        /* Possibly this is SM BTL with KNEM disabled? Then we should use send based get/put */
        /*
           This hack is necessary for the case when KNEM is not available.
           In this case we still want to use send/recv of SM BTL for put and get
           but SM BTL is not in the rdma list anymore
        */
        btl_array = &endpoint->btl_eager;
        size = mca_bml_base_btl_array_get_size(btl_array);
        if (0 >= size) {
            /* Neither array holds a BTL: fail explicitly instead of indexing
             * an empty array. */
            SPML_ERROR("no SHMEM capable transport for dest pe=%d", dst);
            return NULL ;
        }

        /* Chose SHMEM capable btl from eager array. Not filter now: take the
         * first (but could appear on demand). */
        bml_btl = mca_bml_base_btl_array_get_index(btl_array, 0);
        /* NOTE(review): the result is unused; the call is kept only in case
         * _find_btl_id has side effects - confirm and drop if it has none. */
        _find_btl_id(bml_btl);
    }

    bml_btl = mca_bml_base_btl_array_get_index(btl_array, 0);
    *btl_id = proc->transport_ids[0];

#if SPML_YODA_DEBUG == 1
    assert(*btl_id >= 0 && *btl_id < YODA_BTL_MAX);
    SPML_VERBOSE(100, "pe=%d reachable via btl %s %d", dst,
                 bml_btl->btl->btl_component->btl_version.mca_component_name, *btl_id);
#endif
    return bml_btl;
}
예제 #2
0
/*
 * Count how many RDMA BTLs of an endpoint qualify for the pipeline protocol.
 *
 * At most min(number of RDMA BTLs, mca_pml_ob1.max_rdma_per_request) BTLs are
 * fetched with mca_bml_base_btl_array_get_next().  A fetched BTL is counted
 * when mca_pml_ob1.use_all_rdma is set, or when an entry of the eager array
 * shares its btl_endpoint.
 */
size_t mca_pml_ob1_rdma_pipeline_btls_count (mca_bml_base_endpoint_t* bml_endpoint)
{
    const int rdma_total = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
    const int eager_total = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
    int usable = 0;
    int visited = 0;

    while (visited < rdma_total && visited < mca_pml_ob1.max_rdma_per_request) {
        mca_bml_base_btl_t *candidate = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
        /* NTH: go ahead and use an rdma btl if is the only one */
        bool excluded = !mca_pml_ob1.use_all_rdma;

        /* the scan stops by itself as soon as a matching eager entry clears
         * the flag */
        for (int e = 0; excluded && e < eager_total; ++e) {
            mca_bml_base_btl_t *eager_entry =
                mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, e);
            if (eager_entry->btl_endpoint == candidate->btl_endpoint) {
                excluded = false;
            }
        }

        if (!excluded) {
            ++usable;
        }
        ++visited;
    }

    return usable;
}
예제 #3
0
/*
 * Fill rdma_btls with the RDMA BTLs of an endpoint that can be used for the
 * buffer [base, base + size).
 *
 * BTLs are visited starting at bml_endpoint->btl_rdma_index.  A BTL without
 * an mpool is taken as is (paired with pml_bfo_dummy_reg); a BTL with an
 * mpool is taken only if a registration is obtained for the buffer.
 *
 * Returns the number of entries written to rdma_btls, or 0 when no BTL
 * qualifies or when the pipeline protocol should be used instead.
 */
size_t mca_pml_bfo_rdma_btls(
    mca_bml_base_endpoint_t* bml_endpoint,
    unsigned char* base,
    size_t size,
    mca_pml_bfo_com_btl_t* rdma_btls)
{
    int rdma_total = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
    double bandwidth_share = 0;
    int picked = 0, step;

    /* nothing to do without rdma capable btls */
    if (0 == rdma_total) {
        return 0;
    }

    /* walk the rdma btls and keep those for which the memory is registered */
    for (step = 0;
         step < rdma_total && picked < mca_pml_bfo.max_rdma_per_request;
         step++) {
        mca_bml_base_btl_t* candidate =
            mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
                    (bml_endpoint->btl_rdma_index + step) % rdma_total);
        mca_mpool_base_registration_t* registration = &pml_bfo_dummy_reg;
        mca_mpool_base_module_t *pool = candidate->btl->btl_mpool;

        if (NULL != pool) {
            if (mca_pml_bfo.leave_pinned) {
                /* register the memory */
                pool->mpool_register(pool, base, size, 0, &registration);
            } else {
                /* look through existing registrations */
                pool->mpool_find(pool, base, size, &registration);
            }

            /* no registration available: this btl cannot be used */
            if (NULL == registration) {
                continue;
            }
        }

        rdma_btls[picked].bml_btl = candidate;
        rdma_btls[picked].btl_reg = registration;
        bandwidth_share += candidate->btl_weight;
        picked++;
    }

    if (0 == picked) {
        return 0;
    }

    /* if we don't use leave_pinned and all BTLs that already have this memory
     * registered amount to less than half of available bandwidth - fall back
     * to pipeline protocol */
    if (!mca_pml_bfo.leave_pinned && bandwidth_share < 0.5) {
        return 0;
    }

    mca_pml_bfo_calc_weighted_length(rdma_btls, picked, size, bandwidth_share);

    /* start from the next btl on the following call */
    bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % rdma_total;
    return picked;
}
예제 #4
0
파일: bml_r2.c 프로젝트: urids/XSCALAMPI
/*
 * Remove procs from the BML.
 *
 * Only procs whose object reference count is exactly 1 are processed.  For
 * each of them every BTL in the endpoint's btl_send array is told, through
 * btl_del_procs, that the proc is going away; then the proc's BML endpoint
 * slot is cleared and both the proc and the endpoint are released.
 *
 * nprocs - number of entries in procs
 * procs  - procs to remove
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE when the scratch array
 * cannot be allocated, or the first error reported by a BTL's btl_del_procs
 * (processing stops at that point).
 */
static int mca_bml_r2_del_procs(size_t nprocs,
                                struct ompi_proc_t** procs)
{
    size_t p;
    int rc;
    struct ompi_proc_t** del_procs;
    size_t n_del_procs = 0;

    /* Nothing to do; also keeps a NULL result of malloc(0) from being
     * reported as an allocation failure. */
    if (0 == nprocs) {
        return OMPI_SUCCESS;
    }

    del_procs = (struct ompi_proc_t**)
        malloc(nprocs * sizeof(struct ompi_proc_t*));
    if (NULL == del_procs) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* collect the procs that hold a single reference */
    for(p = 0; p < nprocs; p++) {
        ompi_proc_t *proc = procs[p];
        if(((opal_object_t*)proc)->obj_reference_count == 1) {
            del_procs[n_del_procs++] = proc;
        }
    }

    for(p = 0; p < n_del_procs; p++) {
        ompi_proc_t *proc = del_procs[p];
        mca_bml_base_endpoint_t* bml_endpoint =
            (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
        size_t f_index, f_size;

        /* notify each btl that the proc is going away */
        f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
        for(f_index = 0; f_index < f_size; f_index++) {
            mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, f_index);
            mca_btl_base_module_t* btl = bml_btl->btl;

            rc = btl->btl_del_procs(btl, 1, &proc, &bml_btl->btl_endpoint);
            if(OMPI_SUCCESS != rc) {
                free(del_procs);
                return rc;
            }

            /* The reference stored in btl_eager and btl_rdma will automatically
             * disappear once the btl_array destructor is called. Thus, there is
             * no need for extra cleaning here.
             */
        }

        /* Clear the endpoint slot BEFORE releasing the proc: the procs handled
         * here hold a single reference, so the release below may destroy the
         * proc and it must not be touched afterwards.
         * NOTE(review): assumes OBJ_RELEASE destroys the object once the last
         * reference is dropped - confirm against the object system. */
        proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
        OBJ_RELEASE(proc);
        /* do any required cleanup */
        OBJ_RELEASE(bml_endpoint);
    }
    free(del_procs);

    return OMPI_SUCCESS;
}
예제 #5
0
파일: pml_ob1_rdma.c 프로젝트: aosm/openmpi
/*
 * Collect the RDMA BTLs of an endpoint usable for the buffer
 * [base, base + size) into rdma_btls.
 *
 * BTLs are visited starting at bml_endpoint->btl_rdma_index and at most
 * MCA_PML_OB1_MAX_RDMA_PER_REQUEST of them are kept.  A BTL without an mpool
 * is kept with a NULL registration; a BTL with an mpool is kept only when a
 * registration is obtained.
 *
 * Returns the number of entries written to rdma_btls.
 */
size_t mca_pml_ob1_rdma_btls(
    mca_bml_base_endpoint_t* bml_endpoint,
    unsigned char* base,
    size_t size,
    mca_pml_ob1_rdma_btl_t* rdma_btls)
{
    size_t rdma_total = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
    size_t picked = 0;
    size_t step;

    /* nothing to do without rdma capable btls */
    if (0 == rdma_total) {
        return 0;
    }

    /* walk the rdma btls and keep those for which the memory is registered */
    for (step = 0;
         step < rdma_total && picked < MCA_PML_OB1_MAX_RDMA_PER_REQUEST;
         step++) {
        mca_bml_base_btl_t* candidate =
            mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
                    (bml_endpoint->btl_rdma_index + step) % rdma_total);
        mca_mpool_base_registration_t* registration = NULL;
        mca_mpool_base_module_t *pool = candidate->btl_mpool;

        /* a btl without mpool is rdma capable and needs no registration */
        if (NULL != pool) {
            if (mca_pml_ob1.leave_pinned) {
                /* register the memory */
                pool->mpool_register(pool, base, size, 0, &registration);
            } else {
                /* look through existing registrations */
                pool->mpool_find(pool, base, size, &registration);
            }

            /* no registration: skip this btl */
            if (NULL == registration) {
                continue;
            }
        }

        rdma_btls[picked].bml_btl = candidate;
        rdma_btls[picked].btl_reg = registration;
        picked++;
    }

    /* start from the next btl on the following call */
    bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % rdma_total;
    return picked;
}
예제 #6
0
/*
 * Fill rdma_btls with the RDMA BTLs of an endpoint to be used by the
 * pipeline protocol, then split `size` among them by weight.
 *
 * At most min(number of RDMA BTLs, mca_pml_ob1.max_rdma_per_request) BTLs are
 * fetched with mca_bml_base_btl_array_get_next().  A fetched BTL is kept when
 * mca_pml_ob1.use_all_rdma is set, or when an entry of the eager array shares
 * its btl_endpoint.  Kept entries get a NULL registration.
 *
 * Returns the number of entries written to rdma_btls.
 */
size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
                                       size_t size,
                                       mca_pml_ob1_com_btl_t* rdma_btls )
{
    const int rdma_total = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
    const int eager_total = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
    double bandwidth_share = 0;
    int kept = 0;
    int visited = 0;

    while (visited < rdma_total && visited < mca_pml_ob1.max_rdma_per_request) {
        mca_bml_base_btl_t *candidate = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
        /* NTH: go ahead and use an rdma btl if is the only one */
        bool excluded = !mca_pml_ob1.use_all_rdma;

        /* the scan stops by itself as soon as a matching eager entry clears
         * the flag */
        for (int e = 0; excluded && e < eager_total; ++e) {
            mca_bml_base_btl_t *eager_entry =
                mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, e);
            if (eager_entry->btl_endpoint == candidate->btl_endpoint) {
                excluded = false;
            }
        }

        if (!excluded) {
            rdma_btls[kept].bml_btl = candidate;
            rdma_btls[kept].btl_reg = NULL;
            kept++;

            bandwidth_share += candidate->btl_weight;
        }
        visited++;
    }

    mca_pml_ob1_calc_weighted_length (rdma_btls, kept, size, bandwidth_share);

    return kept;
}
예제 #7
0
파일: spml_yoda.c 프로젝트: 00datman/ompi
/* for each proc create transport ids which are indexes into global
 * btl list&map
 *
 * The BTLs of the endpoint's btl_rdma array are used; when that array is
 * empty only the first BTL of btl_eager is used.  For every selected BTL the
 * id returned by _find_btl_id is stored in proc->transport_ids[] and the
 * matching mca_spml_yoda.btl_type_map[] entry is updated.
 *
 * Returns OSHMEM_SUCCESS, or OSHMEM_ERROR when the proc or its endpoint
 * cannot be found, no transport is available, the allocation fails, or a
 * BTL id is negative.
 */
static int create_btl_idx(int dst_pe)
{
    oshmem_proc_t *proc;
    int btl_id;
    mca_bml_base_endpoint_t* endpoint;
    mca_bml_base_btl_t* bml_btl = NULL;
    int i, size;
    mca_bml_base_btl_array_t *btl_array;
    int shmem_index = -1;   /* >= 0: always use this slot of the eager array */

    proc = oshmem_proc_group_find(oshmem_group_all, dst_pe);
    if (!proc) {
        /* do not dereference a proc that could not be looked up */
        SPML_ERROR("Can not find destination proc for pe=%d", dst_pe);
        return OSHMEM_ERROR;
    }

    endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
    assert(endpoint);
    if (!endpoint) {
        /* runtime guard for builds where assert() is compiled out */
        SPML_ERROR("pe=%d proc has no endpoint", dst_pe);
        return OSHMEM_ERROR;
    }

    btl_array = &endpoint->btl_rdma;
    size = mca_bml_base_btl_array_get_size(btl_array);

    if (0 >= size) {
        /* Possibly this is SM BTL with KNEM disabled? Then we should use send based get/put */
        /*
           This hack is necessary for the case when KNEM is not available.
           In this case we still want to use send/recv of SM BTL for put and get
           but SM BTL is not in the rdma list anymore
        */
        btl_array = &endpoint->btl_eager;
        size = mca_bml_base_btl_array_get_size(btl_array);
        if (0 < size) {
            /*Chose SHMEM capable btl from eager array. Not filter now: take the first
              (but could appear on demand).*/
            shmem_index = 0;
            size = 1;
        }
        else {
            SPML_ERROR("no SHMEM capable transport for dest pe=%d", dst_pe);
            return OSHMEM_ERROR;
        }
    }

    proc->transport_ids = (char *) malloc(size * sizeof(char));
    if (!proc->transport_ids)
        return OSHMEM_ERROR;

    proc->num_transports = size;

    for (i = 0; i < size; i++) {
        bml_btl = mca_bml_base_btl_array_get_index(btl_array,
                                                   (shmem_index >= 0) ?
                                                       (shmem_index) : (i));
        btl_id = _find_btl_id(bml_btl);
        SPML_VERBOSE(50,
                     "dst_pe(%d) use btl (%s) btl_id=%d",
                     dst_pe, bml_btl->btl->btl_component->btl_version.mca_component_name, btl_id);
        if (0 > btl_id) {
            SPML_ERROR("unknown btl: dst_pe(%d) use btl (%s) btl_id=%d",
                       dst_pe, bml_btl->btl->btl_component->btl_version.mca_component_name, btl_id);
            return OSHMEM_ERROR;
        }
        proc->transport_ids[i] = btl_id;
        mca_spml_yoda.btl_type_map[btl_id].bml_btl = bml_btl;
        mca_spml_yoda.btl_type_map[btl_id].use_cnt++;
    }
    return OSHMEM_SUCCESS;
}
예제 #8
0
/*
 * Remove one BTL from the BML endpoint of a proc.
 *
 * The BTL is removed from the eager, send and rdma arrays.  Whenever it was
 * actually removed from the send or rdma array, the aggregate values of the
 * endpoint and the weight of every remaining BTL of that array are
 * recomputed.
 *
 * Always returns OMPI_SUCCESS (also when the proc has no BML endpoint).
 */
static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
{
    mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
    mca_bml_base_btl_t* entry;
    mca_btl_base_module_t* module;
    double bandwidth_sum = 0;
    size_t idx, remaining;

    if (NULL == ep) {
        return OMPI_SUCCESS;
    }

    /* eager list: plain removal, no aggregate to refresh */
    mca_bml_base_btl_array_remove(&ep->btl_eager, btl);

    /* send list */
    if (mca_bml_base_btl_array_remove(&ep->btl_send, btl)) {
        remaining = mca_bml_base_btl_array_get_size(&ep->btl_send);

        /* sum the bandwidth and reset max_send_size to the min of all btl's */
        bandwidth_sum = 0;
        ep->btl_max_send_size = -1;
        for (idx = 0; idx < remaining; idx++) {
            entry = mca_bml_base_btl_array_get_index(&ep->btl_send, idx);
            module = entry->btl;

            bandwidth_sum += module->btl_bandwidth;
            if (module->btl_max_send_size < ep->btl_max_send_size) {
                ep->btl_max_send_size = module->btl_max_send_size;
            }
        }

        /* weight of each btl: its share of the bandwidth, or an equal share
         * when it reports no bandwidth */
        for (idx = 0; idx < remaining; idx++) {
            entry = mca_bml_base_btl_array_get_index(&ep->btl_send, idx);
            module = entry->btl;

            entry->btl_weight = (module->btl_bandwidth > 0)
                ? (float)(module->btl_bandwidth / bandwidth_sum)
                : (float)(1.0 / remaining);
        }
    }

    /* RDMA list */
    if (mca_bml_base_btl_array_remove(&ep->btl_rdma, btl)) {
        remaining = mca_bml_base_btl_array_get_size(&ep->btl_rdma);

        /* sum the bandwidth and recompute the aggregate endpoint info as the
         * max over the remaining btls */
        bandwidth_sum = 0;
        ep->btl_pipeline_send_length = 0;
        ep->btl_send_limit = 0;
        for (idx = 0; idx < remaining; idx++) {
            entry = mca_bml_base_btl_array_get_index(&ep->btl_rdma, idx);
            module = entry->btl;

            bandwidth_sum += module->btl_bandwidth;
            if (module->btl_rdma_pipeline_send_length > ep->btl_pipeline_send_length) {
                ep->btl_pipeline_send_length = module->btl_rdma_pipeline_send_length;
            }
            if (module->btl_min_rdma_pipeline_size > ep->btl_send_limit) {
                ep->btl_send_limit = module->btl_min_rdma_pipeline_size;
            }
        }

        /* weight of each btl, same rule as for the send list */
        for (idx = 0; idx < remaining; idx++) {
            entry = mca_bml_base_btl_array_get_index(&ep->btl_rdma, idx);
            module = entry->btl;

            entry->btl_weight = (module->btl_bandwidth > 0)
                ? (float)(module->btl_bandwidth / bandwidth_sum)
                : (float)(1.0 / remaining);
        }
    }

    return OMPI_SUCCESS;
}
예제 #9
0
/*
 * Remove procs from the BML.
 *
 * Only procs whose object reference count is exactly 1 are processed.  Every
 * BTL of the endpoint's btl_eager array is notified through btl_del_procs,
 * and its twin in btl_send is zeroed so that the second pass, over btl_send,
 * notifies only the BTLs not already handled.  Finally the proc and its
 * endpoint are released.
 *
 * nprocs - number of entries in procs
 * procs  - procs to remove
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE when the scratch array
 * cannot be allocated, or the first error reported by a BTL's btl_del_procs
 * (processing stops at that point).
 */
static int mca_bml_r2_del_procs(size_t nprocs,
                                struct ompi_proc_t** procs)
{
    size_t p;
    int rc = OMPI_SUCCESS;
    struct ompi_proc_t** del_procs;
    size_t n_del_procs = 0;

    /* Nothing to do; also keeps a NULL result of malloc(0) from being
     * reported as an allocation failure. */
    if (0 == nprocs) {
        return OMPI_SUCCESS;
    }

    del_procs = (struct ompi_proc_t**)
        malloc(nprocs * sizeof(struct ompi_proc_t*));
    if (NULL == del_procs) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* collect the procs that hold a single reference */
    for(p = 0; p < nprocs; p++) {
        ompi_proc_t *proc = procs[p];
        if(((opal_object_t*)proc)->obj_reference_count == 1) {
            del_procs[n_del_procs++] = proc;
        }
    }

    for(p = 0; p < n_del_procs; p++) {
        ompi_proc_t *proc = del_procs[p];
        mca_bml_base_endpoint_t* bml_endpoint =
            (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
        size_t f_index, f_size;
        size_t n_index, n_size;

        /* notify each btl that the proc is going away */
        f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_eager);
        for(f_index = 0; f_index < f_size; f_index++) {
            mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_eager, f_index);
            mca_btl_base_module_t* btl = bml_btl->btl;

            rc = btl->btl_del_procs(btl,1,&proc,&bml_btl->btl_endpoint);
            if(OMPI_SUCCESS != rc) {
                goto cleanup;
            }

            /* remove this from next array so that we dont call it twice w/
             * the same address pointer; the array searched is btl_send, so
             * the bound must be the size of btl_send
             */
            n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
            for(n_index = 0; n_index < n_size; n_index++) {
                mca_bml_base_btl_t* search_bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
                if(search_bml_btl->btl == btl) {
                    memset(search_bml_btl, 0, sizeof(mca_bml_base_btl_t));
                    break;
                }
            }
        }

        /* notify each r2 that was not in the array of r2s for first fragments:
         * walk btl_send, whose already-notified entries were zeroed above */
        n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t* btl = bml_btl->btl;
            if (btl != NULL) {
                rc = btl->btl_del_procs(btl,1,&proc,&bml_btl->btl_endpoint);
                if(OMPI_SUCCESS != rc) {
                    goto cleanup;
                }
            }
        }

        OBJ_RELEASE(proc);
        /* do any required cleanup */
        OBJ_RELEASE(bml_endpoint);
    }

cleanup:
    /* the scratch array is released on every exit path */
    free(del_procs);
    return rc;
}
예제 #10
0
/*
 * Register procs with the BML.
 *
 * Every proc of `procs` is retained.  Those that do not have a BML endpoint
 * yet are offered to every BTL module; for each BTL that reports a proc as
 * reachable, the BTL is appended to the btl_send array of the proc's endpoint
 * (the endpoint is created on first use) unless its exclusivity is lower than
 * that of the last BTL already in the array.  Afterwards the per-endpoint
 * metrics (weights, max send size, pipeline lengths) are computed and the
 * btl_eager and btl_rdma arrays are filled from btl_send.
 *
 * nprocs    - number of entries in procs
 * procs     - procs to register
 * reachable - scratch bitmap, cleared and refilled for every BTL module
 *
 * Returns OMPI_SUCCESS, the error of mca_bml_r2_add_btls(),
 * OMPI_ERR_OUT_OF_RESOURCE on allocation failure, or OMPI_ERR_UNREACH when at
 * least one new proc ended up without an endpoint.
 */
static int mca_bml_r2_add_procs( size_t nprocs,
                                 struct ompi_proc_t** procs,
                                 struct opal_bitmap_t* reachable )
{
    size_t p, p_index, n_new_procs = 0;
    struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
    struct ompi_proc_t** new_procs = NULL;
    struct ompi_proc_t *unreach_proc = NULL;
    int rc, ret = OMPI_SUCCESS;

    if(0 == nprocs) {
        return OMPI_SUCCESS;
    }

    if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) ) {
        return rc;
    }

    /* Select only the procs that don't yet have the BML proc struct. This prevent
     * us from calling btl->add_procs several this on the same destination proc.
     */
    for(p_index = 0; p_index < nprocs; p_index++) {
        struct ompi_proc_t* proc = procs[p_index];

        OBJ_RETAIN(proc);
        if(NULL !=  proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
            continue;  /* go to the next proc */
        }
        /* Allocate the new_procs on demand */
        if( NULL == new_procs ) {
            new_procs = (struct ompi_proc_t **)malloc(nprocs * sizeof(struct ompi_proc_t *));
            if( NULL == new_procs ) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
        }
        new_procs[n_new_procs++] = proc;
    }

    if ( 0 == n_new_procs ) {
        return OMPI_SUCCESS;
    }

    /* Starting from here we only work on the unregistered procs */
    procs = new_procs;
    nprocs = n_new_procs;

    /* attempt to add all procs to each r2 */
    btl_endpoints = (struct mca_btl_base_endpoint_t **)
        malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*));
    if (NULL == btl_endpoints) {
        free(new_procs);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
        mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
        int btl_inuse = 0;

        /* if the r2 can reach the destination proc it sets the
         * corresponding bit (proc index) in the reachable bitmap
         * and can return addressing information for each proc
         * that is passed back to the r2 on data transfer calls
         */
        opal_bitmap_clear_all_bits(reachable);
        memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));

        rc = btl->btl_add_procs(btl, n_new_procs, new_procs, btl_endpoints, reachable);
        if(OMPI_SUCCESS != rc) {
            /* This BTL has troubles adding the nodes. Let's continue maybe some other BTL
             * can take care of this task.
             */
            continue;
        }

        /* for each proc that is reachable */
        for( p = 0; p < n_new_procs; p++ ) {
            if(opal_bitmap_is_set_bit(reachable, p)) {
                ompi_proc_t *proc = new_procs[p];
                mca_bml_base_endpoint_t * bml_endpoint =
                    (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
                mca_bml_base_btl_t* bml_btl;
                size_t size;

                if(NULL == bml_endpoint) {
                    /* allocate bml specific proc data */
                    bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
                    if (NULL == bml_endpoint) {
                        opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
                        free(btl_endpoints);
                        free(new_procs);
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }

                    /* preallocate space in array for max number of r2s */
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send,  mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma,  mca_bml_r2.num_btl_modules);
                    /* -1: start from the largest value so that the min over
                     * the BTLs can be taken later */
                    bml_endpoint->btl_max_send_size = -1;
                    bml_endpoint->btl_proc = proc;
                    proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint;

                    bml_endpoint->btl_flags_or = 0;
                }

                /* dont allow an additional BTL with a lower exclusivity ranking */
                size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
                if(size > 0) {
                    bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
                    /* skip this btl if the exclusivity is less than the previous */
                    if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity) {
                        btl->btl_del_procs(btl, 1, &proc, &btl_endpoints[p]);
                        continue;
                    }
                }

                /* cache the endpoint on the proc */
                bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
                bml_btl->btl = btl;
                bml_btl->btl_endpoint = btl_endpoints[p];
                bml_btl->btl_weight = 0;
                bml_btl->btl_flags = btl->btl_flags;
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
                                " the %s BTL without any PUT function attached. Disard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_PUT;
                }
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
                                " the %s BTL without any GET function attached. Discard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_GET;
                }
                if( (bml_btl->btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
                    /**
                     * If no protocol specified, we have 2 choices: we ignore the BTL
                     * as we don't know which protocol to use, or we suppose that all
                     * BTLs support the send protocol.
                     */
                    bml_btl->btl_flags |= MCA_BTL_FLAGS_SEND;
                }
                /**
                 * calculate the bitwise OR of the btl flags
                 */
                bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
                /* This BTL is in use, allow the progress registration */
                btl_inuse++;
            }
        }
        /* register the progress function of the BTL component once */
        if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
            size_t p;
            bool found = false;
            for( p = 0; p < mca_bml_r2.num_btl_progress; p++ ) {
                if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
                    found = true;
                    break;
                }
            }
            if(found == false) {
                mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] =
                    btl->btl_component->btl_progress;
                mca_bml_r2.num_btl_progress++;
                opal_progress_register( btl->btl_component->btl_progress );
            }
        }
    }
    free(btl_endpoints);

    /* iterate back through procs and compute metrics for registered r2s */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];
        mca_bml_base_endpoint_t* bml_endpoint =
            (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
        double total_bandwidth = 0;
        uint32_t latency = 0xffffffff;
        size_t n_index;
        size_t n_size;

        /* skip over procs w/ no btl's registered */
        if(NULL == bml_endpoint) {
            continue;
        }

        /* (1) determine the total bandwidth available across all btls
         *     note that we need to do this here, as we may already have btls configured
         * (2) determine the highest priority ranking for latency
         * (3) compute the maximum amount of bytes that can be send without any
         *     weighting. Once the left over is smaller than this number we will
         *     start using the weight to compute the correct amount.
         */
        n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);

        /* sort BTLs in descending order according to bandwidth value */
        qsort(bml_endpoint->btl_send.bml_btls, n_size,
                sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);

        bml_endpoint->btl_rdma_index = 0;
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl =
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t* btl = bml_btl->btl;
            total_bandwidth += bml_btl->btl->btl_bandwidth;
            if(btl->btl_latency < latency) {
                latency = btl->btl_latency;
            }
        }

        /* (1) set the weight of each btl as a percentage of overall bandwidth
         * (2) copy all btl instances at the highest priority ranking into the
         *     list of btls used for first fragments
         */
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl =
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t *btl = bml_btl->btl;

            /* compute weighting factor for this r2 */
            if(btl->btl_bandwidth > 0) {
                bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
            } else {
                bml_btl->btl_weight = (float)(1.0 / n_size);
            }

            /* check to see if this r2 is already in the array of r2s
             * used for first fragments - if not add it.
             */
            if(btl->btl_latency == latency) {
                mca_bml_base_btl_t* bml_btl_new =
                    mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
                *bml_btl_new = *bml_btl;
            }

            /* set endpoint max send size as min of available btls */
            if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
               bml_endpoint->btl_max_send_size = btl->btl_max_send_size;

            /* check flags - is rdma prefered */
            if ((btl->btl_flags & (MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET)) &&
                !((proc->proc_arch != ompi_proc_local_proc->proc_arch) &&
                  (0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
                mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
                mca_btl_base_module_t* btl_rdma = bml_btl->btl;

                *bml_btl_rdma = *bml_btl;
                if(bml_endpoint->btl_pipeline_send_length < btl_rdma->btl_rdma_pipeline_send_length) {
                    bml_endpoint->btl_pipeline_send_length = btl_rdma->btl_rdma_pipeline_send_length;
                }
                if(bml_endpoint->btl_send_limit < btl_rdma->btl_min_rdma_pipeline_size) {
                    bml_endpoint->btl_send_limit = btl_rdma->btl_min_rdma_pipeline_size;
                }
            }
        }
    }

    /* see if we have a connection to everyone else */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];

        if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
            /* remember the first unreachable proc for the error report */
            if (NULL == unreach_proc) {
                unreach_proc = proc;
            }
            ret = OMPI_ERR_UNREACH;
        }
    }

    if (mca_bml_r2.show_unreach_errors &&
        OMPI_ERR_UNREACH == ret) {
        /* report local name/host first, then the name/host of the
         * unreachable proc (not the local host a second time) */
        opal_show_help("help-mca-bml-r2.txt",
                       "unreachable proc",
                       true,
                       OMPI_NAME_PRINT(&(ompi_proc_local_proc->proc_name)),
                       (NULL != ompi_proc_local_proc->proc_hostname ?
                        ompi_proc_local_proc->proc_hostname : "unknown!"),
                       OMPI_NAME_PRINT(&(unreach_proc->proc_name)),
                       (NULL != unreach_proc->proc_hostname ?
                        unreach_proc->proc_hostname : "unknown!"),
                       btl_names);
    }

    free(new_procs);

    return ret;
}
예제 #11
0
/*
 * Fill rdma_btls with the RDMA BTLs of an endpoint that can be used for the
 * buffer [base, base + size), then split `size` among them by weight.
 *
 * BTLs are visited starting at bml_endpoint->btl_rdma_index.  Unless
 * mca_pml_ob1.use_all_rdma is set, a BTL is considered only when an entry of
 * the eager array shares its btl_endpoint.  A BTL with a btl_register_mem
 * function is kept only if the buffer can be registered with it; a BTL
 * without one is kept with a NULL registration handle.
 *
 * Returns the number of entries written to rdma_btls, or 0 when no BTL
 * qualifies or when the pipeline protocol should be used instead.
 */
size_t mca_pml_ob1_rdma_btls(
    mca_bml_base_endpoint_t* bml_endpoint,
    unsigned char* base,
    size_t size,
    mca_pml_ob1_com_btl_t* rdma_btls)
{
    int rdma_total = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
    int eager_total = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
    double bandwidth_share = 0;
    int picked = 0;

    /* nothing to do without rdma capable btls */
    if (0 == rdma_total) {
        return 0;
    }

    /* walk the rdma btls and keep those the memory can be registered with */
    for (int step = 0; step < rdma_total && picked < mca_pml_ob1.max_rdma_per_request; step++) {
        mca_bml_base_btl_t* candidate =
            mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
                    (bml_endpoint->btl_rdma_index + step) % rdma_total);
        mca_btl_base_registration_handle_t *handle = NULL;
        mca_btl_base_module_t *module = candidate->btl;
        /* NTH: go ahead and use an rdma btl if is the only one */
        bool excluded = !mca_pml_ob1.use_all_rdma;

        /* do not use rdma btls that are not in the eager list. this is necessary to avoid using
         * btls that exist on the endpoint only to support RMA. The scan stops by itself as soon
         * as a matching eager entry clears the flag. */
        for (int e = 0; excluded && e < eager_total; ++e) {
            mca_bml_base_btl_t *eager_entry =
                mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, e);
            if (eager_entry->btl_endpoint == candidate->btl_endpoint) {
                excluded = false;
            }
        }

        if (excluded) {
            continue;
        }

        /* a btl without btl_register_mem needs no registration */
        if (module->btl_register_mem) {
            /* do not use the RDMA protocol with this btl if 1) leave pinned is disabled,
             * 2) the btl supports put, and 3) the fragment is larger than the minimum
             * pipeline size specified by the BTL */
            if (!opal_leave_pinned && (module->btl_flags & MCA_BTL_FLAGS_PUT) &&
                  size > module->btl_min_rdma_pipeline_size) {
                continue;
            }

            /* try to register the memory region with the btl */
            handle = module->btl_register_mem (module, candidate->btl_endpoint, base,
                                               size, MCA_BTL_REG_FLAG_REMOTE_READ);
            if (NULL == handle) {
                /* btl requires registration but the registration failed */
                continue;
            }
        }

        rdma_btls[picked].bml_btl = candidate;
        rdma_btls[picked].btl_reg = handle;
        bandwidth_share += candidate->btl_weight;
        picked++;
    }

    if (0 == picked) {
        return 0;
    }

    /* if we don't use leave_pinned and all BTLs that already have this memory
     * registered amount to less than half of available bandwidth - fall back
     * to pipeline protocol */
    if (!opal_leave_pinned && bandwidth_share < 0.5) {
        return 0;
    }

    mca_pml_ob1_calc_weighted_length(rdma_btls, picked, size, bandwidth_share);

    /* start from the next btl on the following call */
    bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % rdma_total;
    return picked;
}
예제 #12
0
파일: bml_r2.c 프로젝트: aosm/openmpi
/**
 * Remove a BTL module from the per-peer BTL lists of a process.
 *
 * The BTL is removed from the eager, send and RDMA arrays of the BML
 * endpoint cached in proc->proc_bml.  When the send or RDMA array actually
 * loses an entry, the values derived from that array (total bandwidth,
 * per-BTL weight, endpoint send limit / RDMA offset) are recomputed over
 * the entries that remain.
 *
 * @param proc  process whose BML endpoint is updated
 * @param btl   BTL module to remove
 * @return OMPI_SUCCESS in all cases
 */
int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
{
    mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_bml;
    double total_bandwidth;
    size_t n_btls;
    size_t b;

    /* a proc that never got a BML endpoint has no BTL lists to update */
    if(NULL == ep) {
        return OMPI_SUCCESS;
    }

    /* remove btl from eager list */
    mca_bml_base_btl_array_remove(&ep->btl_eager, btl);

    /* remove btl from send list */
    if(mca_bml_base_btl_array_remove(&ep->btl_send, btl)) {
        n_btls = mca_bml_base_btl_array_get_size(&ep->btl_send);

        /* Restart from the "no limit" value (the same -1 initialisation
         * mca_bml_r2_add_procs uses) so the minimum below is taken over the
         * remaining BTLs only; otherwise the limit of the BTL that was just
         * removed would stick to the endpoint. */
        ep->btl_max_send_size = -1;

        /* compute total_bandwidth and the min max_send_size of all btl's */
        total_bandwidth = 0;
        for(b = 0; b < n_btls; b++) {
            mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);
            mca_btl_base_module_t* send_btl = bml_btl->btl;

            total_bandwidth += send_btl->btl_bandwidth;
            if(send_btl->btl_max_send_size < ep->btl_max_send_size) {
                ep->btl_max_send_size = send_btl->btl_max_send_size;
            }
        }

        /* compute weighting factor for each remaining btl */
        for(b = 0; b < n_btls; b++) {
            mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);

            if(bml_btl->btl->btl_bandwidth > 0) {
                /* total_bandwidth > 0 here: it includes this positive term */
                bml_btl->btl_weight = bml_btl->btl->btl_bandwidth / total_bandwidth;
            } else {
                bml_btl->btl_weight = 1.0 / n_btls;
            }
        }
    }

    /* remove btl from RDMA list */
    if(mca_bml_base_btl_array_remove(&ep->btl_rdma, btl)) {
        n_btls = mca_bml_base_btl_array_get_size(&ep->btl_rdma);

        /* compute total bandwidth and update aggregate endpoint info.
         * NOTE(review): btl_rdma_offset is only ever raised here, so a value
         * contributed by the removed BTL is kept - confirm whether it should
         * be reset before this loop. */
        total_bandwidth = 0;
        for(b = 0; b < n_btls; b++) {
            mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);

            total_bandwidth += bml_btl->btl->btl_bandwidth;
            if(ep->btl_rdma_offset < bml_btl->btl_min_rdma_size) {
                ep->btl_rdma_offset = bml_btl->btl_min_rdma_size;
            }
        }

        /* compute weighting factor for each remaining btl */
        for(b = 0; b < n_btls; b++) {
            mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);

            if(bml_btl->btl->btl_bandwidth > 0) {
                bml_btl->btl_weight = bml_btl->btl->btl_bandwidth / total_bandwidth;
            } else {
                bml_btl->btl_weight = 1.0 / n_btls;
            }
        }
    }

    return OMPI_SUCCESS;
}
예제 #13
0
파일: bml_r2.c 프로젝트: aosm/openmpi
int mca_bml_r2_add_procs(
                         size_t nprocs, 
                         struct ompi_proc_t** procs, 
                         struct mca_bml_base_endpoint_t** bml_endpoints, 
                         struct ompi_bitmap_t* reachable
                         )
{
    size_t p;
    int rc;
    size_t p_index;
    struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;  
    struct ompi_proc_t** new_procs = NULL; 
    size_t n_new_procs = 0;
    int ret = OMPI_SUCCESS;
    struct ompi_proc_t *unreach_proc = NULL;

    if(0 == nprocs) {
        return OMPI_SUCCESS;
    }
    
    if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) ) {
        return rc;
    }
    
    new_procs = (struct ompi_proc_t **) 
        malloc(nprocs * sizeof(struct ompi_proc_t *)); 
    if (NULL == new_procs ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    memset(bml_endpoints, 0, nprocs * sizeof(struct mca_bml_base_endpoint_t*));

    for(p_index = 0; p_index < nprocs; p_index++) { 
        struct ompi_proc_t* proc;
        proc = procs[p_index]; 
        OBJ_RETAIN(proc); 
        
        if(NULL !=  proc->proc_bml) { 
            bml_endpoints[p_index] = 
                (mca_bml_base_endpoint_t*) proc->proc_bml; 
        } else { 
            new_procs[n_new_procs++] = proc; 
        }
    }

    if ( 0 == n_new_procs ) {
	return OMPI_SUCCESS;
    }
    
    procs = new_procs; 
    nprocs = n_new_procs; 
    
    /* attempt to add all procs to each r2 */
    btl_endpoints = (struct mca_btl_base_endpoint_t **) 
        malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*)); 
    if (NULL == btl_endpoints) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    
    for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
        mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
        int btl_inuse = 0;
        
        /* if the r2 can reach the destination proc it sets the
         * corresponding bit (proc index) in the reachable bitmap
         * and can return addressing information for each proc
         * that is passed back to the r2 on data transfer calls
         */
        ompi_bitmap_clear_all_bits(reachable);
        memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*)); 

        rc = btl->btl_add_procs(btl, n_new_procs, new_procs, btl_endpoints, reachable);
        if(OMPI_SUCCESS != rc) {
            free(btl_endpoints);
            return rc;
        }

        /* for each proc that is reachable - add the endpoint to the bml_endpoints array(s) */
        for(p=0; p<n_new_procs; p++) {
            if(ompi_bitmap_is_set_bit(reachable, p)) {
                ompi_proc_t *proc = new_procs[p]; 
                mca_bml_base_endpoint_t * bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; 
                mca_bml_base_btl_t* bml_btl; 
                size_t size;
                
                btl_inuse++;

                if(NULL == bml_endpoint) { 
                    
                    
                    /* allocate bml specific proc data */
                    bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
                    if (NULL == bml_endpoint) {
                        opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
                        free(btl_endpoints);
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }
                    
                    /* preallocate space in array for max number of r2s */
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send,  mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma,  mca_bml_r2.num_btl_modules);
                    bml_endpoint->btl_max_send_size = -1;
                    bml_endpoint->btl_proc = proc;
                    proc->proc_bml = bml_endpoint; 
                 
                    bml_endpoint->btl_flags_and = 0;
                    bml_endpoint->btl_flags_or = 0;
                }

                bml_endpoints[p] =(mca_bml_base_endpoint_t*)  proc->proc_bml; 
                
                
                /* dont allow an additional BTL with a lower exclusivity ranking */
                size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
                if(size > 0) {
                    bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
                    /* skip this btl if the exclusivity is less than the previous */
                    if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity) {
                        if(btl_endpoints[p] != NULL) {
                            btl->btl_del_procs(btl, 1, &proc, &btl_endpoints[p]);
                        }
                        btl_inuse--;
                        continue;
                    }
                }

                /* cache the endpoint on the proc */
                bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
                bml_btl->btl = btl;
                bml_btl->btl_eager_limit = btl->btl_eager_limit;
                bml_btl->btl_min_send_size = btl->btl_min_send_size;
                bml_btl->btl_max_send_size = btl->btl_max_send_size;
                bml_btl->btl_min_rdma_size = btl->btl_min_rdma_size;
                bml_btl->btl_max_rdma_size = btl->btl_max_rdma_size;
                bml_btl->btl_cache = NULL;
                bml_btl->btl_endpoint = btl_endpoints[p];
                bml_btl->btl_weight = 0;
                bml_btl->btl_alloc = btl->btl_alloc;
                bml_btl->btl_free = btl->btl_free;
                bml_btl->btl_prepare_src = btl->btl_prepare_src;
                bml_btl->btl_prepare_dst = btl->btl_prepare_dst;
                bml_btl->btl_send = btl->btl_send;
                bml_btl->btl_flags = btl->btl_flags; 
                bml_btl->btl_put = btl->btl_put;
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == bml_btl->btl_put) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
                                " the %s BTL without any PUT function attached. Disard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_PUT;
                }
                bml_btl->btl_get = btl->btl_get;
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_GET) && (NULL == bml_btl->btl_get) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
                                " the %s BTL without any GET function attached. Disard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_GET;
                }
                bml_btl->btl_mpool = btl->btl_mpool;
                if( (bml_btl->btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
                    /**
                     * If no protocol specified, we have 2 choices: we ignore the BTL
                     * as we don't know which protocl to use, or we suppose that all
                     * BTLs support the send protocol. 
                     */
                    bml_btl->btl_flags |= MCA_BTL_FLAGS_SEND;
                }
                /**
                 * calculate the bitwise OR and AND of the btl flags 
                 */
                bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
                bml_endpoint->btl_flags_and &= bml_btl->btl_flags;
            }
        }
        if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
            size_t p;
            bool found = false;
            for(p=0; p<mca_bml_r2.num_btl_progress; p++) {
                if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
                    found = true;
                    break;
                }
            }
            if(found == false) {
                mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] = 
                    btl->btl_component->btl_progress;
                mca_bml_r2.num_btl_progress++;
            }
        }
    }
    free(btl_endpoints);

    /* iterate back through procs and compute metrics for registered r2s */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];
        mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml;
        double total_bandwidth = 0;
        uint32_t latency = 0xffffffff;
        size_t n_index;
        size_t n_size;

        /* skip over procs w/ no btl's registered */
        if(NULL == bml_endpoint) {
            continue;
        }

        /* (1) determine the total bandwidth available across all btls
         *     note that we need to do this here, as we may already have btls configured
         * (2) determine the highest priority ranking for latency
         * (3) compute the maximum amount of bytes that can be send without any
         *     weighting. Once the left over is smaller than this number we will
         *     start using the weight to compute the correct amount.
         */
        n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); 
        bml_endpoint->bml_max_send_length = 0;
        bml_endpoint->bml_max_rdma_length = 0;
        bml_endpoint->btl_rdma_index = 0;
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl = 
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t* btl = bml_btl->btl;
            total_bandwidth += bml_btl->btl->btl_bandwidth;
            if(btl->btl_latency < latency) {
                latency = btl->btl_latency;
            }
            bml_endpoint->bml_max_send_length += bml_btl->btl->btl_bandwidth;
        }
        
        /* (1) set the weight of each btl as a percentage of overall bandwidth
         * (2) copy all btl instances at the highest priority ranking into the
         *     list of btls used for first fragments
         */
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl = 
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t *btl = bml_btl->btl;

            /* compute weighting factor for this r2 */
            if(btl->btl_bandwidth > 0) {
                bml_btl->btl_weight = btl->btl_bandwidth / total_bandwidth;
            } else {
                bml_btl->btl_weight = 1.0 / n_size;
            }

            /* check to see if this r2 is already in the array of r2s 
             * used for first fragments - if not add it.
             */
            if(btl->btl_latency == latency) {
                mca_bml_base_btl_t* bml_btl_new = 
                    mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
                *bml_btl_new = *bml_btl;
            }

            /* set endpoint max send size as min of available btls */
            if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
               bml_endpoint->btl_max_send_size = btl->btl_max_send_size;

            /* check flags - is rdma prefered */
            if(btl->btl_flags & (MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET) &&
               proc->proc_arch == ompi_proc_local_proc->proc_arch) {
                mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
                *bml_btl_rdma = *bml_btl;
                if(bml_endpoint->btl_rdma_offset < bml_btl_rdma->btl_min_rdma_size) {
                    bml_endpoint->btl_rdma_offset = bml_btl_rdma->btl_min_rdma_size;
                }
            }
        }
    }

    /* see if we have a connection to everyone else */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];

        if (NULL == proc->proc_bml) {
            if (NULL == unreach_proc) {
                unreach_proc = proc;
            }
            ret = OMPI_ERR_UNREACH;
        }
    }

    if (mca_bml_r2.show_unreach_errors && 
        OMPI_ERR_UNREACH == ret) {
        char *local, *remote;

        orte_ns.get_proc_name_string(&local,
                                     &(ompi_proc_local_proc->proc_name));
        orte_ns.get_proc_name_string(&remote,
                                     &(unreach_proc->proc_name));

        opal_show_help("help-mca-bml-r2",
                       "unreachable proc",
                       true, local, remote, NULL);

        free(local);
        free(remote);
    }

    free(new_procs); 

    return ret;
}
예제 #14
0
/* This function checks how many processes are using the component
   'component_name' for communication and returns this count in
   'ncount'. Furthermore it returns a 'key', which can be used to split
   the communicator into subgroups, such that the new communicators
   will definitely have all processes communicate with this component.

   Oct 13: the algorithm has been modified such that it returns the
   number of processes using the specified component and the number
   of processes to which an even 'faster' protocol is being used. (Faster
   specified in this context as being further up in the list of
   hier_prot protocols specified at the beginning of this file).
   NOTE(review): the body below only reports 'ncount' and 'key', and
   'component_level' is not used - confirm whether the paragraph above
   still applies.

   For every peer other than the calling rank, the first BTL of the peer's
   send list (or RDMA list, if mca_coll_hierarch_use_rdma_param is set) is
   inspected; the peer is counted when that BTL's component name equals
   'component_name'.

   On return:
     *ncount  number of matching peers (0 on error)
     *key     smallest rank among the caller and the matching peers, or
              MPI_UNDEFINED if no peer matched or an error occurred
*/
static void
mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
                                       int component_level,
                                       char *component_name,
                                       int *key,
                                       int *ncount )
{
    opal_bitmap_t reachable;
    ompi_proc_t **procs=NULL;
    struct mca_bml_base_btl_array_t *bml_btl_array=NULL;
    mca_bml_base_btl_t *bml_btl=NULL;
    mca_btl_base_component_t *btl=NULL;
    mca_bml_base_endpoint_t *endpoint;

    int i, size, rc;

    int counter=0;
    int firstproc;
    int rank = -1;
    int use_rdma=0;

    /* default values in case an error occurs */
    *ncount=0;
    *key=MPI_UNDEFINED;

    /* Shall we check the rdma list instead of send-list in the endpoint-structure? */
    use_rdma = mca_coll_hierarch_use_rdma_param;

    size = ompi_comm_size ( comm );
    rank = ompi_comm_rank ( comm );

    /* every valid rank is smaller than 'size', so it works as "none found yet" */
    firstproc = size;

    OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
    rc = opal_bitmap_init(&reachable, size);
    if(OMPI_SUCCESS != rc) {
        OBJ_DESTRUCT(&reachable);
        return;
    }

    procs = comm->c_local_group->grp_proc_pointers;
    rc = mca_bml.bml_add_procs ( size, procs, &reachable );
    /* the bitmap is not read after the call above; release it on every path */
    OBJ_DESTRUCT(&reachable);
    if(OMPI_SUCCESS != rc) {
        return;
    }

    for ( i=0; i<size; i++ ) {
        if ( rank ==  i ) {
            /* skip myself */
            continue;
        }

        endpoint = (mca_bml_base_endpoint_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
        if ( NULL == endpoint ) {
            /* no BML endpoint for this peer: nothing to inspect */
            continue;
        }

        if ( use_rdma ) {
            bml_btl_array = &(endpoint->btl_rdma);
        }
        else {
            bml_btl_array = &(endpoint->btl_send);
        }

        /* the selected list may be empty (e.g. no RDMA capable BTL) */
        if ( 0 == mca_bml_base_btl_array_get_size ( bml_btl_array ) ) {
            continue;
        }
        bml_btl = mca_bml_base_btl_array_get_index ( bml_btl_array, 0 );
        if ( NULL == bml_btl ) {
            continue;
        }
        btl = bml_btl->btl->btl_component;

        /* sanity check */
        if ( strcmp(btl->btl_version.mca_type_name,"btl") ) {
            printf("Oops, got the wrong component! type_name = %s\n",
                   btl->btl_version.mca_type_name );
        }

        /* check for the required component */
        if (! strcmp (btl->btl_version.mca_component_name, component_name)){
            counter++;
            if (i<firstproc ) {
                firstproc = i;
            }
        }
    }

    *ncount = counter;
    /* final decision */
    if ( counter == 0 ) {
        /* this is the section indicating, that we are not
           using this component */
        firstproc = MPI_UNDEFINED;
    }
    else {
        /* the calling rank belongs to the subgroup as well */
        if ( rank < firstproc ) {
            firstproc = rank;
        }
    }

    *key = firstproc;

    return;
}