/*
 * Look up a registration for [addr, addr + size) in the mpool's rcache.
 *
 * The rcache's find routine is handed `reg` directly and its return code is
 * passed back to the caller unchanged.  An entry stored in *reg is counted
 * as a hit (and its ref_count incremented) only when one of these holds:
 *   - leave_pinned is enabled for the component,
 *   - the entry carries MCA_MPOOL_FLAGS_PERSIST, or
 *   - the entry spans exactly the page-aligned extent of the request.
 * In every other case only the not-found statistic is bumped; *reg is not
 * modified by this function in that case.
 */
int mca_mpool_grdma_find(struct mca_mpool_base_module_t *mpool, void *addr,
        size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *grdma = (mca_mpool_grdma_module_t*)mpool;
    unsigned char *want_base, *want_bound;
    int rc;

    /* page-aligned first and last byte of the requested range */
    want_base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    want_bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                                 mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    rc = mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);

    if (NULL == *reg ||
        !(mca_mpool_grdma_component.leave_pinned ||
          ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
          ((*reg)->base == want_base && (*reg)->bound == want_bound))) {
        /* nothing found, or the entry is not usable for this request */
        grdma->stat_cache_notfound++;
    } else {
        assert(((void*)(*reg)->bound) >= addr);

        /* an unreferenced entry is parked on the LRU list while leave_pinned
         * is on; take it off before handing out a reference */
        if (mca_mpool_grdma_component.leave_pinned &&
            0 == (*reg)->ref_count) {
            opal_list_remove_item(&grdma->pool->lru_list,
                                  (opal_list_item_t*)(*reg));
        }

        grdma->stat_cache_found++;
        (*reg)->ref_count++;
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
/* Example #2 */
/* 0 */
/*
 * Find the registrations that intersect [addr, addr + size).
 *
 * The range is widened to page boundaries and forwarded to
 * mca_rcache_vma_tree_find_all together with the caller's output array.
 *
 * rcache   VMA rcache module to search
 * addr     first byte of the range
 * size     length of the range in bytes; 0 is rejected
 * regs     output array handed through to the tree lookup
 * reg_cnt  capacity of regs, handed through to the tree lookup
 *
 * Returns OMPI_ERROR for an empty range, otherwise the value returned by
 * mca_rcache_vma_tree_find_all.
 */
int mca_rcache_vma_find_all(struct mca_rcache_base_module_t* rcache,
        void* addr, size_t size, mca_mpool_base_registration_t **regs,
        int reg_cnt)
{
    void *base_addr, *bound_addr;

    if(size == 0) {
        return OMPI_ERROR;
    }

    base_addr = down_align_addr(addr, mca_mpool_base_page_size_log);
    /* Compute the last byte with char* arithmetic, as the other mpool
     * routines do.  Going through `unsigned long` would truncate the address
     * on platforms where that type is narrower than a pointer. */
    bound_addr = up_align_addr((void*) ((char*) addr + size - 1), mca_mpool_base_page_size_log);

    return mca_rcache_vma_tree_find_all((mca_rcache_vma_module_t*)rcache,
            (unsigned char*)base_addr, (unsigned char*)bound_addr, regs,
            reg_cnt);
}
/* Example #3 */
/* 0 */
/*
 * Register [addr, addr + size) without consulting or populating the rcache.
 *
 * The range is widened to page boundaries, a registration object is taken
 * from the module's free list and handed to resources.register_mem.
 *
 * On success *reg receives the new registration with its ref_count
 * incremented and OMPI_SUCCESS is returned.  On failure the error code of
 * the free-list allocation or of register_mem is returned and *reg is not
 * written.
 *
 * This routine never acquires mpool->rcache->lock, so none of its exit paths
 * may release it.
 */
static int register_cache_bypass(mca_mpool_base_module_t *mpool,
        void *addr, size_t size, uint32_t flags,
        mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr( (void*) ((char*) addr + size - 1),
             mca_mpool_base_page_size_log);
    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        /* no lock is held in this function: just report the failure */
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    /* bound is the last byte of the range, hence the + 1 for the length */
    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        /* give the unused registration object back to the free list */
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    return OMPI_SUCCESS;
}
/*
 * Register the memory range [addr, addr + size) with the grdma pool.
 *
 * Unless the caller requested a persistent or a cache-bypassing
 * registration, the rcache is searched first and a usable entry is reused:
 * its ref_count is incremented and OMPI_SUCCESS is returned.
 *
 * Otherwise a registration object covering the page-aligned range is taken
 * from the free list, inserted into the rcache when the cache is in use, and
 * registered through resources.register_mem.  While that call reports
 * OMPI_ERR_OUT_OF_RESOURCE, mca_mpool_grdma_evict is asked to free something
 * and the call is retried.
 *
 * On success *reg points at the registration.  On failure the error code of
 * the failing step is returned.
 */
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *grdma = (mca_mpool_grdma_module_t*)mpool;
    mca_mpool_base_registration_t *new_reg;
    ompi_free_list_item_t *fl_item;
    unsigned char *reg_base, *reg_bound;
    bool skip_cache = (0 != (flags & MCA_MPOOL_FLAGS_CACHE_BYPASS));
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* page-aligned first and last byte of the request */
    reg_base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    reg_bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                                mca_mpool_base_page_size_log);

    /* gc_list has entries queued: run do_unregistration_gc before going on */
    if (!opal_list_is_empty (&grdma->pool->gc_list)) {
        do_unregistration_gc(mpool);
    }

    /* Persistent registrations are always freshly registered and cached, and
     * bypass requests never look at the cache; only the remaining requests
     * search the existing entries. */
    if (!((flags & MCA_MPOOL_FLAGS_PERSIST) || skip_cache)) {
        mca_mpool_base_registration_t *cached;

        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        cached = *reg;

        if (NULL != cached &&
            (mca_mpool_grdma_component.leave_pinned ||
             (cached->flags & MCA_MPOOL_FLAGS_PERSIST) ||
             (cached->base == reg_base && cached->bound == reg_bound))) {
            /* an unreferenced entry is on the LRU list while leave_pinned
             * is on; remove it before handing out a reference */
            if (mca_mpool_grdma_component.leave_pinned &&
                0 == cached->ref_count) {
                opal_list_remove_item(&grdma->pool->lru_list,
                                      (opal_list_item_t*) cached);
            }
            grdma->stat_cache_hit++;
            cached->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return OMPI_SUCCESS;
        }

        grdma->stat_cache_miss++;
        *reg = NULL; /* the lookup may have stored an unsuitable entry */

        /* With no suitable entry and leave_pinned off, the new registration
         * is kept out of the cache.
         * NOTE: the cache is still used for persistent registrations, which
         * is why the lookup above can find something. */
        if (!mca_mpool_grdma_component.leave_pinned) {
            skip_cache = true;
        }
    }

    OMPI_FREE_LIST_GET(&grdma->reg_list, fl_item, rc);
    if (OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    new_reg = (mca_mpool_base_registration_t*) fl_item;

    new_reg->mpool = mpool;
    new_reg->base = reg_base;
    new_reg->bound = reg_bound;
    new_reg->flags = flags;

    if (!skip_cache) {
        rc = mpool->rcache->rcache_insert(mpool->rcache, new_reg, 0);

        if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            OMPI_FREE_LIST_RETURN(&grdma->reg_list, fl_item);
            return rc;
        }
    }

    /* register, evicting one entry per retry while resources are exhausted */
    for (;;) {
        rc = grdma->resources.register_mem(grdma->resources.reg_data,
                                           reg_base, reg_bound - reg_base + 1,
                                           new_reg);
        if (OMPI_ERR_OUT_OF_RESOURCE != rc) {
            break;
        }
        if (!mca_mpool_grdma_evict (mpool)) {
            break;
        }
    }

    if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
        /* undo the cache insertion made above, if any */
        if (!skip_cache) {
            mpool->rcache->rcache_delete(mpool->rcache, new_reg);
        }
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&grdma->reg_list, fl_item);
        return rc;
    }

    *reg = new_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
    return OMPI_SUCCESS;
}
/* Example #5 */
/* 0 */
/*
 * Register the memory range [addr, addr + size) with the rdma pool.
 *
 * Requests flagged MCA_MPOOL_FLAGS_CACHE_BYPASS are delegated to
 * register_cache_bypass right away.  Non-persistent requests first search
 * the rcache and reuse a usable entry (ref_count incremented, success
 * returned).  If nothing usable is found, leave_pinned is off and
 * rcache_size_limit is 0, the rcache lock is dropped and the request is
 * delegated to register_cache_bypass as well.
 *
 * In all remaining cases a registration object for the page-aligned range is
 * taken from the free list and inserted into the rcache, evicting entries
 * from the tail of mru_list while the insert reports
 * OMPI_ERR_TEMP_OUT_OF_RESOURCE, and is then registered through
 * resources.register_mem.
 *
 * On success *reg points at the registration.  On failure the error code of
 * the failing step is returned.
 */
int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *new_reg;
    ompi_free_list_item_t *fl_item;
    unsigned char *reg_base, *reg_bound;
    int rc;

    /* a bypass request never touches the cache */
    if (flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        return register_cache_bypass(mpool, addr, size, flags, reg);
    }

    /* page-aligned first and last byte of the request */
    reg_base = down_align_addr(addr, mca_mpool_base_page_size_log);
    reg_bound = up_align_addr((void*)((char*) addr + size - 1),
                              mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* Persistent registrations are always freshly registered and cached;
     * everything else looks through the existing entries first. */
    if (!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
        mca_mpool_base_registration_t *cached;

        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        cached = *reg;

        if (NULL != cached &&
            (mca_mpool_rdma_component.leave_pinned ||
             (cached->flags & MCA_MPOOL_FLAGS_PERSIST) ||
             (cached->base == reg_base && cached->bound == reg_bound))) {
            /* an unreferenced entry is on the MRU list while leave_pinned
             * is on; remove it before handing out a reference */
            if (mca_mpool_rdma_component.leave_pinned &&
                0 == cached->ref_count) {
                opal_list_remove_item(&rdma->mru_list,
                                      (opal_list_item_t*) cached);
            }
            rdma->stat_cache_hit++;
            cached->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return MPI_SUCCESS;
        }

        rdma->stat_cache_miss++;
        *reg = NULL; /* the lookup may have stored an unsuitable entry */

        /* With no suitable entry, leave_pinned off and no cache size limit,
         * the cache is skipped.  When a limit is set the registration has to
         * go into the cache so that hitting the limit can be detected.
         * NOTE: the cache is still used for persistent registrations, which
         * is why the lookup above can find something. */
        if (!mca_mpool_rdma_component.leave_pinned &&
            0 == mca_mpool_rdma_component.rcache_size_limit) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return register_cache_bypass(mpool, addr, size, flags, reg);
        }
    }

    OMPI_FREE_LIST_GET(&rdma->reg_list, fl_item, rc);
    if (OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    new_reg = (mca_mpool_base_registration_t*) fl_item;

    new_reg->mpool = mpool;
    new_reg->base = reg_base;
    new_reg->bound = reg_bound;
    new_reg->flags = flags;

    /* insert into the cache, evicting one unused entry per retry */
    for (;;) {
        mca_mpool_base_registration_t *victim;

        rc = mpool->rcache->rcache_insert(mpool->rcache, new_reg,
                 mca_mpool_rdma_component.rcache_size_limit);
        if (OMPI_ERR_TEMP_OUT_OF_RESOURCE != rc) {
            break;
        }

        /* candidate is the last element of the MRU list, if there is one */
        victim = (mca_mpool_base_registration_t*)
            opal_list_get_last(&rdma->mru_list);
        if (opal_list_get_end(&rdma->mru_list) ==
                (opal_list_item_t*) victim) {
            break;
        }

        rc = dereg_mem(mpool, victim);
        if (MPI_SUCCESS != rc) {
            break;
        }

        mpool->rcache->rcache_delete(mpool->rcache, victim);
        opal_list_remove_item(&rdma->mru_list,
                              (opal_list_item_t*) victim);
        OMPI_FREE_LIST_RETURN(&rdma->reg_list,
                              (ompi_free_list_item_t*) victim);
        rdma->stat_evicted++;
    }

    if (rc != OMPI_SUCCESS) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&rdma->reg_list, fl_item);
        return rc;
    }

    /* reg_bound is the last byte of the range, hence the + 1 */
    rc = rdma->resources.register_mem(rdma->resources.reg_data,
                                      reg_base, reg_bound - reg_base + 1,
                                      new_reg);

    if (rc != OMPI_SUCCESS) {
        /* undo the cache insertion made above */
        mpool->rcache->rcache_delete(mpool->rcache, new_reg);
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&rdma->reg_list, fl_item);
        return rc;
    }

    *reg = new_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    return OMPI_SUCCESS;
}
/* Example #6 */
/* 0 */
/*
 * Look up the remote pointer in the peer's rcache and attach the surrounding
 * segment if no suitable registration exists yet.
 *
 * ep         endpoint of the peer; its lock protects the rcache access
 * rem_ptr    address in the peer's address space
 * size       number of bytes needed starting at rem_ptr
 * flags      flags stored in a newly created registration
 * local_ptr  on success, receives reg->alloc_base offset by the distance of
 *            rem_ptr from the registration's base
 *
 * The requested range is widened to the component's attach alignment and
 * clamped to VADER_MAX_ADDRESS.  A cached registration that fully contains
 * the widened range is reused (its ref_count is incremented).  Otherwise the
 * first non-persistent overlapping registration is removed from the rcache,
 * its ref_count is dropped, and the new segment is extended downwards to
 * start at the lower of the two bases before a fresh xpmem attachment is
 * made.
 *
 * Returns the registration, or NULL when the registration object cannot be
 * allocated or xpmem_attach fails; *local_ptr is not written in that case.
 */
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
                                                      size_t size, int flags, void **local_ptr)
{
    struct mca_rcache_base_module_t *rcache = ep->rcache;
    mca_mpool_base_registration_t *regs[10], *reg = NULL;
    xpmem_addr_t xpmem_addr;
    uintptr_t base, bound;
    int rc, i;

    /* protect rcache access */
    OPAL_THREAD_LOCK(&ep->lock);

    /* use btl/self for self communication */
    assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK);

    /* bound is exclusive here: one past the aligned last byte */
    base = (uintptr_t) down_align_addr(rem_ptr, mca_btl_vader_component.log_attach_align);
    bound = (uintptr_t) up_align_addr((void *)((uintptr_t) rem_ptr + size - 1),
                                      mca_btl_vader_component.log_attach_align) + 1;
    if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) {
        bound = VADER_MAX_ADDRESS;
    }

    /* several segments may match the base pointer */
    rc = rcache->rcache_find_all (rcache, (void *) base, bound - base, regs, 10);
    for (i = 0 ; i < rc ; ++i) {
        if (bound <= (uintptr_t)regs[i]->bound && base  >= (uintptr_t)regs[i]->base) {
            /* existing segment covers the whole request: reuse it */
            opal_atomic_add (&regs[i]->ref_count, 1);
            reg = regs[i];
            goto reg_found;
        }

        if (regs[i]->flags & MCA_MPOOL_FLAGS_PERSIST) {
            continue;
        }

        /* remove this pointer from the rcache and decrement its reference count
           (so it is detached later) */
        rc = rcache->rcache_delete (rcache, regs[i]);
        if (OPAL_UNLIKELY(0 != rc)) {
            /* someone beat us to it? */
            break;
        }

        /* start the new segment from the lower of the two bases */
        base = (uintptr_t) regs[i]->base < base ? (uintptr_t) regs[i]->base : base;

        opal_atomic_add (&regs[i]->ref_count, -1);

        if (OPAL_LIKELY(0 == regs[i]->ref_count)) {
            /* this pointer is not in use */
            (void) xpmem_detach (regs[i]->alloc_base);
            OBJ_RELEASE(regs[i]);
        }

        /* only one overlapping segment is replaced per call */
        break;
    }

    reg = OBJ_NEW(mca_mpool_base_registration_t);
    if (OPAL_UNLIKELY(NULL == reg)) {
        /* allocation failed: reg must not be dereferenced below, so bail out
         * the same way the attach-failure path does */
        OPAL_THREAD_UNLOCK(&ep->lock);
        return NULL;
    }

    /* stick around for awhile */
    reg->ref_count = 2;
    reg->base  = (unsigned char *) base;
    reg->bound = (unsigned char *) bound;
    reg->flags = flags;

#if defined(HAVE_SN_XPMEM_H)
    xpmem_addr.id     = ep->apid;
#else
    xpmem_addr.apid   = ep->apid;
#endif
    xpmem_addr.offset = base;

    reg->alloc_base = xpmem_attach (xpmem_addr, bound - base, NULL);
    if (OPAL_UNLIKELY((void *)-1 == reg->alloc_base)) {
        OPAL_THREAD_UNLOCK(&ep->lock);
        OBJ_RELEASE(reg);
        return NULL;
    }

    opal_memchecker_base_mem_defined (reg->alloc_base, bound - base);

    /* NOTE(review): the return value of rcache_insert is ignored here, as in
     * the original code — confirm a failed insert needs no handling. */
    rcache->rcache_insert (rcache, reg, 0);

reg_found:
    opal_atomic_wmb ();
    *local_ptr = (void *) ((uintptr_t) reg->alloc_base +
                           (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));

    OPAL_THREAD_UNLOCK(&ep->lock);

    return reg;
}
/* Example #7 */
/* 0 */
/*
 * Register the memory range [addr, addr + size) with the grdma pool.
 *
 * The range is widened to page boundaries.  When OPAL_CUDA_GDR_SUPPORT is
 * compiled in and MCA_MPOOL_FLAGS_CUDA_GPU_MEM is set, the range reported by
 * mca_common_cuda_get_address_range is used instead.
 *
 * Unless the caller requested a persistent or a cache-bypassing
 * registration, the rcache is searched for the widened range and an entry
 * found there is reused: its ref_count is incremented and OPAL_SUCCESS is
 * returned.
 *
 * Otherwise a registration object is taken from the free list, inserted into
 * the rcache unless bypass was requested, and registered through
 * resources.register_mem.  While that call reports OPAL_ERR_OUT_OF_RESOURCE,
 * mca_mpool_grdma_evict is asked to free something and the call is retried.
 *
 * On success *reg points at the registration.  On failure the error code of
 * the failing step is returned (OPAL_ERR_OUT_OF_RESOURCE when the free list
 * is exhausted).
 */
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *grdma = (mca_mpool_grdma_module_t*)mpool;
    const bool skip_cache = (0 != (flags & MCA_MPOOL_FLAGS_CACHE_BYPASS));
    const bool want_persist = (0 != (flags & MCA_MPOOL_FLAGS_PERSIST));
    mca_mpool_base_registration_t *new_reg;
    opal_free_list_item_t *fl_item;
    unsigned char *reg_base, *reg_bound;
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* page-aligned first and last byte of the request */
    reg_base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    reg_bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                                mca_mpool_base_page_size_log);

    /* gc_list has entries queued: run do_unregistration_gc before going on */
    if (!opal_list_is_empty (&grdma->pool->gc_list)) {
        do_unregistration_gc(mpool);
    }

#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        size_t gpu_len;
        mca_common_cuda_get_address_range(&reg_base, &gpu_len, addr);
        reg_bound = reg_base + gpu_len - 1;
        /* Check to see if this memory is in the cache and if it has been freed. If so,
         * this call will boot it out of the cache. */
        check_for_cuda_freed_memory(mpool, reg_base, gpu_len);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    /* Persistent registrations are always freshly registered and cached, and
     * bypass requests never look at the cache; only the remaining requests
     * search the existing entries. */
    if (!skip_cache && !want_persist) {
        mpool->rcache->rcache_find(mpool->rcache, reg_base,
                                   reg_bound - reg_base + 1, reg);

        /* NOTE(review): the INVALID test below looks at the caller-supplied
         * flags, not at (*reg)->flags — confirm this is intended. */
        if (NULL != *reg && 0 == (flags & MCA_MPOOL_FLAGS_INVALID)) {
            /* an unreferenced entry sits on the LRU list (presumably only
             * possible with leave pinned); take it off before reuse */
            if (0 == (*reg)->ref_count) {
                opal_list_remove_item(&grdma->pool->lru_list,
                                      (opal_list_item_t *)(*reg));
            }

            /* the lookup used the widened range, so the entry is reused */
            grdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return OPAL_SUCCESS;
        }

        grdma->stat_cache_miss++;
        *reg = NULL; /* the lookup may have stored an unsuitable entry */

        /* Unless the caller explicitly asked for bypass, the registration is
         * always stored in the rcache.  Per the original author's note this
         * speeds up the case where no leave pinned protocol is in use but
         * the same segment is used in multiple simultaneous transactions;
         * bypass used to be forced here when leave_pinned was off. */
    }

    fl_item = opal_free_list_get (&grdma->reg_list);
    if (NULL == fl_item) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    new_reg = (mca_mpool_base_registration_t*) fl_item;

    new_reg->mpool = mpool;
    new_reg->base = reg_base;
    new_reg->bound = reg_bound;
    new_reg->flags = flags;
#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        mca_common_cuda_get_buffer_id(new_reg);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    if (!skip_cache) {
        rc = mpool->rcache->rcache_insert(mpool->rcache, new_reg, 0);

        if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            opal_free_list_return (&grdma->reg_list, fl_item);
            return rc;
        }
    }

    /* register, evicting one entry per retry while resources are exhausted */
    for (;;) {
        rc = grdma->resources.register_mem(grdma->resources.reg_data,
                                           reg_base, reg_bound - reg_base + 1,
                                           new_reg);
        if (OPAL_ERR_OUT_OF_RESOURCE != rc) {
            break;
        }
        if (!mca_mpool_grdma_evict (mpool)) {
            break;
        }
    }

    if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
        /* undo the cache insertion made above, if any */
        if (!skip_cache) {
            mpool->rcache->rcache_delete(mpool->rcache, new_reg);
        }
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        opal_free_list_return (&grdma->reg_list, fl_item);
        return rc;
    }

    *reg = new_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
    return OPAL_SUCCESS;
}