/**
 * Drop one reference on a grdma registration.
 *
 * While other references remain, only the counter is decremented. When the
 * last reference goes away the registration is either appended to the pool's
 * LRU list (if registration_is_cachebale() accepts it) or passed to
 * dereg_mem(). In those two cases the rcache's rcache_clean() hook is run
 * after the rcache lock has been released.
 *
 * @param mpool  mpool module (cast to the grdma module type internally)
 * @param reg    registration being released; its ref_count must be > 0
 *
 * @return OMPI_SUCCESS, or the value returned by dereg_mem() if it was called
 */
int mca_mpool_grdma_deregister(struct mca_mpool_base_module_t *mpool,
                               mca_mpool_base_registration_t *reg)
{
    mca_mpool_grdma_module_t *grdma_module = (mca_mpool_grdma_module_t *) mpool;
    int status = OMPI_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    reg->ref_count--;
    if (reg->ref_count > 0) {
        /* somebody else still uses this registration */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return status;
    }

    if (!registration_is_cachebale(reg)) {
        status = dereg_mem (reg);
    } else {
        /* keep it around for reuse */
        opal_list_append(&grdma_module->pool->lru_list, (opal_list_item_t *) reg);
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    return status;
}
Exemple #2
0
/**
 * Drop one reference on an rdma registration.
 *
 * When the last reference is released the registration is either kept on the
 * MRU list (leave_pinned set and neither CACHE_BYPASS nor PERSIST flagged) or
 * deregistered. After a successful deregistration it is removed from the
 * rcache (unless flagged CACHE_BYPASS) and returned to the module's free list.
 * Everything happens under the rcache lock.
 *
 * @param mpool  mpool module (cast to the rdma module type internally)
 * @param reg    registration being released; its ref_count must be > 0
 *
 * @return OMPI_SUCCESS, or the value returned by dereg_mem() if it was called
 */
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
                            mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *rdma_module = (mca_mpool_rdma_module_t *) mpool;
    int status = OMPI_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    reg->ref_count--;
    if (reg->ref_count > 0) {
        /* still referenced elsewhere */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return status;
    }

    if (!mca_mpool_rdma_component.leave_pinned ||
        (reg->flags & (MCA_MPOOL_FLAGS_CACHE_BYPASS | MCA_MPOOL_FLAGS_PERSIST))) {
        status = dereg_mem(mpool, reg);
        if (OMPI_SUCCESS == status) {
            if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
                mpool->rcache->rcache_delete(mpool->rcache, reg);
            }
            OMPI_FREE_LIST_RETURN(&rdma_module->reg_list,
                                  (ompi_free_list_item_t *) reg);
        }
    } else {
        /* leave_pinned: keep the memory registered and remember it on the
         * MRU list so it can be reused */
        opal_list_prepend(&rdma_module->mru_list, (opal_list_item_t *) reg);
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return status;
}
/* Drain the pool's garbage-collection list, handing every entry to
 * dereg_mem(). Each entry is unlinked from the list before it is
 * deregistered.
 *
 * This function must be called with the rcache lock held */
static inline void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_grdma_module_t *grdma_module = (mca_mpool_grdma_module_t *) mpool;
    opal_list_item_t *stale;

    for (;;) {
        stale = opal_list_remove_first(&grdma_module->pool->gc_list);
        if (NULL == stale) {
            break;  /* list is empty */
        }

        dereg_mem((mca_mpool_base_registration_t *) stale);
    }
}
/* Pop every entry off the cache's garbage-collection LIFO and hand it to
 * dereg_mem(). Each entry is popped before it is deregistered. */
static inline void do_unregistration_gc (mca_rcache_base_module_t *rcache)
{
    mca_rcache_grdma_module_t *grdma_module = (mca_rcache_grdma_module_t *) rcache;
    opal_list_item_t *stale;

    for (;;) {
        stale = opal_lifo_pop_atomic (&grdma_module->cache->gc_lifo);
        if (NULL == stale) {
            break;  /* nothing left to collect */
        }

        OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
                             "deleting stale registration %p", (void *) stale));
        dereg_mem ((mca_rcache_base_registration_t *) stale);
    }
}
Exemple #5
0
/**
 * Evict cached registrations covering CUDA memory that has already been freed.
 *
 * Looks up a registration for [addr, addr + size) in the rcache. If one is
 * found and mca_common_cuda_previously_freed_memory() reports it as stale,
 * every registration overlapping that registration's range is marked
 * MCA_MPOOL_FLAGS_INVALID. Registrations with no references are removed from
 * the LRU list and deregistered immediately; registrations still in use are
 * reported and left alone.
 *
 * Locking: this function neither acquires nor releases the rcache lock. It
 * walks the rcache and edits the LRU list, so the caller is presumably
 * expected to hold mpool->rcache->lock for the duration of the call
 * (NOTE(review): confirm against the caller). An earlier revision released
 * the lock on the slow path only, which left the lock state dependent on the
 * path taken.
 *
 * @param mpool  mpool module (cast to the grdma module type internally)
 * @param addr   start of the region to check
 * @param size   length of the region in bytes
 *
 * @return OPAL_SUCCESS if nothing had to be done or everything stale was
 *         released; OPAL_ERROR if a stale registration is still referenced
 */
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
    mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
    /* Start from NULL so a lookup that leaves the output untouched is
     * treated as "not found" instead of reading an indeterminate pointer. */
    mca_mpool_base_registration_t *reg = NULL;
    int reg_cnt, i, rc = OPAL_SUCCESS;

    mpool->rcache->rcache_find(mpool->rcache, addr, size, &reg);
    if (NULL == reg) {
        return OPAL_SUCCESS;
    }

    /* If not previously freed memory, just return 0 */
    if (!(mca_common_cuda_previously_freed_memory(reg))) {
        return OPAL_SUCCESS;
    }

    /* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "Before free"); */

    /* This memory has been freed.  Find all registrations and delete */
    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, reg->base, reg->bound - reg->base + 1,
                                                 regs, GRDMA_MPOOL_NREGS);
        for (i = 0 ; i < reg_cnt ; ++i) {
            regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID;
            if (regs[i]->ref_count) {
                /* print the size through a wide type so large regions are
                 * not truncated */
                opal_output(0, "Release FAILED: ref_count=%d, base=%p, bound=%p, size=%lu",
                            regs[i]->ref_count, regs[i]->base, regs[i]->bound,
                            (unsigned long) (regs[i]->bound - regs[i]->base + 1));
                /* memory is being freed, but there are registration in use that
                 * covers the memory. This can happen even in a correct program,
                 * but may also be an user error. We can't tell. Mark the
                 * registration as invalid. It will not be used any more and
                 * will be unregistered when ref_count will become zero */
                rc = OPAL_ERROR; /* tell caller that something was wrong */
            } else {
                opal_list_remove_item(&mpool_grdma->pool->lru_list, (opal_list_item_t *) regs[i]);
                /* Now deregister.  Do not use gc_list as we need to kick this out now. */
                dereg_mem(regs[i]);
            }
        }
    } while (reg_cnt == GRDMA_MPOOL_NREGS);

    /* No OPAL_THREAD_UNLOCK here: this function never took the lock, and the
     * early returns above do not release it either. */

    /* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "After free");*/

    return rc;
}
/**
 * Decide whether a cached registration can satisfy a find request.
 *
 * A registration is skipped (0 returned, nothing changed) when it is flagged
 * invalid, belongs to a different rcache module, or does not fully cover
 * [args->base, args->bound].
 *
 * If it covers the range but lacks some of the requested access flags, its
 * access flags are merged into args->access_flags and the registration is
 * retired: deregistered right away when unreferenced, otherwise removed from
 * the VMA tree (unless already CACHE_BYPASS) and flagged INVALID | CACHE_BYPASS.
 * 0 is returned in that case as well.
 *
 * Otherwise the registration is a hit: it is taken off the LRU list if it was
 * unreferenced, its reference count is bumped, it is stored in args->reg and
 * 1 is returned.
 *
 * @param grdma_reg  candidate registration
 * @param ctx        pointer to a mca_rcache_base_find_args_t
 *
 * @return 1 if grdma_reg was selected, 0 otherwise
 */
static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_reg, void *ctx)
{
    mca_rcache_base_find_args_t *find_args = (mca_rcache_base_find_args_t *) ctx;
    mca_rcache_grdma_module_t *module = find_args->rcache_grdma;
    int32_t new_refs;

    /* registrations we must not even look at */
    if (grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) {
        return 0;
    }
    if (&module->super != grdma_reg->rcache) {
        return 0;
    }
    if (grdma_reg->base > find_args->base || grdma_reg->bound < find_args->bound) {
        return 0;
    }

    if (OPAL_UNLIKELY((find_args->access_flags & grdma_reg->access_flags) != find_args->access_flags)) {
        /* insufficient permissions: remember what this registration offered */
        find_args->access_flags |= grdma_reg->access_flags;

        if (0 == grdma_reg->ref_count) {
            if (registration_is_cacheable(grdma_reg)) {
                opal_list_remove_item (&module->cache->lru_list, (opal_list_item_t *) grdma_reg);
            }

            dereg_mem (grdma_reg);
        } else {
            if (!(grdma_reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
                mca_rcache_base_vma_delete (module->cache->vma_module, grdma_reg);
            }

            /* mark the registration to go away when it is deregistered */
            grdma_reg->flags |= MCA_RCACHE_FLAGS_INVALID | MCA_RCACHE_FLAGS_CACHE_BYPASS;
        }

        /* can't use this registration */
        return 0;
    }

    if (0 == grdma_reg->ref_count) {
        /* Leave pinned must be set for this to still be in the rcache. */
        opal_list_remove_item(&module->cache->lru_list,
                              (opal_list_item_t *) grdma_reg);
    }

    /* This segment fits fully within an existing segment. */
    module->stat_cache_hit++;
    new_refs = opal_atomic_add_32 (&grdma_reg->ref_count, 1);
    OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
                         "returning existing registration %p. references %d", (void *) grdma_reg, new_refs));
    (void) new_refs;
    find_args->reg = grdma_reg;

    return 1;
}
Exemple #7
0
void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
    mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
    int reg_cnt, i;

    /* Statistic */
    if (true == mca_mpool_grdma_component.print_stats) {
        opal_output(0, "%s grdma: stats "
                "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                mpool_grdma->stat_cache_hit, mpool_grdma->stat_cache_miss,
                mpool_grdma->stat_cache_found, mpool_grdma->stat_cache_notfound,
                mpool_grdma->stat_evicted);
    }

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    do_unregistration_gc(mpool);

    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
                                                 regs, GRDMA_MPOOL_NREGS);

        for (i = 0 ; i < reg_cnt ; ++i) {
            if (regs[i]->ref_count) {
                regs[i]->ref_count = 0; /* otherwise dereg will fail on assert */
            } else if (mca_mpool_grdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_grdma->pool->lru_list,
                                      (opal_list_item_t *) regs[i]);
            }

	    (void) dereg_mem(regs[i]);
        }
    } while (reg_cnt == GRDMA_MPOOL_NREGS);

    OBJ_RELEASE(mpool_grdma->pool);

    OBJ_DESTRUCT(&mpool_grdma->reg_list);
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    /* this mpool was allocated by grdma_init in mpool_grdma_component.c */
    free(mpool);
}
Exemple #8
0
/**
 * Drop all cached registrations that overlap a memory range.
 *
 * Under the rcache lock, every registration overlapping [base, base + size)
 * is looked up. Unreferenced registrations are deregistered, removed from the
 * rcache and the MRU list, and returned to the free list. Registrations that
 * are still referenced are flagged CACHE_BYPASS and removed from the rcache
 * only; this is counted as an error.
 *
 * @param mpool  mpool module (cast to the rdma module type internally)
 * @param base   start of the range being released
 * @param size   length of the range in bytes
 *
 * @return OMPI_SUCCESS if every overlapping registration was released
 *         cleanly, OMPI_ERROR if any deregistration failed or any
 *         registration was still in use
 */
int mca_mpool_rdma_release_memory(struct mca_mpool_base_module_t *mpool,
        void *base, size_t size)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;
    ompi_pointer_array_t regs;
    int reg_cnt, i, err = 0;

    OBJ_CONSTRUCT(&regs, ompi_pointer_array_t);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size, &regs);

    for(i = 0; i < reg_cnt; i++) {
        reg = (mca_mpool_base_registration_t*)
            ompi_pointer_array_get_item(&regs, i);

        if(0 == reg->ref_count) {
            if(dereg_mem(mpool, reg) != OMPI_SUCCESS) {
                /* leave the registration where it is and report the failure */
                err++;
                continue;
            }
        } else {
            /* remove registration from cache and wait for ref_count goes to
             * zero before unregister memory. Note that our registered memory
             * statistic can go wrong at this point, but it is better than
             * potential memory corruption. And we return error in this case to
             * the caller */
            reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS;
            err++; /* tell caller that something was wrong */
        }
        mpool->rcache->rcache_delete(mpool->rcache, reg);
        if(0 == reg->ref_count) {
            opal_list_remove_item(&mpool_rdma->mru_list,
                    (opal_list_item_t*)reg);
            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                    (ompi_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    ompi_pointer_array_remove_all(&regs);
    /* Pair with the OBJ_CONSTRUCT above so the array's own resources are
     * released; without this the temporary array appears to leak on every
     * call (NOTE(review): remove_all is assumed to only clear the entries). */
    OBJ_DESTRUCT(&regs);

    return err?OMPI_ERROR:OMPI_SUCCESS;
}
/**
 * Evict the first registration on a pool's LRU list.
 *
 * The entry is unlinked from the list, deregistered via dereg_mem() (result
 * ignored) and the owning module's eviction counter is incremented.
 *
 * @param pool  pool whose LRU list is consulted
 *
 * @return true if a registration was evicted, false if the list was empty
 */
static inline bool mca_mpool_grdma_evict_lru_local (mca_mpool_grdma_pool_t *pool)
{
    mca_mpool_base_registration_t *victim =
        (mca_mpool_base_registration_t *) opal_list_remove_first (&pool->lru_list);
    mca_mpool_grdma_module_t *owner;

    if (NULL != victim) {
        /* fetch the owning module before the registration is handed to
         * dereg_mem(), exactly as the statistics update needs it afterwards */
        owner = (mca_mpool_grdma_module_t *) victim->mpool;

        (void) dereg_mem (victim);

        owner->stat_evicted++;

        return true;
    }

    return false;
}
/**
 * Evict the first registration on a cache's LRU list.
 *
 * The entry is unlinked from the list while holding the VMA module's
 * vma_lock; the lock is released before the registration is deregistered via
 * dereg_mem() (result ignored). The owning module's eviction counter is then
 * incremented.
 *
 * @param cache  cache whose LRU list is consulted
 *
 * @return true if a registration was evicted, false if the list was empty
 */
static inline bool mca_rcache_grdma_evict_lru_local (mca_rcache_grdma_cache_t *cache)
{
    mca_rcache_base_registration_t *victim;
    mca_rcache_grdma_module_t *owner;

    opal_mutex_lock (&cache->vma_module->vma_lock);
    victim = (mca_rcache_base_registration_t *) opal_list_remove_first (&cache->lru_list);
    opal_mutex_unlock (&cache->vma_module->vma_lock);

    if (NULL != victim) {
        /* fetch the owning module before the registration is handed to
         * dereg_mem(), exactly as the statistics update needs it afterwards */
        owner = (mca_rcache_grdma_module_t *) victim->rcache;

        (void) dereg_mem (victim);

        owner->stat_evicted++;

        return true;
    }

    return false;
}
/**
 * Drop one reference on an rdma registration.
 *
 * When the last reference is released the registration is either kept on the
 * MRU list (leave_pinned set and registration_is_cachebale() accepts it) or
 * torn down: it is removed from the rcache (unless flagged CACHE_BYPASS),
 * deregistered with the rcache lock temporarily dropped, and on success
 * returned to the module's free list. In both of those cases the rcache's
 * rcache_clean() hook is run after the lock has been released.
 *
 * @param mpool  mpool module (cast to the rdma module type internally)
 * @param reg    registration being released; its ref_count must be > 0
 *
 * @return OMPI_SUCCESS, or the value returned by dereg_mem() if it was called
 */
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
                            mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *rdma_module = (mca_mpool_rdma_module_t *) mpool;
    int status = OMPI_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    reg->ref_count--;
    if (reg->ref_count > 0) {
        /* still referenced elsewhere */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return status;
    }

    if (!(mca_mpool_rdma_component.leave_pinned && registration_is_cachebale(reg))) {
        /* Remove from rcache first */
        if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
            mpool->rcache->rcache_delete(mpool->rcache, reg);
        }

        /* Drop the rcache lock before deregistring the memory */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        status = dereg_mem(mpool, reg);
        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        if (OMPI_SUCCESS == status) {
            OMPI_FREE_LIST_RETURN(&rdma_module->reg_list,
                                  (ompi_free_list_item_t *) reg);
        }
    } else {
        /* if leave_pinned is set don't deregister memory, but put it
         * on MRU list for future use */
        opal_list_prepend(&rdma_module->mru_list, (opal_list_item_t *) reg);
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    return status;
}
/* Drain the module's garbage-collection list.
 *
 * Every entry is unlinked from gc_list, removed from the rcache,
 * deregistered and returned to the registration free list. An empty list is
 * a no-op.
 *
 * This function must be called with the rcache lock held. The lock is
 * dropped around each dereg_mem() call and re-acquired afterwards, so it is
 * held again when the function returns. */
static void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;

    /* Test the result of the removal itself rather than looping
       unconditionally once: with an empty list there is no item, and
       passing NULL on to rcache_delete()/dereg_mem() must be avoided. */
    while (NULL != (reg = (mca_mpool_base_registration_t *)
                    opal_list_remove_first(&mpool_rdma->gc_list))) {
        /* Remove registration from garbage collection list
           before deregistering it */
        mpool->rcache->rcache_delete(mpool->rcache, reg);

        /* Drop the rcache lock before calling dereg_mem as there
           may be memory allocations */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        dereg_mem(mpool, reg);
        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                (ompi_free_list_item_t*)reg);
    }
}
Exemple #13
0
/*
 * register memory
 *
 * Registers [addr, addr + size) with the rdma mpool, reusing a cached
 * registration where possible.
 *
 * - CACHE_BYPASS requests, and cache misses when leave_pinned is off and no
 *   rcache size limit is configured, are delegated to register_cache_bypass().
 * - Otherwise the rcache is searched (skipped for PERSIST requests). A hit
 *   bumps the reference count and returns the existing registration.
 * - On a miss a new registration covering the page-aligned range is taken
 *   from the free list, inserted into the rcache (evicting unused entries
 *   from the tail of the MRU list while the insert reports
 *   OMPI_ERR_TEMP_OUT_OF_RESOURCE) and registered through
 *   resources.register_mem().
 *
 * mpool  - mpool module (cast to the rdma module type internally)
 * addr   - start of the user buffer
 * size   - length of the user buffer in bytes
 * flags  - MCA_MPOOL_FLAGS_* for the request
 * reg    - out: the registration on success
 *
 * Returns OMPI_SUCCESS (MPI_SUCCESS on a cache hit) or the error code of the
 * step that failed.
 */
int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    /* if cache bypass is requested don't use the cache */
    if(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        return register_cache_bypass(mpool, addr, size, flags, reg);
    }

    /* page-align the range: base rounds the first byte down, bound rounds
     * the last byte (addr + size - 1) up */
    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr((void*)((char*) addr + size - 1),
             mca_mpool_base_page_size_log);
    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    /* look through existing regs if not persistent registration requested.
     * Persistent registration are always registered and placed in the cache */
    if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        /* a found registration is usable if leave_pinned is on, if it is
         * persistent, or if it covers exactly the aligned range */
        if(*reg != NULL &&
                (mca_mpool_rdma_component.leave_pinned ||
                 ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
                 ((*reg)->base == base && (*reg)->bound == bound))) {
            /* an unreferenced registration kept by leave_pinned sits on the
             * MRU list; take it off now that it is in use again */
            if(0 == (*reg)->ref_count &&
                    mca_mpool_rdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)(*reg));
            }
            mpool_rdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return MPI_SUCCESS;
        }

        mpool_rdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* If no suitable registration is in cache and leave_pinned isn't
         * set and size of registration cache is unlimited don't use the cache.
         * This is optimisation in case limit is not set. If limit is set we
         * have to put registration into the cache to determine when we hit
         * memory registration limit.
         * NOTE: cache is still used for persistent registrations so previous
         * find can find something */
        if(!mca_mpool_rdma_component.leave_pinned &&
                 mca_mpool_rdma_component.rcache_size_limit == 0) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return register_cache_bypass(mpool, addr, size, flags, reg);
        }
    }

    /* grab a fresh registration descriptor from the free list */
    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    /* insert into the rcache; while the insert reports a temporary lack of
     * resources, evict one unused registration and retry */
    while((rc = mpool->rcache->rcache_insert(mpool->rcache, rdma_reg,
             mca_mpool_rdma_component.rcache_size_limit)) ==
            OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
        mca_mpool_base_registration_t *old_reg;
        /* try to remove one unused reg and retry */
        old_reg = (mca_mpool_base_registration_t*)
            opal_list_get_last(&mpool_rdma->mru_list);
        /* get_last equal to get_end means the MRU list is empty */
        if(opal_list_get_end(&mpool_rdma->mru_list) !=
                (opal_list_item_t*)old_reg) {
            rc = dereg_mem(mpool, old_reg);
            if(MPI_SUCCESS == rc) {
                mpool->rcache->rcache_delete(mpool->rcache, old_reg);
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)old_reg);
                OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                        (ompi_free_list_item_t*)old_reg);
                mpool_rdma->stat_evicted++;
            } else
                break; /* eviction failed: give up with dereg_mem's error */
        } else
            break; /* nothing left to evict: rc stays TEMP_OUT_OF_RESOURCE */
    }

    /* insert (or eviction) failed: hand the descriptor back */
    if(rc != OMPI_SUCCESS) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    /* perform the actual registration of the aligned range */
    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        /* undo the rcache insert and hand the descriptor back */
        mpool->rcache->rcache_delete(mpool->rcache, rdma_reg);
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    return OMPI_SUCCESS;
}
/**
 * Tear down an rdma mpool module.
 *
 * Optionally prints cache statistics, then, under the rcache lock, drains the
 * garbage-collection list (if non-empty) and releases every registration
 * still present in the rcache: each one is removed from the rcache,
 * deregistered with the lock temporarily dropped and, on success, returned
 * to the free list. Afterwards the module's lists are destructed and
 * deferred vma cleanup is run.
 *
 * @param mpool  module to finalize
 */
void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *rdma_module = (mca_mpool_rdma_module_t *) mpool;
    mca_mpool_base_registration_t *batch[RDMA_MPOOL_NREGS];
    mca_mpool_base_registration_t *cur;
    int found, idx;
    int status;

    /* Statistic */
    if (true == mca_mpool_rdma_component.print_stats) {
        opal_output(0, "%s rdma: stats "
                "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                rdma_module->stat_cache_hit, rdma_module->stat_cache_miss,
                rdma_module->stat_cache_found, rdma_module->stat_cache_notfound,
                rdma_module->stat_evicted);
    }

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    if (!opal_list_is_empty(&rdma_module->gc_list)) {
        do_unregistration_gc(mpool);
    }

    /* The rcache hands back at most RDMA_MPOOL_NREGS entries per query, so
     * keep asking until a batch comes back short. */
    do {
        found = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
                                               batch, RDMA_MPOOL_NREGS);

        for (idx = 0; idx < found; idx++) {
            cur = batch[idx];

            if (0 != cur->ref_count) {
                cur->ref_count = 0; /* otherwise dereg will fail on assert */
            } else if (mca_mpool_rdma_component.leave_pinned) {
                opal_list_remove_item(&rdma_module->mru_list,
                                      (opal_list_item_t *) cur);
            }

            /* Remove from rcache first */
            mpool->rcache->rcache_delete(mpool->rcache, cur);

            /* Drop lock before deregistering memory */
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            status = dereg_mem(mpool, cur);
            OPAL_THREAD_LOCK(&mpool->rcache->lock);

            /* On failure the registration is simply skipped: it is already
             * out of the rcache, so track of it may be lost here. */
            if (OMPI_SUCCESS == status) {
                OMPI_FREE_LIST_RETURN(&rdma_module->reg_list,
                                      (ompi_free_list_item_t *) cur);
            }
        }
    } while (RDMA_MPOOL_NREGS == found);

    OBJ_DESTRUCT(&rdma_module->mru_list);
    OBJ_DESTRUCT(&rdma_module->gc_list);
    OBJ_DESTRUCT(&rdma_module->reg_list);
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

}