/**
 * Release every task queued on a collective fragment descriptor, together
 * with the fragments those tasks reference.
 *
 * Walks the collfrag->tasks_to_release chain. For each task the fragments
 * are handed to release_frags_on_task() and the task itself is returned to
 * the free list it belongs to.
 *
 * @param collfrag    Collective fragment whose tasks_to_release chain is walked.
 * @param frags_free  Free list forwarded to release_frags_on_task().
 *
 * @return OMPI_SUCCESS when the whole chain was released; OMPI_ERROR as soon
 *         as release_frags_on_task() fails (the failing task and everything
 *         after it are left untouched).
 **/
int
mca_bcol_iboffload_free_tasks_frags_resources(
    mca_bcol_iboffload_collfrag_t *collfrag,
    ompi_free_list_t *frags_free)
{
    int rc;

    mca_bcol_iboffload_task_t *task = collfrag->tasks_to_release;
    mca_bcol_iboffload_task_t *next_task;
    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    /* Support for multiple frags we will add later
     * n_outstanding_frags = coll_req->n_frags_sent - coll_req->n_frag_net_complete; */

    while (NULL != task) {
        /* Return frag (if the reference counter is zero) */
        rc = release_frags_on_task(task, frags_free);
        if (OMPI_SUCCESS != rc) {
            return OMPI_ERROR;
        }

        /* Fetch the link BEFORE giving the task back: once the item is on
           a free list it may be handed out and rewritten by someone else,
           so it must not be read afterwards. */
        next_task = task->next_task;

        /* Return task: if the pointer is NULL => we assume the task
           is a member of the common task list (tasks_free) */
        if (NULL == task->task_list) {
            OMPI_FREE_LIST_RETURN(&cm->tasks_free,
                                  (ompi_free_list_item_t *) task);
        } else {
            OMPI_FREE_LIST_RETURN(task->task_list,
                                  (ompi_free_list_item_t *) task);
        }

        task = next_task;
    }

    return OMPI_SUCCESS;
}
Example #2
0
/*
 * Drop one reference on a registration.
 *
 * While other references remain nothing else happens. When the last
 * reference goes away the registration is either parked on the MRU list
 * (leave_pinned set and the registration is neither CACHE_BYPASS nor
 * PERSIST) or deregistered through dereg_mem(); after a successful
 * deregistration it is removed from the rcache (unless CACHE_BYPASS) and
 * handed back to the module's reg_list.
 *
 * The whole operation runs under the rcache lock.
 * Returns OMPI_SUCCESS, or the error reported by dereg_mem().
 */
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
                            mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *rdma_module = (mca_mpool_rdma_module_t*)mpool;
    int status = OMPI_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    reg->ref_count--;
    if(reg->ref_count <= 0) {
        /* that was the last user of this registration */
        if(mca_mpool_rdma_component.leave_pinned &&
           0 == (reg->flags &
                 (MCA_MPOOL_FLAGS_CACHE_BYPASS|MCA_MPOOL_FLAGS_PERSIST))) {
            /* keep the memory pinned and remember it on the MRU list so
             * that it can be reused later */
            opal_list_prepend(&rdma_module->mru_list, (opal_list_item_t*)reg);
        } else {
            status = dereg_mem(mpool, reg);
            if(OMPI_SUCCESS == status) {
                if(0 == (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
                    mpool->rcache->rcache_delete(mpool->rcache, reg);
                }
                OMPI_FREE_LIST_RETURN(&rdma_module->reg_list,
                                      (ompi_free_list_item_t*)reg);
            }
        }
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return status;
}
/**
 * Tear down an endpoint and put it back into the INIT state.
 *
 * An endpoint that is already in MCA_BTL_UGNI_EP_STATE_INIT is left alone.
 * Otherwise, if the endpoint is connected and the caller asked for it, a
 * zero-length message tagged MCA_BTL_UGNI_TAG_DISCONNECT is sent (failure
 * to send is only logged). Both endpoint handles are then destroyed and the
 * mailbox, if any, goes back to the BTL's smsg_mboxes free list.
 *
 * @param ep               Endpoint to disconnect.
 * @param send_disconnect  Whether to send a disconnect message first.
 *
 * @return Always OMPI_SUCCESS.
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
    gni_return_t rc;

    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        /* nothing to do */
        return OMPI_SUCCESS;
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
                               MCA_BTL_UGNI_TAG_DISCONNECT);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni could not send close message"));
        }

        /* we might want to wait for local completion here (do we even care) */
    }

    (void) ompi_common_ugni_ep_destroy (&ep->smsg_ep_handle);
    (void) ompi_common_ugni_ep_destroy (&ep->rdma_ep_handle);

    /* Only hand back a mailbox that actually exists: pushing a NULL item
       onto the free list would dereference a NULL pointer. */
    if (NULL != ep->mailbox) {
        OMPI_FREE_LIST_RETURN(&ep->btl->smsg_mboxes, ((ompi_free_list_item_t *) ep->mailbox));
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;

    return OMPI_SUCCESS;
}
/*
 * Move every request cached on the file's private list back onto the
 * global mca_io_base_requests free list. The file's request lock is held
 * for the duration of the transfer.
 */
void mca_io_base_request_return(ompi_file_t *file)
{
    ompi_free_list_item_t *item;

    OPAL_THREAD_LOCK(&file->f_io_requests_lock);
    for (;;) {
        item = (ompi_free_list_item_t*)
            opal_list_remove_first(&file->f_io_requests);
        if (NULL == item) {
            /* per-file list is drained */
            break;
        }
        OMPI_FREE_LIST_RETURN(&mca_io_base_requests, item);
    }
    OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
}
Example #5
0
/*
 * Start a receive for an already matched message.
 *
 * The MXM receive request embedded in the MTL request is initialised and
 * filled in from the matched-message descriptor stored in
 * (*message)->req_ptr, then posted with mxm_message_recv(). On success the
 * descriptor goes back to the component's mxm_messages free list, the
 * message object is returned and *message is set to MPI_MESSAGE_NULL.
 *
 * Returns OMPI_SUCCESS, the error from ompi_mtl_mxm_recv_init(), OMPI_ERROR
 * if posting fails, or OMPI_ERR_NOT_IMPLEMENTED when built against an MXM
 * API older than 1.5.
 */
int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
                        struct opal_convertor_t *convertor,
                        struct ompi_message_t **message,
                        struct mca_mtl_request_t *mtl_request)
{
#if MXM_API >= MXM_VERSION(1,5)
    mca_mtl_mxm_request_t *mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
    mxm_recv_req_t *recv_req = &mxm_request->mxm.recv;
    ompi_mtl_mxm_message_t *matched =
                        (ompi_mtl_mxm_message_t *) (*message)->req_ptr;
    mxm_error_t mxm_rc;
    int rc;

    /* set up the receive request that lives inside the MTL request */
    rc = ompi_mtl_mxm_recv_init(mxm_request, convertor, recv_req);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        return rc;
    }

    /* copy the matching information recorded for the message */
    recv_req->base.mq   = matched->mq;
    recv_req->base.conn = matched->conn;
    recv_req->tag       = matched->tag;
    recv_req->tag_mask  = matched->tag_mask;

    mxm_rc = mxm_message_recv(recv_req, matched->mxm_msg);
    if (OPAL_UNLIKELY(MXM_OK != mxm_rc)) {
        orte_show_help("help-mtl-mxm.txt", "error posting message receive", true,
                       mxm_error_string(mxm_rc), mxm_request->buf, mxm_request->length);
        return OMPI_ERROR;
    }

    /* the descriptor and the message object are no longer needed */
    OMPI_FREE_LIST_RETURN(&mca_mtl_mxm_component.mxm_messages,
                         (ompi_free_list_item_t *) matched);

    ompi_message_return(*message);
    (*message) = MPI_MESSAGE_NULL;

    return OMPI_SUCCESS;
#else
    return OMPI_ERR_NOT_IMPLEMENTED;
#endif
}
Example #6
0
/*
 * Invalidate every cached registration that overlaps [base, base + size).
 *
 * Registrations with a zero reference count are deregistered, removed from
 * the rcache and the MRU list, and returned to reg_list. Registrations that
 * are still referenced are only removed from the rcache and flagged
 * CACHE_BYPASS; this is reported to the caller as an error.
 *
 * Returns OMPI_SUCCESS if everything could be released, OMPI_ERROR if at
 * least one registration was still in use or failed to deregister.
 */
int mca_mpool_rdma_release_memory(struct mca_mpool_base_module_t *mpool,
        void *base, size_t size)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;
    ompi_pointer_array_t regs;
    int reg_cnt, i, err = 0;

    OBJ_CONSTRUCT(&regs, ompi_pointer_array_t);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size, &regs);

    for(i = 0; i < reg_cnt; i++) {
        reg = (mca_mpool_base_registration_t*)
            ompi_pointer_array_get_item(&regs, i);

        if(0 == reg->ref_count) {
            if(dereg_mem(mpool, reg) != OMPI_SUCCESS) {
                err++;
                continue;
            }
        } else {
            /* remove registration from cache and wait for ref_count goes to
             * zero before unregister memory. Note that our registered memory
             * statistic can go wrong at this point, but it is better than
             * potential memory corruption. And we return error in this case to
             * the caller */
            reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS;
            err++; /* tell caller that something was wrong */
        }
        mpool->rcache->rcache_delete(mpool->rcache, reg);
        if(0 == reg->ref_count) {
            opal_list_remove_item(&mpool_rdma->mru_list,
                    (opal_list_item_t*)reg);
            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                    (ompi_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Clear the temporary array and run its destructor: it was set up with
     * OBJ_CONSTRUCT above, so without the matching OBJ_DESTRUCT whatever the
     * array allocated internally would be leaked on every call. */
    ompi_pointer_array_remove_all(&regs);
    OBJ_DESTRUCT(&regs);

    return err?OMPI_ERROR:OMPI_SUCCESS;
}
/*
 * Deregister the memory behind a registration.
 *
 * Unless the registration is flagged CACHE_BYPASS it is first removed from
 * the rcache. The rcache lock is released around the call to the
 * deregister_mem resource callback and re-acquired afterwards, i.e. the
 * function is entered and left with the lock held. On success the
 * registration is handed back to the module's reg_list.
 *
 * Returns the status reported by the deregister_mem callback.
 */
static inline int dereg_mem(mca_mpool_base_registration_t *reg)
{
    mca_mpool_grdma_module_t *grdma = (mca_mpool_grdma_module_t *) reg->mpool;
    int status;

    if(0 == (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
        reg->mpool->rcache->rcache_delete(reg->mpool->rcache, reg);
    }

    /* the actual deregistration runs without the rcache lock */
    OPAL_THREAD_UNLOCK(&reg->mpool->rcache->lock);
    status = grdma->resources.deregister_mem(grdma->resources.reg_data, reg);
    OPAL_THREAD_LOCK(&reg->mpool->rcache->lock);

    if (!OPAL_LIKELY(OMPI_SUCCESS == status)) {
        /* keep the registration object, the memory is still registered */
        return status;
    }

    OMPI_FREE_LIST_RETURN(&grdma->reg_list,
                          (ompi_free_list_item_t *) reg);

    return OMPI_SUCCESS;
}
/*
 * Drop one reference on a registration.
 *
 * While other references remain the function returns right away. When the
 * last reference goes away the registration is either parked on the MRU
 * list (leave_pinned set and the registration is cacheable) or removed from
 * the rcache (unless CACHE_BYPASS) and deregistered with the rcache lock
 * temporarily released; after a successful deregistration it is returned
 * to reg_list. Deferred rcache cleanup runs at the end, outside the lock.
 *
 * Returns OMPI_SUCCESS, or the error reported by dereg_mem().
 */
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
                            mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *rdma_module = (mca_mpool_rdma_module_t*)mpool;
    int status = OMPI_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    reg->ref_count--;
    if(reg->ref_count > 0) {
        /* still in use elsewhere; note that this path skips rcache_clean */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OMPI_SUCCESS;
    }

    if(!(mca_mpool_rdma_component.leave_pinned &&
         registration_is_cachebale(reg))) {
        /* take it out of the rcache before touching the memory */
        if(0 == (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
            mpool->rcache->rcache_delete(mpool->rcache, reg);
        }

        /* deregistration itself happens without the rcache lock */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        status = dereg_mem(mpool, reg);
        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        if(OMPI_SUCCESS == status) {
            OMPI_FREE_LIST_RETURN(&rdma_module->reg_list,
                                  (ompi_free_list_item_t*)reg);
        }
    } else {
        /* leave the memory pinned and remember the registration on the
         * MRU list for future use */
        opal_list_prepend(&rdma_module->mru_list, (opal_list_item_t*)reg);
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    return status;
}
/*
 * Drain the module's garbage-collection list: each entry is removed from
 * the list and from the rcache, deregistered, and returned to reg_list.
 *
 * Must be called with the rcache lock held and with a non-empty gc_list
 * (the first entry is taken before the list is tested). The lock is
 * released around every dereg_mem() call and held again on return.
 */
static void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *rdma_module = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *stale;

    for(;;) {
        /* Unhook the entry from the gc list and the rcache before it is
           deregistered */
        stale = (mca_mpool_base_registration_t *)
            opal_list_remove_first(&rdma_module->gc_list);
        mpool->rcache->rcache_delete(mpool->rcache, stale);

        /* dereg_mem may allocate memory, so it must not run under the
           rcache lock */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        dereg_mem(mpool, stale);
        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        OMPI_FREE_LIST_RETURN(&rdma_module->reg_list,
                              (ompi_free_list_item_t*)stale);

        if(opal_list_is_empty(&rdma_module->gc_list)) {
            break;
        }
    }
}
Example #10
0
/*
 * Register memory without going through the registration cache.
 *
 * A registration object is taken from reg_list, filled in with the aligned
 * bounds of [addr, addr + size) and the given flags, and passed to the
 * register_mem resource callback. On success *reg points at the new
 * registration and its ref_count has been incremented.
 *
 * This function does not take the rcache lock, and in this file
 * mca_mpool_rdma_register() calls it without holding that lock, so no
 * path in here may unlock it.
 *
 * Returns OMPI_SUCCESS, the error from OMPI_FREE_LIST_GET, or the error
 * from register_mem (the registration object is returned to reg_list in
 * that case and *reg is left untouched).
 */
static int register_cache_bypass(mca_mpool_base_module_t *mpool,
        void *addr, size_t size, uint32_t flags,
        mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr( (void*) ((char*) addr + size - 1),
             mca_mpool_base_page_size_log);
    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        /* Nothing to undo: the rcache lock is not held here, so it must
         * not be released. */
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    return OMPI_SUCCESS;
}
Example #11
0
/*
 * Red-black tree test using address ranges as keys.
 *
 * Allocates up to NUM_ALLOCATIONS buffers of growing size (1, 2, 3, ...
 * bytes), inserts one tree node per buffer keyed by the buffer's
 * [bottom, top] address range and tagged with its index, then looks up the
 * last byte of every buffer twice and checks that the node with the
 * matching index comes back. Failures are reported through test_failure().
 *
 * If an allocation fails the test stops adding nodes; only the entries that
 * were fully set up are looked up and released, and the tree and the free
 * list are always destructed.
 */
void test2(void)
{
    ompi_free_list_t key_list;
    ompi_free_list_item_t * new_value;
    ompi_rb_tree_t tree;
    int rc, i, size;
    int n_alloc;    /* number of entries of mem[] / key_array[] in use */
    void * result, * lookup;
    void * mem[NUM_ALLOCATIONS];
    ompi_free_list_item_t * key_array[NUM_ALLOCATIONS];
    struct timeval start, end;
    
    OBJ_CONSTRUCT(&key_list, ompi_free_list_t);
    ompi_free_list_init_new(&key_list, sizeof(ompi_test_rb_value_t),
            CACHE_LINE_SIZE,
            OBJ_CLASS(ompi_test_rb_value_t), 
            0,CACHE_LINE_SIZE,
            0, -1 , 128, NULL);
    
    OBJ_CONSTRUCT(&tree, ompi_rb_tree_t);
    rc = ompi_rb_tree_init(&tree, mem_node_compare);
    if(!test_verify_int(OMPI_SUCCESS, rc)) {
        test_failure("failed to properly initialize the tree");
    }
  
    size = 1;
    n_alloc = 0;
    for(i = 0; i < NUM_ALLOCATIONS; i++)
    {
        mem[i] = malloc(size);
        if(NULL == mem[i])
        {
            test_failure("system out of memory");
            break;
        }   
        OMPI_FREE_LIST_GET(&key_list, new_value, rc);
        if(OMPI_SUCCESS != rc || NULL == new_value)
        {
            /* without a node there is nothing to fill in or insert */
            test_failure("failed to get memory from free list");
            free(mem[i]);
            break;
        }
        key_array[i] = new_value;
        n_alloc = i + 1;
        ((ompi_test_rb_value_t *) new_value)->key.bottom = mem[i];
        ((ompi_test_rb_value_t *) new_value)->key.top = 
                                            (void *) ((size_t) mem[i] + size - 1);
        /* go through size_t so the int <-> pointer conversion is between
           integers and pointers of matching width */
        ((ompi_test_rb_value_t *) new_value)->registered_mpools[0] =
                                            (void *) (size_t) i;
        rc = ompi_rb_tree_insert(&tree, &((ompi_test_rb_value_t *)new_value)->key, 
                        new_value);
        if(OMPI_SUCCESS != rc) 
        {
            test_failure("failed to properly insert a new node");
        }
        size += 1;   
    }
    
    gettimeofday(&start, NULL);
    for(i = 0; i < n_alloc; i++)
    {
        /* buffer i is i + 1 bytes long, so this is its last byte */
        lookup = (void *) ((size_t) mem[i] + i);
        result = ompi_rb_tree_find(&tree, &lookup);
        if(NULL == result) 
        {
            test_failure("lookup returned null!");
        } else if(i != ((int) (size_t) ((ompi_test_rb_value_t *) result)->registered_mpools[0]))
        {
            test_failure("lookup returned wrong node!");
        }
        result = ompi_rb_tree_find(&tree, &lookup);
        if(NULL == result) 
        {
            test_failure("lookup returned null!");
        } else if(i != ((int) (size_t) ((ompi_test_rb_value_t *) result)->registered_mpools[0]))
        {
            test_failure("lookup returned wrong node!");
        }
    }

    gettimeofday(&end, NULL);

#if 0
    i = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
    printf("In a %d node tree, %d lookups took %f microseonds each\n", 
            NUM_ALLOCATIONS, NUM_ALLOCATIONS * 2, 
            (float) i / (float) (NUM_ALLOCATIONS * 2));
#endif

    /* release only what was actually set up above */
    for(i = 0; i < n_alloc; i++)
    {
        free(mem[i]);
        OMPI_FREE_LIST_RETURN(&(key_list), key_array[i]);
    }

    OBJ_DESTRUCT(&tree);
    OBJ_DESTRUCT(&key_list);
}
/*
 * Return a module-specific IO MPI_Request
 *
 * A request cached on the file is reused when one is available; otherwise
 * a fresh one is taken from the global mca_io_base_requests free list and
 * given its one-time, per-module setup. In both cases the request is then
 * (re)initialised for use.
 *
 * On success *req points at the request and OMPI_SUCCESS is returned.
 * On failure *req is set to NULL and the return value is the error from
 * OMPI_FREE_LIST_GET, the error from the module's once-init function, or
 * OMPI_ERR_NOT_IMPLEMENTED for an unknown io version.
 */
int mca_io_base_request_alloc(ompi_file_t *file, 
                              mca_io_base_request_t **req)
{
    int err;
    mca_io_base_module_request_once_init_fn_t func;
    ompi_free_list_item_t *item;

    /* See if we've got a request on the module's freelist (which is
       cached on the file, since there's only one module per
       MPI_File).  Use a quick-but-not-entirely-accurate (but good
       enough) check as a slight optimization to potentially avoid
       having to lock and unlock. */

    if (opal_list_get_size(&file->f_io_requests) > 0) {
        OPAL_THREAD_LOCK(&file->f_io_requests_lock);
        if (opal_list_get_size(&file->f_io_requests) > 0) {
            *req = (mca_io_base_request_t*) 
                opal_list_remove_first(&file->f_io_requests);
            (*req)->free_called = false;
        } else {
            *req = NULL;
        }
        OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
    } else {
        *req = NULL;
    }
        
    /* Nope, we didn't have one on the file freelist, so let's get one
       off the global freelist */

    if (NULL == *req) {
        OMPI_FREE_LIST_GET(&mca_io_base_requests, item, err);
        if (OMPI_SUCCESS != err) {
            /* No request could be obtained; do not touch the item */
            *req = NULL;
            return err;
        }
        *req = (mca_io_base_request_t*) item;

        /* Call the per-use init function, if it exists */

        switch (file->f_io_version) {
        case MCA_IO_BASE_V_2_0_0:

            /* These can be set once for this request since this
               request will always be used with the same module (and
               therefore, the same MPI_File).  Note that
               (*req)->req_ompi.rq_type is already set by the
               constructor. */

            (*req)->req_file = file;
            (*req)->req_ver = file->f_io_version;
            (*req)->free_called = false;
            (*req)->super.req_free =
                file->f_io_selected_module.v2_0_0.io_module_request_free;
            (*req)->super.req_cancel =
                file->f_io_selected_module.v2_0_0.io_module_request_cancel;

            /* Call the module's once-per process init, if it
               exists */

            func = 
                file->f_io_selected_module.v2_0_0.io_module_request_once_init;
            if (NULL != func) {
                if (OMPI_SUCCESS != 
                    (err = func(&file->f_io_selected_module, *req))) {
                    OMPI_FREE_LIST_RETURN(&mca_io_base_requests, item);
                    /* the item is back on the free list: do not leave
                       the caller with a pointer to it */
                    *req = NULL;
                    return err;
                }
            }

            break;
            
        default:
            OMPI_FREE_LIST_RETURN(&mca_io_base_requests, item);
            *req = NULL;
            return OMPI_ERR_NOT_IMPLEMENTED;
        }
    }

    /* Initialize the request */

    OMPI_REQUEST_INIT(&((*req)->super), false);
    (*req)->super.req_mpi_object.file = file;

    /*
     * Copied from ompi/mca/pml/base/pml_base_recvreq.h:
     * always set the req_status.MPI_TAG to ANY_TAG before starting the
     * request. This field is used if cancelled to find out if the request
     * has been matched or not.
     */
    (*req)->super.req_status.MPI_TAG = MPI_ANY_TAG;
    (*req)->super.req_status.MPI_ERROR = OMPI_SUCCESS;
    (*req)->super.req_status._count = 0;
    (*req)->super.req_status._cancelled = 0;

    /* All done */

    return OMPI_SUCCESS;
}
/*
 * register memory
 *
 * Produces a registration covering the aligned region around
 * [addr, addr + size). Unless a persistent or cache-bypass registration is
 * requested, the rcache is searched first and a suitable existing
 * registration is reused. Otherwise a registration object is taken from
 * reg_list, optionally inserted into the rcache, and registered through
 * the register_mem resource callback, evicting unused registrations while
 * that callback reports OMPI_ERR_OUT_OF_RESOURCE.
 *
 * On success *reg points at the registration, its ref_count has been
 * incremented and OMPI_SUCCESS is returned. On failure the error from
 * OMPI_FREE_LIST_GET, rcache_insert or register_mem is returned.
 *
 * The rcache lock is taken on entry and released on every return path.
 */
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
    mca_mpool_base_registration_t *grdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    /* true when the new registration must not be inserted into the rcache */
    bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* compute the aligned bounds of the requested region */
    base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                            mca_mpool_base_page_size_log);
    /* process registrations queued for garbage collection, if any */
    if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
        do_unregistration_gc(mpool);

    /* look through existing regs if not persistent registration requested.
     * Persistent registration are always registered and placed in the cache */
    if(!(flags & MCA_MPOOL_FLAGS_PERSIST) && !bypass_cache) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        /* a cached registration is reused when leave_pinned is set, when it
         * is persistent, or when it covers exactly the aligned region */
        if(*reg != NULL &&
           (mca_mpool_grdma_component.leave_pinned ||
            ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
            ((*reg)->base == base && (*reg)->bound == bound))) {
            /* an unreferenced registration is taken off the LRU list
             * before it gets a reference again */
            if(0 == (*reg)->ref_count &&
               mca_mpool_grdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_grdma->pool->lru_list,
                                      (opal_list_item_t*)(*reg));
            }
            mpool_grdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return OMPI_SUCCESS;
        }

        mpool_grdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* If no suitable registration is in cache and leave_pinned isn't
         * set don't use the cache.
         * This is optimisation in case limit is not set. If limit is set we
         * have to put registration into the cache to determine when we hit
         * memory registration limit.
         * NOTE: cache is still used for persistent registrations so previous
         * find can find something */
        if(!mca_mpool_grdma_component.leave_pinned) {
            bypass_cache = true;
        }
    }

    /* no reusable registration: set up a new one */
    OMPI_FREE_LIST_GET(&mpool_grdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    grdma_reg = (mca_mpool_base_registration_t*)item;

    grdma_reg->mpool = mpool;
    grdma_reg->base = base;
    grdma_reg->bound = bound;
    grdma_reg->flags = flags;

    if (false == bypass_cache) {
        rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);

        if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            OMPI_FREE_LIST_RETURN(&mpool_grdma->reg_list, item);
            return rc;
        }
    }

    /* keep retrying the registration for as long as it fails for lack of
     * resources and something can still be evicted */
    while (OMPI_ERR_OUT_OF_RESOURCE ==
           (rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
                                                     base, bound - base + 1, grdma_reg))) {
        /* try to remove one unused reg and retry */
        if (!mca_mpool_grdma_evict (mpool)) {
            break;
        }
    }

    if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
        /* undo the rcache insertion done above before giving the
         * registration object back */
        if (false == bypass_cache) {
            mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
        }
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_grdma->reg_list, item);
        return rc;
    }

    *reg = grdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
    return OMPI_SUCCESS;
}
Example #14
0
/*
 * Start a non-blocking receive.
 *
 * The already processed unexpected messages and then the pending
 * unexpected events are searched for a match; if one is found its data is
 * delivered straight away. Otherwise a match entry and a memory descriptor
 * for the receive buffer are posted. If a message slips in between the
 * search and the activation of the descriptor (PtlMDUpdate returns
 * PTL_MD_NO_UPDATE) the descriptor is unlinked and the search is repeated.
 *
 * Returns OMPI_SUCCESS, the error from ompi_mtl_datatype_recv_buf(), or a
 * Portals error translated by ompi_common_portals_error_ptl_to_ompi().
 * A temporary receive buffer (free_after set) is released on the error
 * paths that follow its allocation.
 */
int
ompi_mtl_portals_irecv(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t *comm,
                       int src,
                       int tag,
                       struct ompi_convertor_t *convertor,
                       mca_mtl_request_t *mtl_request)
{
    ptl_match_bits_t match_bits, ignore_bits;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_handle_me_t me_h;
    int ret;
    ptl_process_id_t remote_proc;
    mca_mtl_base_endpoint_t *endpoint = NULL;
    ompi_mtl_portals_request_t *ptl_request = 
        (ompi_mtl_portals_request_t*) mtl_request;
    ompi_mtl_portals_event_t *recv_event = NULL;
    size_t buflen;

    ptl_request->convertor = convertor;

    if  (MPI_ANY_SOURCE == src) {
        remote_proc.nid = PTL_NID_ANY;
        remote_proc.pid = PTL_PID_ANY;
    } else {
        ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
        endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
        remote_proc = endpoint->ptl_proc;
    }

    PTL_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
                      src, tag);

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                         "recv bits: 0x%016llx 0x%016llx\n",
                         match_bits, ignore_bits));

    /* first, check the queue of processed unexpected messages */
    recv_event = ompi_mtl_portals_search_unex_q(match_bits, ignore_bits);
    if (NULL != recv_event) {
        /* found it */
        ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
        OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
                              (ompi_free_list_item_t*)recv_event);
        goto cleanup;
    } else {
restart_search:
        /* check unexpected events */
        recv_event = ompi_mtl_portals_search_unex_events(match_bits, ignore_bits);
        if (NULL != recv_event) {
            /* found it */
            ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
            OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
                                  (ompi_free_list_item_t*)recv_event);
            goto cleanup;
        }
    }

    /* didn't find it, now post the receive */
    ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                     &ptl_request->free_after);
    if (OMPI_SUCCESS != ret) {
        /* md.start / buflen cannot be trusted, so nothing may be posted.
         * NOTE(review): assumes the helper reports OMPI_* codes and owns
         * nothing on failure - confirm against its definition. */
        return ret;
    }
    md.length = buflen;

    /* create ME entry */
    ret = PtlMEInsert(ompi_mtl_portals.ptl_match_ins_me_h,
                remote_proc,
                match_bits,
                ignore_bits,
                PTL_UNLINK,
                PTL_INS_BEFORE,
                &me_h);
    if( ret !=PTL_OK) {
        /* the buffer has not been handed to Portals yet */
        if (ptl_request->free_after) { free(md.start); }
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    /* associate a memory descriptor with the Match list Entry */
    md.threshold = 0;
    md.options = PTL_MD_OP_PUT | PTL_MD_TRUNCATE | PTL_MD_EVENT_START_DISABLE;
    md.user_ptr = ptl_request;
    md.eq_handle = ompi_mtl_portals.ptl_eq_h;
    ret=PtlMDAttach(me_h, md, PTL_UNLINK, &md_h);
    if( ret !=PTL_OK) {
        /* no descriptor was attached, so the buffer is still ours */
        if (ptl_request->free_after) { free(md.start); }
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    /* now try to make active */
    md.threshold = 1;

    /* enable the memory descriptor, if the ptl_unexpected_recv_eq_h
     *   queue is empty */
    ret = PtlMDUpdate(md_h, NULL, &md,
                      ompi_mtl_portals.ptl_unexpected_recv_eq_h);
    if (ret == PTL_MD_NO_UPDATE) {
        /* a message has arrived since we searched - look again */
        PtlMDUnlink(md_h);
        if (ptl_request->free_after) { free(md.start); }
        goto restart_search;
    } else if( PTL_OK != ret ) {
        /* same teardown as the retry path above: drop the descriptor
         * that was never activated, then release the buffer */
        PtlMDUnlink(md_h);
        if (ptl_request->free_after) { free(md.start); }
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    ptl_request->event_callback = ompi_mtl_portals_recv_progress;

 cleanup:

    return OMPI_SUCCESS;
}
/*
 * Give a tree item back to mca_mpool_base_tree_item_free_list.
 */
void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item)
{
    /* the free list works on the free-list base member of the item */
    OMPI_FREE_LIST_RETURN(&mca_mpool_base_tree_item_free_list, &item->super);
}
Example #16
0
/*
 * register memory
 *
 * Produces a registration covering the aligned region around
 * [addr, addr + size).
 *
 *  - CACHE_BYPASS requests go straight to register_cache_bypass().
 *  - Unless PERSIST is requested, the rcache is searched and a suitable
 *    existing registration is reused. On a miss, with leave_pinned unset
 *    and no rcache size limit, the request is also served by
 *    register_cache_bypass().
 *  - Otherwise a new registration is inserted into the rcache (evicting
 *    unused registrations from the tail of the MRU list while the cache
 *    reports OMPI_ERR_TEMP_OUT_OF_RESOURCE) and registered through the
 *    register_mem resource callback.
 *
 * On success *reg points at the registration, its ref_count has been
 * incremented and OMPI_SUCCESS is returned; otherwise the error of the
 * failing step is returned. The rcache lock is never held on return.
 */
int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    /* if cache bypass is requested don't use the cache */
    if(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        return register_cache_bypass(mpool, addr, size, flags, reg);
    }

    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr((void*)((char*) addr + size - 1),
             mca_mpool_base_page_size_log);
    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    /* look through existing regs if not persistent registration requested.
     * Persistent registration are always registered and placed in the cache */
    if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        if(*reg != NULL &&
                (mca_mpool_rdma_component.leave_pinned ||
                 ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
                 ((*reg)->base == base && (*reg)->bound == bound))) {
            /* an unreferenced registration is taken off the MRU list
             * before it gets a reference again */
            if(0 == (*reg)->ref_count &&
                    mca_mpool_rdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)(*reg));
            }
            mpool_rdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            /* internal code reports OMPI_* codes, like every other
             * return in this function */
            return OMPI_SUCCESS;
        }

        mpool_rdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* If no suitable registration is in cache and leave_pinned isn't
         * set and size of registration cache is unlimited don't use the cache.
         * This is optimisation in case limit is not set. If limit is set we
         * have to put registration into the cache to determine when we hit
         * memory registration limit.
         * NOTE: cache is still used for persistent registrations so previous
         * find can find something */
        if(!mca_mpool_rdma_component.leave_pinned &&
                 mca_mpool_rdma_component.rcache_size_limit == 0) {
            /* register_cache_bypass() runs without the rcache lock */
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return register_cache_bypass(mpool, addr, size, flags, reg);
        }
    }

    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    while((rc = mpool->rcache->rcache_insert(mpool->rcache, rdma_reg,
             mca_mpool_rdma_component.rcache_size_limit)) ==
            OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
        mca_mpool_base_registration_t *old_reg;
        /* try to remove one unused reg and retry */
        old_reg = (mca_mpool_base_registration_t*)
            opal_list_get_last(&mpool_rdma->mru_list);
        if(opal_list_get_end(&mpool_rdma->mru_list) !=
                (opal_list_item_t*)old_reg) {
            rc = dereg_mem(mpool, old_reg);
            if(OMPI_SUCCESS == rc) {
                mpool->rcache->rcache_delete(mpool->rcache, old_reg);
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)old_reg);
                OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                        (ompi_free_list_item_t*)old_reg);
                mpool_rdma->stat_evicted++;
            } else
                break;  /* eviction failed: rc carries the dereg error */
        } else
            break;      /* MRU list is empty: nothing left to evict */
    }

    if(rc != OMPI_SUCCESS) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        /* undo the rcache insertion before giving the object back */
        mpool->rcache->rcache_delete(mpool->rcache, rdma_reg);
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    return OMPI_SUCCESS;
}
/*
 * Shut the rdma memory pool module down.
 *
 * Optionally prints the cache statistics, processes the pending
 * garbage-collection list, then repeatedly asks the rcache for batches of
 * up to RDMA_MPOOL_NREGS registrations and deregisters each of them,
 * whether or not it is still referenced. Finally the module's lists are
 * destructed and deferred rcache cleanup is run.
 */
void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;
    mca_mpool_base_registration_t *regs[RDMA_MPOOL_NREGS];
    int reg_cnt, i;
    int rc;

    /* Statistic */
    if(true == mca_mpool_rdma_component.print_stats) {
        opal_output(0, "%s rdma: stats "
                "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss,
                mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound,
                mpool_rdma->stat_evicted);
    }

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    /* do_unregistration_gc() requires the lock and a non-empty list */
    if(!opal_list_is_empty(&mpool_rdma->gc_list))
        do_unregistration_gc(mpool);
    /* fetch registrations batch by batch; a full batch means there may be
     * more left in the cache */
    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
                regs, RDMA_MPOOL_NREGS);

        for(i = 0; i < reg_cnt; i++) {
            reg = regs[i];

            if(reg->ref_count) {
                reg->ref_count = 0; /* otherwise dereg will fail on assert */
            } else if (mca_mpool_rdma_component.leave_pinned) {
                /* unreferenced and leave_pinned set: take it off the
                 * MRU list */
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)reg);
            }

            /* Remove from rcache first */
            mpool->rcache->rcache_delete(mpool->rcache, reg);

            /* Drop lock before deregistering memory */
	    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
	    rc = dereg_mem(mpool, reg);
	    OPAL_THREAD_LOCK(&mpool->rcache->lock);

            if(rc != OMPI_SUCCESS) {
                /* Potentially lose track of registrations
                   do we have to put it back?
                   NOTE(review): at this point the registration is out of
                   the rcache but is not returned to reg_list. */
                continue;
            }

            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                    (ompi_free_list_item_t*)reg);
        }
    } while(reg_cnt == RDMA_MPOOL_NREGS);

    /* the lists are destructed while the rcache lock is still held */
    OBJ_DESTRUCT(&mpool_rdma->mru_list);
    OBJ_DESTRUCT(&mpool_rdma->gc_list);
    OBJ_DESTRUCT(&mpool_rdma->reg_list);
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

}