Example no. 1
/* Create the tree */
int opal_rb_tree_init(opal_rb_tree_t * tree,
                      opal_rb_tree_comp_fn_t comp)
{
    opal_free_list_item_t * node;
    /* we need to get memory for the root pointer from the free list */
    node = opal_free_list_get (&(tree->free_list));
    if (NULL == node) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    tree->root_ptr = (opal_rb_tree_node_t *) node;

    node = opal_free_list_get (&(tree->free_list));
    if (NULL == node) {
        opal_free_list_return (&tree->free_list, (opal_free_list_item_t*)tree->root_ptr);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    tree->nill = (opal_rb_tree_node_t *) node;
    /* initialize tree->nill */
    tree->nill->color = BLACK;
    tree->nill->left = tree->nill;
    tree->nill->right = tree->nill;
    tree->nill->parent = tree->nill;

    /* initialize the 'root' pointer */
    tree->root_ptr->left = tree->nill;
    tree->root_ptr->right = tree->nill;
    tree->root_ptr->parent = tree->nill;
    tree->root_ptr->color = BLACK;

    tree->comp = comp;

    /* set the tree size to zero */
    tree->tree_size = 0;

    return OPAL_SUCCESS;
}
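A minimal sketch of how this initializer might be called. The comparison callback and the OBJ_CONSTRUCT usage follow the usual OPAL object conventions but are assumptions, not taken from the example above.

/* Hypothetical usage sketch: build a red-black tree keyed on ints.
 * my_int_comp is an assumed name; the opal_rb_tree_comp_fn_t shape
 * (two void* keys, returns <0/0/>0) matches the init signature above. */
static int my_int_comp(void *key1, void *key2)
{
    int a = *(int *) key1, b = *(int *) key2;
    return (a < b) ? -1 : (a > b) ? 1 : 0;
}

opal_rb_tree_t tree;
OBJ_CONSTRUCT(&tree, opal_rb_tree_t);
if (OPAL_SUCCESS != opal_rb_tree_init(&tree, my_int_comp)) {
    /* the free list could not supply the root/nill nodes */
}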
Example no. 2
static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
    mca_btl_ugni_module_t *ugni_module = ep->btl;
    opal_free_list_item_t *mbox;

    mbox = opal_free_list_get (&ugni_module->smsg_mboxes);
    if (OPAL_UNLIKELY(NULL == mbox)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    ep->mailbox = (mca_btl_ugni_smsg_mbox_t *) mbox;
    ep->mailbox->attr.index = ep->index;

    /* per ugni spec we need to zero mailbox data before connecting */
    memset ((char *)ep->mailbox->attr.smsg_attr.msg_buffer + ep->mailbox->attr.smsg_attr.mbox_offset, 0,
            ep->mailbox->attr.smsg_attr.buff_size);
    return OPAL_SUCCESS;
}
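The natural counterpart hands the mailbox back to the same free list when the endpoint is torn down. A sketch only: the function name is hypothetical, and just the opal_free_list_return() call and the fields used above are taken from the example.

/* Hypothetical release path for the mailbox acquired above. */
static inline void my_ep_smsg_put_mbox (mca_btl_base_endpoint_t *ep) {
    if (NULL != ep->mailbox) {
        opal_free_list_return (&ep->btl->smsg_mboxes,
                               (opal_free_list_item_t *) ep->mailbox);
        ep->mailbox = NULL;
    }
}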
Example no. 3
static void* getBuffer(JNIEnv *env, ompi_java_buffer_t **item, int size)
{
    if(size > ompi_mpi_java_eager)
    {
        *item = NULL;
        return malloc(size);
    }
    else
    {
        opal_free_list_item_t *freeListItem;
        freeListItem = opal_free_list_get (&ompi_java_buffers);

        ompi_java_exceptionCheck(env, NULL == freeListItem ? MPI_ERR_NO_MEM :
                                 MPI_SUCCESS);
        if (NULL == freeListItem) {
            return NULL;
        }

        *item = (ompi_java_buffer_t*)freeListItem;
        return (*item)->buffer;
    }
}
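Because getBuffer has two allocation paths, release must mirror them: buffers larger than the eager limit came from malloc() and are freed, while smaller ones go back to the ompi_java_buffers free list. A hedged sketch of that counterpart; the function name is an assumption.

/* Hypothetical counterpart to getBuffer above. */
static void releaseBuffer(void *ptr, ompi_java_buffer_t *item)
{
    if (NULL == item) {
        free(ptr);                            /* size > ompi_mpi_java_eager path */
    } else {
        opal_free_list_return (&ompi_java_buffers,
                               (opal_free_list_item_t *) item);
    }
}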
Example no. 4
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    int ret, i;
    ptl_size_t rest = length, asked = 0;
    int32_t frag_count;

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

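    /* ceiling division: how many max_msg_size_mtl-sized fragments are needed */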
    frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
    ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count);

    for (i = 0 ; i < frag_count ; i++) {
        opal_free_list_item_t *tmp;
        ompi_mtl_portals4_rndv_get_frag_t* frag;

        tmp = opal_free_list_get (&ompi_mtl_portals4.fl_rndv_get_frag);
        if (NULL == tmp) return OMPI_ERR_OUT_OF_RESOURCE;

        frag = (ompi_mtl_portals4_rndv_get_frag_t*) tmp;

        frag->request = request;
#if OPAL_ENABLE_DEBUG
        frag->frag_num = i;
#endif
        frag->frag_start = (char*)start + i * ompi_mtl_portals4.max_msg_size_mtl;
        frag->frag_length = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? ompi_mtl_portals4.max_msg_size_mtl : rest;
        frag->frag_target = target;
        frag->frag_match_bits = match_bits;
        frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl;

        frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress;
        frag->frag_abs_timeout_usec = 0;

        OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send",
                             i + 1, frag_count, frag->frag_length));

        ret = PtlGet(ompi_mtl_portals4.send_md_h,
                     (ptl_size_t) frag->frag_start,
                     frag->frag_length,
                     frag->frag_target,
                     ompi_mtl_portals4.read_idx,
                     frag->frag_match_bits,
                     frag->frag_remote_offset,
                     frag);
        if (OPAL_UNLIKELY(PTL_OK != ret)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PtlGet failed: %d",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        rest -= frag->frag_length;
        asked += frag->frag_length;
    }

    return OMPI_SUCCESS;
}
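For concreteness, the ceiling division and the per-fragment lengths above work out as in this small illustration; the sizes are assumed values, not anything from the example.

/* Worked illustration of the fragmentation arithmetic (assumed sizes). */
ptl_size_t length = 10 * 1024 * 1024;              /* 10 MiB message     */
ptl_size_t max    = 4 * 1024 * 1024;               /*  4 MiB frag limit  */
int32_t frag_count = (length + max - 1) / max;     /* ceil(10/4) = 3     */
/* frag_length each iteration is min(rest, max): 4 MiB, 4 MiB, 2 MiB     */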
Example no. 5
/*
 * get a tree item from the free list
 */
mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void) {
    return (mca_mpool_base_tree_item_t *)
        opal_free_list_get (&mca_mpool_base_tree_item_free_list);
}
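The matching put would hand the item back to the same list. A sketch under the assumption that such a counterpart exists; only opal_free_list_return() is taken from the example above.

/*
 * return a tree item to the free list (hypothetical counterpart)
 */
void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t *item) {
    opal_free_list_return (&mca_mpool_base_tree_item_free_list,
                           (opal_free_list_item_t *) item);
}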
Example no. 6
/*
 * This function opens a handle using the handle that was received
 * from the remote memory.  It uses the addr and size of the remote
 * memory for caching the registration.
 */
int mca_rcache_rgpusm_register (mca_rcache_base_module_t *rcache, void *addr,
                               size_t size, uint32_t flags, int32_t access_flags,
                               mca_rcache_base_registration_t **reg)
{
    mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
    mca_rcache_common_cuda_reg_t *rgpusm_reg;
    mca_rcache_common_cuda_reg_t *rget_reg;
    opal_free_list_item_t *item;
    int rc;
    int mypeer;  /* just for debugging */

    /* In order to preserve the signature of the mca_rcache_rgpusm_register
     * function, we are using the **reg variable to not only get back the
     * registration information, but to hand in the memory handle received
     * from the remote side. */
    rget_reg = (mca_rcache_common_cuda_reg_t *)*reg;

    mypeer = flags;
    flags = 0;
    /* No need to support MCA_RCACHE_FLAGS_CACHE_BYPASS in here. It is not used. */
    assert(0 == (flags & MCA_RCACHE_FLAGS_CACHE_BYPASS));

    /* This chunk of code handles the case where leave pinned is not
     * set and we do not use the cache.  This is not typically how we
     * will be running.  This means that one can have an unlimited
     * number of registrations occurring at the same time.  Since we
     * are not leaving the registrations pinned, the number of
     * registrations is unlimited and there is no need for a cache. */
    if(!mca_rcache_rgpusm_component.leave_pinned && 0 == mca_rcache_rgpusm_component.rcache_size_limit) {
        item = opal_free_list_get (&rcache_rgpusm->reg_list);
        if(NULL == item) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
        rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;
        rgpusm_reg->base.rcache = rcache;
        rgpusm_reg->base.base = addr;
        rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
        rgpusm_reg->base.flags = flags;

        /* Copy the memory handle received into the registration */
        memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle));

        /* The rget_reg registration is holding the memory handle needed
         * to register the remote memory.  This was received from the remote
         * process.  A pointer to the memory is returned in the alloc_base field. */
        rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                                 (mca_rcache_base_registration_t *)rget_reg);

        /* This error should not happen with no cache in use. */
        assert(OPAL_ERR_WOULD_BLOCK != rc);

        if(rc != OPAL_SUCCESS) {
            opal_free_list_return (&rcache_rgpusm->reg_list, item);
            return rc;
        }
        rgpusm_reg->base.ref_count++;
        *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
        return OPAL_SUCCESS;
    }

    /* Check to see if memory is registered and stored in the cache. */
    OPAL_THREAD_LOCK(&rcache->lock);
    mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, size, reg);

    /* If *reg is not NULL, we have a registration.  Let us see if the
     * memory handle matches the one we were looking for.  If not, the
     * registration is invalid and needs to be removed. This happens
     * if memory was allocated, freed, and allocated again and ends up
     * with the same virtual address and within the limits of the
     * previous registration.  The memory handle check will catch that
     * scenario as the handles have unique serial numbers.  */
    if (*reg != NULL) {
        rcache_rgpusm->stat_cache_hit++;
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Found addr=%p,size=%d (base=%p,size=%d) in cache",
                            addr, (int)size, (*reg)->base,
                            (int)((*reg)->bound - (*reg)->base));

        if (mca_common_cuda_memhandle_matches((mca_rcache_common_cuda_reg_t *)*reg, rget_reg)) {
            /* Registration matches what was requested.  All is good. */
            rcache_rgpusm->stat_cache_valid++;
        } else {
            /* This is an old registration.  Need to boot it. */
            opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                                "RGPUSM: Mismatched Handle: Evicting/unregistering "
                                "addr=%p,size=%d (base=%p,size=%d) from cache",
                                addr, (int)size, (*reg)->base,
                                (int)((*reg)->bound - (*reg)->base));

            /* The ref_count has to be zero as this memory cannot possibly
             * be in use.  Assert on that just to make sure. */
            assert(0 == (*reg)->ref_count);
            if (mca_rcache_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&rcache_rgpusm->lru_list,
                                      (opal_list_item_t*)(*reg));
            }

            /* Bump the reference count to keep things copacetic in deregister */
            (*reg)->ref_count++;
            /* Invalidate the registration so it will get booted out. */
            (*reg)->flags |= MCA_RCACHE_FLAGS_INVALID;
            mca_rcache_rgpusm_deregister_no_lock(rcache, *reg);
            *reg = NULL;
            rcache_rgpusm->stat_cache_invalid++;
        }
    } else {
        /* Nothing was found in the cache. */
        rcache_rgpusm->stat_cache_miss++;
    }

    /* If we have a registration here, then we know it is valid. */
    if (*reg != NULL) {
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: CACHE HIT is good: ep=%d, addr=%p, size=%d in cache",
                            mypeer, addr, (int)size);

        /* When using leave pinned, we keep an LRU list. */
        if ((0 == (*reg)->ref_count) && mca_rcache_rgpusm_component.leave_pinned) {
            opal_output_verbose(20, mca_rcache_rgpusm_component.output,
                                "RGPUSM: POP OFF LRU: ep=%d, addr=%p, size=%d in cache",
                                mypeer, addr, (int)size);
            opal_list_remove_item(&rcache_rgpusm->lru_list,
                                  (opal_list_item_t*)(*reg));
        }
        (*reg)->ref_count++;
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_output(-1, "reg->ref_count=%d", (int)(*reg)->ref_count);
        opal_output_verbose(80, mca_rcache_rgpusm_component.output,
                           "RGPUSM: Found entry in cache addr=%p, size=%d", addr, (int)size);
        return OPAL_SUCCESS;
    }

    /* If we are here, then we did not find a registration, or it was invalid,
     * so this is a new one, and we are going to use the cache. */
    assert(NULL == *reg);
    opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                        "RGPUSM: New registration ep=%d, addr=%p, size=%d. Need to register and insert in cache",
                         mypeer, addr, (int)size);

    item = opal_free_list_get (&rcache_rgpusm->reg_list);
    if(NULL == item) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;

    rgpusm_reg->base.rcache = rcache;
    rgpusm_reg->base.base = addr;
    rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
    rgpusm_reg->base.flags = flags;

    /* Need the memory handle saved in the registration */
    memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle));

    /* Actually register the memory, which opens the memory handle.
     * Need to do this prior to putting in the cache as the base and
     * bound values may be changed by the registration.  The memory
     * associated with the handle comes back in the alloc_base
     * value. */
    rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                             (mca_rcache_base_registration_t *)rget_reg);
    /* There is a chance we can get the OPAL_ERR_WOULD_BLOCK from the
     * CUDA codes attempt to register the memory.  The case that this
     * can happen is as follows.  A block of memory is registered.
     * Then the sending side frees the memory.  The sending side then
     * cuMemAllocs memory again and gets the same base
     * address. However, it cuMemAllocs a block that is larger than
     * the one in the cache.  The cache will return that memory is not
     * registered and call into CUDA to register it.  However, that
     * will fail with CUDA_ERROR_ALREADY_MAPPED.  Therefore we need to
     * boot that previous allocation out and deregister it first.
     */
    if (OPAL_ERR_WOULD_BLOCK == rc) {
        mca_rcache_base_registration_t *oldreg;

        /* Need to make sure it is at least 4 bytes in size.  This will
         * ensure we get the hit in the cache. */
        mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, 4, &oldreg);

        /* For most cases, we will find a registration that overlaps.
         * Removal of it should allow the registration we are
         * attempting to succeed. */
        if (NULL != oldreg) {
            /* The ref_count has to be zero as this memory cannot
             * possibly be in use.  Assert on that just to make sure. */
            assert(0 == oldreg->ref_count);
            if (mca_rcache_rgpusm_component.leave_pinned) {
                opal_list_remove_item(&rcache_rgpusm->lru_list,
                                      (opal_list_item_t*)oldreg);
            }

            /* Bump the reference count to keep things copacetic in deregister */
            oldreg->ref_count++;
            /* Invalidate the registration so it will get booted out. */
            oldreg->flags |= MCA_RCACHE_FLAGS_INVALID;
            mca_rcache_rgpusm_deregister_no_lock(rcache, oldreg);
            rcache_rgpusm->stat_evicted++;

            /* And try again.  This one usually works. */
            rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                                     (mca_rcache_base_registration_t *)rget_reg);
        }

        /* There is a chance that another registration is blocking our
         * ability to register.  Check the rc to see if we still need
         * to try and clear out registrations. */
        while (OPAL_SUCCESS != rc) {
            if (true != mca_rcache_rgpusm_deregister_lru(rcache)) {
                rc = OPAL_ERROR;
                break;
            }
            /* Clear out one registration. */
            rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
                                     (mca_rcache_base_registration_t *)rget_reg);
        }
    }

    if(rc != OPAL_SUCCESS) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_free_list_return (&rcache_rgpusm->reg_list, item);
        return rc;
    }

    opal_output_verbose(80, mca_rcache_rgpusm_component.output,
                        "RGPUSM: About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size);
    rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
                                      mca_rcache_rgpusm_component.rcache_size_limit);
    if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) {
        opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                            "RGPUSM: No room in the cache - boot the first one out");
        (void)mca_rcache_rgpusm_deregister_lru(rcache);
        if (mca_rcache_rgpusm_component.empty_cache) {
            int remNum = 1;
            /* Empty out every registration from LRU until it is empty */
            opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                "RGPUSM: About to delete all the unused entries in the cache");
            while (mca_rcache_rgpusm_deregister_lru(rcache)) {
                remNum++;
            }
            opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                "RGPUSM: Deleted and deregistered %d entries", remNum);
            rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
                                             mca_rcache_rgpusm_component.rcache_size_limit);
        } else {
            /* Check for room after one removal. If not, remove another one until there is space */
            while((rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
                                                    mca_rcache_rgpusm_component.rcache_size_limit)) ==
                  OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
                opal_output_verbose(40, mca_rcache_rgpusm_component.output,
                                    "RGPUSM: No room in the cache - boot one out");
                if (!mca_rcache_rgpusm_deregister_lru(rcache)) {
                    break;
                }
            }
        }
    }

    if(rc != OPAL_SUCCESS) {
        OPAL_THREAD_UNLOCK(&rcache->lock);
        opal_free_list_return (&rcache_rgpusm->reg_list, item);
        /* We cannot recover from this.  We can be here if the size of
         * the cache is smaller than the amount of memory we are
         * trying to register in a single transfer.  In that case, rc
         * is MPI_ERR_OUT_OF_RESOURCES, but everything is stuck at
         * that point.  Therefore, just error out completely.
         */
        opal_output_verbose(10, mca_rcache_rgpusm_component.output,
                            "RGPUSM: Failed to register addr=%p, size=%d", addr, (int)size);
        return OPAL_ERROR;
    }

    rgpusm_reg->base.ref_count++;
    *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
    OPAL_THREAD_UNLOCK(&rcache->lock);

    return OPAL_SUCCESS;
}
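Every error path in this function follows the same discipline: each opal_free_list_get() is matched by an opal_free_list_return() before bailing out. Isolated as a sketch; "list" and do_register() are hypothetical stand-ins, and only the free-list calls are taken from the example.

/* Generic shape of the get/register/undo error handling used above. */
static int sketch_alloc_and_register (opal_free_list_t *list)
{
    opal_free_list_item_t *item = opal_free_list_get (list);
    int rc;

    if (NULL == item) {
        return OPAL_ERR_OUT_OF_RESOURCE;      /* nothing to undo yet */
    }
    if (OPAL_SUCCESS != (rc = do_register (item))) {
        opal_free_list_return (list, item);   /* undo the get on failure */
        return rc;
    }
    return OPAL_SUCCESS;
}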
Example no. 7
/* This inserts a node into the tree based on the passed values. */
int opal_rb_tree_insert(opal_rb_tree_t *tree, void * key, void * value)
{
    opal_rb_tree_node_t * y;
    opal_rb_tree_node_t * node;
    opal_free_list_item_t * item;

    /* get the memory for a node */
    item = opal_free_list_get (&tree->free_list);
    if (NULL == item) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    node = (opal_rb_tree_node_t *) item;
    /* insert the data into the node */
    node->key = key;
    node->value = value;

    /* insert the node into the tree */
    btree_insert(tree, node);

    /*do the rotations */
    /* usually one would have to check for NULL, but because of the sentinel,
     * we don't have to */
    while (node->parent->color == RED) {
        if (node->parent == node->parent->parent->left) {
            y = node->parent->parent->right;
            if (y->color == RED) {
                node->parent->color = BLACK;
                y->color = BLACK;
                node->parent->parent->color = RED;
                node = node->parent->parent;
            } else {
                if (node == node->parent->right) {
                    node = node->parent;
                    left_rotate(tree, node);
                }
                node->parent->color = BLACK;
                node->parent->parent->color = RED;
                right_rotate(tree, node->parent->parent);
            }
        } else {
            y = node->parent->parent->left;
            if (y->color == RED) {
                node->parent->color = BLACK;
                y->color = BLACK;
                node->parent->parent->color = RED;
                node = node->parent->parent;
            } else {
                if (node == node->parent->left) {
                    node = node->parent;
                    right_rotate(tree, node);
                }
                node->parent->color = BLACK;
                node->parent->parent->color = RED;
                left_rotate(tree, node->parent->parent);
            }
        }
    }
    /* after the rotations the root is black */
    tree->root_ptr->left->color = BLACK;
    return OPAL_SUCCESS;
}
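The rotation helpers are not shown in these examples. For reference, a standard CLRS-style left rotation that relies on the tree->nill sentinel and the dummy tree->root_ptr from Example no. 1 might look like the sketch below; this is an assumption about the helper, not the code the example actually calls. Note how the dummy root parent removes the usual special case when x is the root.

/* Sketch of a left rotation around x, using the nill sentinel. */
static void left_rotate(opal_rb_tree_t *tree, opal_rb_tree_node_t *x)
{
    opal_rb_tree_node_t *y = x->right;

    /* turn y's left subtree into x's right subtree */
    x->right = y->left;
    if (tree->nill != y->left) {
        y->left->parent = x;
    }
    /* link x's former parent to y; the real root hangs off the dummy
     * root_ptr node, so no special root case is needed here */
    y->parent = x->parent;
    if (x == x->parent->left) {
        x->parent->left = y;
    } else {
        x->parent->right = y;
    }
    /* put x on y's left */
    y->left = x;
    x->parent = y;
}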
Example no. 8
static inline int
ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
                             struct ompi_communicator_t* comm,
                             int dest,
                             int tag,
                             struct opal_convertor_t *convertor,
                             mca_pml_base_send_mode_t mode,
                             ompi_mtl_portals4_isend_request_t* ptl_request)
{
    int ret= OMPI_SUCCESS;
    void *start;
    size_t length;
    bool free_after;
    ptl_process_t ptl_proc;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    opal_free_list_item_t *item;
    ompi_mtl_portals4_pending_request_t *pending;
#endif

    if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
        ptl_proc.rank = dest;
    } else {
        ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
        ptl_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
    }

    ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
    if (OMPI_SUCCESS != ret) return ret;

    ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
    ptl_request->buffer_ptr = (free_after) ? start : NULL;
    ptl_request->event_count = 0;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Send %lu to %x,%x of length %ld\n",
                         ptl_request->opcount,
                         ptl_proc.phys.nid,
                         ptl_proc.phys.pid,
                         (int64_t)length));

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    item = opal_free_list_get (&ompi_mtl_portals4.flowctl.pending_fl);
    if (NULL == item) return OMPI_ERR_OUT_OF_RESOURCE;

    pending = (ompi_mtl_portals4_pending_request_t*) item;
    ptl_request->pending = pending;
    pending->mode = mode;
    pending->start = start;
    pending->length = length;
    pending->contextid = comm->c_contextid;
    pending->tag = tag;
    pending->my_rank = comm->c_my_rank;
    pending->fc_notified = 0;
    pending->ptl_proc = ptl_proc;
    pending->ptl_request = ptl_request;

    if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &pending->super.super);
        return OMPI_SUCCESS;
    }

    if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &pending->super.super);
        ompi_mtl_portals4_pending_list_progress();
        return OMPI_SUCCESS;
    }

    if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &pending->super.super);
        return OMPI_SUCCESS;
    }
#endif
    if (length <= ompi_mtl_portals4.eager_limit) {
        ret = ompi_mtl_portals4_short_isend(mode,
                                            start,
                                            length,
                                            comm->c_contextid,
                                            tag,
                                            comm->c_my_rank,
                                            ptl_proc,
                                            ptl_request);
    } else {
        ret = ompi_mtl_portals4_long_isend(start,
                                           length,
                                           comm->c_contextid,
                                           tag,
                                           comm->c_my_rank,
                                           ptl_proc,
                                           ptl_request);
    }

    return ret;
}
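The three OPAL_UNLIKELY branches above share one reservation pattern: atomically take a send slot by decrementing the counter, and give the slot straight back whenever the send must be queued instead. Isolated as a sketch (the helper name is hypothetical; the counter and the atomic macro are from the example):

/* Reserve one flow-control send slot; undo the decrement on failure. */
static inline bool try_reserve_slot(void)
{
    if (OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        return false;
    }
    return true;
}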
Example no. 9
/*
 * register memory
 */
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
    const bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
    const bool persist = !!(flags & MCA_MPOOL_FLAGS_PERSIST);
    mca_mpool_base_registration_t *grdma_reg;
    opal_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* if cache bypass is requested don't use the cache */
    base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                            mca_mpool_base_page_size_log);
    if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
        do_unregistration_gc(mpool);

#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        size_t psize;
        mca_common_cuda_get_address_range(&base, &psize, addr);
        bound = base + psize - 1;
        /* Check to see if this memory is in the cache and if it has been freed. If so,
         * this call will boot it out of the cache. */
        check_for_cuda_freed_memory(mpool, base, psize);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    /* look through existing regs if not persistent registration requested.
     * Persistent registration are always registered and placed in the cache */
    if(!(bypass_cache || persist)) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg);
        if (*reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
            if (0 == (*reg)->ref_count) {
                /* Leave pinned must be set for this to still be in the rcache. */
                opal_list_remove_item(&mpool_grdma->pool->lru_list,
                                      (opal_list_item_t *)(*reg));
            }

            /* This segment fits fully within an existing segment. */
            mpool_grdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return OPAL_SUCCESS;
        }

        mpool_grdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* Unless explicitly requested by the caller, always store the
         * registration in the rcache. This will speed up the case where
         * no leave pinned protocol is in use but the same segment is in
         * use in multiple simultaneous transactions. We used to set bypass_cache
         * here if !mca_mpool_grdma_component.leave_pinned. */
    }

    item = opal_free_list_get (&mpool_grdma->reg_list);
    if(NULL == item) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    grdma_reg = (mca_mpool_base_registration_t*)item;

    grdma_reg->mpool = mpool;
    grdma_reg->base = base;
    grdma_reg->bound = bound;
    grdma_reg->flags = flags;
#if OPAL_CUDA_GDR_SUPPORT
    if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
        mca_common_cuda_get_buffer_id(grdma_reg);
    }
#endif /* OPAL_CUDA_GDR_SUPPORT */

    if (false == bypass_cache) {
        rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);

        if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            opal_free_list_return (&mpool_grdma->reg_list, item);
            return rc;
        }
    }

    while (OPAL_ERR_OUT_OF_RESOURCE ==
           (rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
                                                     base, bound - base + 1, grdma_reg))) {
        /* try to remove one unused reg and retry */
        if (!mca_mpool_grdma_evict (mpool)) {
            break;
        }
    }

    if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
        if (false == bypass_cache) {
            mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
        }
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        opal_free_list_return (&mpool_grdma->reg_list, item);
        return rc;
    }

    *reg = grdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
    return OPAL_SUCCESS;
}
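A minimal caller sketch to round out the example. It assumes the usual mpool module convention of a matching deregister entry point that drops the reference taken above; mpool_deregister, buf, and len are assumptions, not taken from the code.

/* Hypothetical caller: register, use, then release the registration. */
mca_mpool_base_registration_t *reg = NULL;
int rc = mca_mpool_grdma_register (mpool, buf, len, 0, &reg);
if (OPAL_SUCCESS == rc) {
    /* ... perform RDMA using reg ... */
    mpool->mpool_deregister (mpool, reg);    /* drops the ref_count */
}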