Example #1
0
/**
 * Build a source descriptor for a send/put on the Portals BTL.
 *
 * Two strategies are used:
 *  - If header room is requested (reserve != 0) or the convertor reports
 *    that it needs buffers, a fragment is taken from
 *    mca_btl_portals_alloc() and the data is packed behind the reserved
 *    bytes.
 *  - Otherwise the convertor is asked for a pointer into the user's buffer
 *    (iov_base is NULL on input) and a match entry plus memory descriptor
 *    are attached to it so it can be the target of a put or get.
 *
 * @param btl_base     (IN)     BTL module; asserted to be mca_btl_portals_module
 * @param peer         (IN)     Endpoint; dereferenced and handed to PtlMEAttach
 * @param registration (IN)     Not referenced by this implementation
 * @param convertor    (IN)     Convertor describing the data to send
 * @param reserve      (IN)     Bytes to leave free at the start of the segment
 * @param size         (IN/OUT) Bytes requested on entry, bytes described by
 *                              the returned descriptor on success
 *
 * @return The prepared descriptor, or NULL on any failure.  Every failure
 *         path releases the fragment it obtained and, on the RDMA path,
 *         gives back the outstanding-operation slot it reserved.
 */
mca_btl_base_descriptor_t* 
mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
                            struct mca_btl_base_endpoint_t* peer,
                            mca_mpool_base_registration_t* registration, 
                            struct ompi_convertor_t* convertor,
                            size_t reserve,
                            size_t* size)
{
    mca_btl_portals_frag_t* frag;
    size_t max_data = *size;
    struct iovec iov;
    uint32_t iov_count = 1;
    int ret;

    assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);

    if (0 != reserve || 0 != ompi_convertor_need_buffers(convertor)) {
        frag = (mca_btl_portals_frag_t*) 
            mca_btl_portals_alloc(btl_base, max_data + reserve);
        if (NULL == frag)  {
            return NULL;
        }

        /* never pack more than the fragment can hold after the header */
        if (max_data + reserve > frag->size) {
            max_data = frag->size - reserve;
        }
        
        iov.iov_len = max_data;
        iov.iov_base = (unsigned char*) frag->segments[0].seg_addr.pval + reserve;
        ret = ompi_convertor_pack(convertor, &iov, &iov_count, 
                                  &max_data );
        *size  = max_data;
        if ( ret < 0 ) {
            /* Hand the fragment back instead of leaking it.
             * NOTE(review): relies on mca_btl_portals_free() being the
             * release counterpart of mca_btl_portals_alloc() - confirm. */
            mca_btl_portals_free(btl_base, &frag->base);
            return NULL;
        }

        frag->segments[0].seg_len = max_data + reserve;
        frag->base.des_src_cnt = 1;

    } else {
        /* no need to pack - rdma operation out of user's buffer */
        ptl_md_t md;
        ptl_handle_me_t me_h;

        /* reserve space in the event queue for rdma operations immediately;
         * while over the limit, undo the increment and drive progress */
        while (OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, 1) >
               mca_btl_portals_module.portals_max_outstanding_ops) {
            OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
            mca_btl_portals_component_progress();
        }

        OMPI_BTL_PORTALS_FRAG_ALLOC_USER(&mca_btl_portals_module.super, frag, ret);
        if(NULL == frag){
            OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
            return NULL;
        }
        iov.iov_len = max_data;
        iov.iov_base = NULL;

        /* with a NULL base the convertor fills in the address to use */
        ret = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
        if (ret < 0) {
            OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
            OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
            return NULL;
        }
        /* report what the segment really covers, as the packed path does */
        *size = max_data;

        frag->segments[0].seg_len = max_data;
        frag->segments[0].seg_addr.pval = iov.iov_base;
        frag->segments[0].seg_key.key64 = 
            OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
        frag->base.des_src_cnt = 1;

        /* either a put or get.  figure out which later */
        OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
                             "rdma src posted for frag 0x%lx, callback 0x%lx, bits %lld",
                             (unsigned long) frag,
                             (unsigned long) frag->base.des_cbfunc,
                             (long long) frag->segments[0].seg_key.key64));

        /* create a match entry keyed on the freshly generated rdma key */
        ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
                          OMPI_BTL_PORTALS_RDMA_TABLE_ID,
                          *((mca_btl_base_endpoint_t*) peer),
                          frag->segments[0].seg_key.key64, /* match */
                          0, /* ignore */
                          PTL_UNLINK,
                          PTL_INS_AFTER,
                          &me_h);
        if (PTL_OK != ret) {
            opal_output(mca_btl_portals_component.portals_output,
                        "Error creating rdma src ME: %d", ret);
            OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
            OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
            return NULL;
        }

        /* setup the memory descriptor */
        md.start = frag->segments[0].seg_addr.pval;
        md.length = frag->segments[0].seg_len;
        md.threshold = PTL_MD_THRESH_INF;
        md.max_size = 0;
        md.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = frag; /* keep a pointer to ourselves */
        md.eq_handle = mca_btl_portals_module.portals_eq_handles[OMPI_BTL_PORTALS_EQ_SEND];

        ret = PtlMDAttach(me_h, 
                          md,
                          PTL_UNLINK,
                          &(frag->md_h));
        if (PTL_OK != ret) {
            opal_output(mca_btl_portals_component.portals_output,
                        "Error creating rdma src MD: %d", ret);
            /* the match entry has no descriptor yet; drop it explicitly */
            PtlMEUnlink(me_h);
            OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
            OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
            return NULL;
        }
    }

    frag->base.des_src = frag->segments;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = 0;

    return &frag->base;
}
Example #2
0
/**
 * Build a source descriptor for a send/put on the self BTL.
 *
 * The data is referenced in place only when the convertor needs no
 * buffers, the request is at least btl_max_send_size bytes and no header
 * room (reserve) was asked for.  In every other case the data is packed
 * into the memory that immediately follows a send fragment.
 *
 * @param btl          (IN)     BTL module (not referenced here)
 * @param endpoint     (IN)     BTL peer addressing (not referenced here)
 * @param registration (IN)     Memory registration (not referenced here)
 * @param convertor    (IN)     Convertor describing the data
 * @param reserve      (IN)     Bytes to leave free at the start of the segment
 * @param size         (IN/OUT) Bytes requested on entry, bytes described by
 *                              the descriptor on successful return
 *
 * @return The prepared descriptor, or NULL if no fragment was available
 *         or the convertor reported an error (the fragment is returned to
 *         its list in that case).
 */
struct mca_btl_base_descriptor_t* mca_btl_self_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    mca_mpool_base_registration_t* registration,
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size)
{
    mca_btl_self_frag_t* frag;
    struct iovec vec;
    uint32_t vec_cnt = 1;
    size_t length = *size;
    int rc;
    const int in_place = !ompi_convertor_need_buffers(convertor) &&
                         !(length < mca_btl_self.btl_max_send_size) &&
                         (0 == reserve);

    if( in_place ) {
        MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag, rc);
        if( NULL == frag ) {
            return NULL;
        }

        /* a NULL base makes the convertor hand back an offset into the
         * user's buffer instead of copying */
        vec.iov_base = NULL;
        vec.iov_len  = length;
        rc = ompi_convertor_pack(convertor, &vec, &vec_cnt, &length );
        if( rc < 0 ) {
            MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
            return NULL;
        }

        frag->segment.seg_addr.pval = vec.iov_base;
        frag->segment.seg_len       = length;
    } else {
        MCA_BTL_SELF_FRAG_ALLOC_SEND(frag, rc);
        if( NULL == frag ) {
            return NULL;
        }

        /* trim the payload so header plus data fit in the fragment */
        if( frag->size < reserve + length ) {
            length = frag->size - reserve;
        }

        /* payload goes right after the fragment header and reserved bytes */
        vec.iov_base = (IOVBASE_TYPE*)((unsigned char*)(frag+1) + reserve);
        vec.iov_len  = length;
        rc = ompi_convertor_pack(convertor, &vec, &vec_cnt, &length );
        if( rc < 0 ) {
            MCA_BTL_SELF_FRAG_RETURN_SEND(frag);
            return NULL;
        }

        frag->segment.seg_addr.pval = frag+1;
        frag->segment.seg_len       = reserve + length;
    }

    /* bookkeeping shared by both strategies */
    *size                       = length;
    frag->base.des_flags        = 0;
    frag->base.des_src          = &frag->segment;
    frag->base.des_src_cnt      = 1;
    frag->segment.seg_key.key64 = (uint64_t)(intptr_t)convertor;
    return &frag->base;
}
Example #3
0
/**
 * Attempt an immediate ("inline") send to the peer.
 *
 * If the endpoint is not connected the call either fails or starts a
 * connection.  When the convertor needs no buffers, the header and the
 * payload are posted straight to mx_isend() as up to two segments.  If the
 * inline path is not taken, a descriptor large enough for header plus
 * payload is allocated and handed back through @p descriptor.
 *
 * @param btl          (IN)  BTL module
 * @param endpoint     (IN)  BTL peer addressing
 * @param convertor    (IN)  Convertor describing the payload
 * @param header       (IN)  Header bytes to send ahead of the payload
 * @param header_size  (IN)  Length of the header; 0 skips the header segment
 * @param payload_size (IN)  Length of the payload; 0 skips the payload segment
 * @param order        (IN)  Passed through to mca_btl_mx_alloc()
 * @param flags        (IN)  Passed through to mca_btl_mx_alloc()
 * @param tag          (IN)  Tag placed in bits 8 and up of the MX match value
 * @param descriptor   (OUT) Receives the fallback descriptor
 *
 * @return OMPI_SUCCESS when the message was sent inline (HAVE_MX_FORGET
 *         builds only), OMPI_ERROR / OMPI_ERR_OUT_OF_RESOURCE on connection
 *         or mx_isend problems, OMPI_ERR_RESOURCE_BUSY when a descriptor is
 *         returned instead.
 *
 * NOTE(review): without HAVE_MX_FORGET a successful mx_isend() still falls
 * through to the descriptor path and returns OMPI_ERR_RESOURCE_BUSY -
 * confirm this is intended.
 */
static int mca_btl_mx_sendi( struct mca_btl_base_module_t* btl,
                             struct mca_btl_base_endpoint_t* endpoint,
                             struct ompi_convertor_t* convertor,
                             void* header,
                             size_t header_size,
                             size_t payload_size,
                             uint8_t order,
                             uint32_t flags,
                             mca_btl_base_tag_t tag,
                             mca_btl_base_descriptor_t** descriptor )
{
    mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*) btl;
    mca_btl_mx_endpoint_t* mx_ep = (mca_btl_mx_endpoint_t*) endpoint;
    size_t packed_len;

    if( OPAL_UNLIKELY(MCA_BTL_MX_CONNECTED != mx_ep->status) ) {
        if( MCA_BTL_MX_NOT_REACHEABLE == mx_ep->status ) {
            return OMPI_ERROR;
        }
        if( MCA_BTL_MX_CONNECTION_PENDING == mx_ep->status ) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        if( OMPI_SUCCESS != mca_btl_mx_proc_connect( mx_ep ) ) {
            return OMPI_ERROR;
        }
    }

    if( !ompi_convertor_need_buffers(convertor) ) {
        mx_segment_t mx_segments[2];
        uint32_t nsegs = 0;
        uint64_t match_bits = 0x01ULL | (((uint64_t)tag) << 8);
        mx_return_t mx_rc;
        mx_request_t mx_req;

        /* optional first segment: the caller supplied header */
        if( 0 != header_size ) {
            mx_segments[nsegs].segment_ptr    = header;
            mx_segments[nsegs].segment_length = header_size;
            nsegs++;
        }

        /* optional next segment: the payload, referenced in place */
        if( 0 != payload_size ) {
            struct iovec vec;
            uint32_t vec_cnt = 1;

            vec.iov_len  = payload_size;
            vec.iov_base = NULL;  /* let the convertor supply the address */

            (void)ompi_convertor_pack( convertor, &vec, &vec_cnt, &packed_len );
            assert( packed_len == payload_size );

            mx_segments[nsegs].segment_ptr    = vec.iov_base;
            mx_segments[nsegs].segment_length = packed_len;
            nsegs++;
        }

        mx_rc = mx_isend( mx_btl->mx_endpoint, mx_segments, nsegs,
                          endpoint->mx_peer_addr, match_bits, NULL, &mx_req );
        if( OPAL_UNLIKELY(MX_SUCCESS != mx_rc) ) {
            opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_rc) );
            return OMPI_ERROR;
        }
#ifdef HAVE_MX_FORGET
        {
            uint32_t buffered;

            mx_rc = mx_ibuffered( mx_btl->mx_endpoint, &mx_req, &buffered );
            if( OPAL_LIKELY(MX_SUCCESS == mx_rc) ) {
                /* once MX reports the request buffered, drop it */
                if( buffered ) {
                    mx_rc = mx_forget( mx_btl->mx_endpoint, &mx_req );
                    if( OPAL_UNLIKELY(MX_SUCCESS != mx_rc) ) {
                        opal_output( 0, "mx_forget failed with error %d (%s)\n",
                                     mx_rc, mx_strerror(mx_rc) );
                    }
                }
            } else {
                opal_output( 0, "mx_ibuffered failed with error %d (%s)\n",
                             mx_rc, mx_strerror(mx_rc) );
            }
            /* the send itself was posted; report success either way */
            return OMPI_SUCCESS;
        }
#endif
    }

    /* No inline send happened (or it could not be completed here): give the
     * caller a descriptor to fill and send the regular way.
     */
    *descriptor = mca_btl_mx_alloc( btl, endpoint, order,
                                    header_size + payload_size, flags );
    return OMPI_ERR_RESOURCE_BUSY;
}
Example #4
0
/**
 * Register the user buffer, or pack data into a pre-registered buffer,
 * and return a descriptor that can be used for send/put.
 *
 * @param btl          (IN)     BTL module
 * @param endpoint     (IN)     BTL peer addressing (not referenced here)
 * @param registration (IN)     Existing registration of the user buffer, or NULL
 * @param convertor    (IN)     Convertor describing the data
 * @param reserve      (IN)     Bytes to leave free at the start of the segment
 * @param size         (IN/OUT) Bytes requested on entry, bytes described by
 *                              the descriptor on successful return
 *
 * @return The prepared descriptor, or NULL if no fragment was available,
 *         registration failed, or the convertor reported an error.
 *
 * prepare_src's behavior depends on the following:
 * Has a valid memory registration been passed to prepare_src?
 *    If so we attempt to use the pre-registered user buffer; if the memory
 *    registration is too small (only a portion of the user buffer) then we
 *    must re-register the user buffer.
 * Has the user requested the memory to be left pinned?
 *    If so we insert the memory registration into a memory tree for later
 *    lookup; we may also remove a previous registration if an MRU (most
 *    recently used) list of registrations is full, which prevents resources
 *    from being exhausted.
 * Is the requested size larger than the btl's max send size?
 *    If so, and we aren't asked to leave the registration pinned, then we
 *    register the memory if the user's buffer is contiguous.
 * Otherwise we choose from two free lists of pre-registered memory in which
 * to pack the data.
 *
 * NOTE(review): the leave-pinned / MRU handling described above is not
 * visible in this function; presumably it lives behind mpool_register -
 * confirm.
 */
mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    mca_mpool_base_registration_t* registration, 
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size
)
{
    mca_btl_openib_module_t *openib_btl;
    mca_btl_openib_frag_t *frag = NULL;
    mca_btl_openib_reg_t *openib_reg;
    struct iovec iov;
    uint32_t iov_count = 1;
    size_t max_data = *size;
    int rc;

    openib_btl = (mca_btl_openib_module_t*)btl;

    /* Zero-copy path: contiguous data, no header room requested, and either
     * the caller already registered the buffer or it is too big to copy. */
    if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
        if(registration != NULL || max_data > btl->btl_max_send_size) {
            MCA_BTL_IB_FRAG_ALLOC_SEND_FRAG(btl, frag, rc);
            if(NULL == frag) {
                return NULL;
            }

            iov.iov_len = max_data;
            iov.iov_base = NULL;

            /* with a NULL base the convertor fills in the user address;
             * check the result exactly like the copy path below does */
            rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
            if(rc < 0) {
                MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
                return NULL;
            }

            *size = max_data;

            if(NULL == registration) {
                rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
                        iov.iov_base, max_data, 0, &registration);
                if(OMPI_SUCCESS != rc || NULL == registration) {
                    MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
                    return NULL;
                }
                /* keep track of the registration we did */
                frag->registration = (mca_btl_openib_reg_t*)registration;
            }
            openib_reg = (mca_btl_openib_reg_t*)registration;

            frag->base.des_flags = 0;
            frag->base.des_src = &frag->segment;
            frag->base.des_src_cnt = 1;
            frag->base.des_dst = NULL;
            frag->base.des_dst_cnt = 0;

            frag->sg_entry.length = max_data;
            frag->sg_entry.lkey = openib_reg->mr->lkey;
            /* uintptr_t keeps the full pointer value on every ABI */
            frag->sg_entry.addr = (uintptr_t)iov.iov_base;

            frag->segment.seg_len = max_data;
            frag->segment.seg_addr.pval = iov.iov_base;
            frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;

            /* cast so the arguments match the conversion specifiers */
            BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
                        "frag->segment.seg_key.key32[0] = %lu",
                        (unsigned long)frag->sg_entry.lkey,
                        (unsigned long long)frag->sg_entry.addr,
                        (unsigned long)frag->segment.seg_key.key32[0]));

            return &frag->base;
        }
    }

    /* Copy path: pack into a pre-registered fragment. */
    if(max_data + reserve <= btl->btl_eager_limit) {
        /* the data is small enough to fit in the eager frag and
         * memory is not prepinned */
        MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
    }

    if(NULL == frag) {
        /* the data doesn't fit into an eager frag or no eager frag is
         * available */
        MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
        if(NULL == frag) {
            return NULL;
        }
        /* trim the payload so header plus data fit in a max-size frag */
        if(max_data + reserve > btl->btl_max_send_size) {
            max_data = btl->btl_max_send_size - reserve;
        }
    }

    iov.iov_len = max_data;
    iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
    rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
    if(rc < 0) {
        MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
        return NULL;
    }
    *size  = max_data;
    frag->segment.seg_len = max_data + reserve;
    frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = 0;

    return &frag->base;
}
Example #5
0
/**
 * Pack data and return a descriptor that can be used for send/put.
 *
 * Three layouts are produced:
 *  - contiguous data, no reserve: a user fragment with one segment that
 *    points at the address supplied by the convertor, covering *size bytes;
 *  - contiguous data with reserve: an eager fragment with two segments,
 *    the first holding the reserved bytes and the second pointing at the
 *    address supplied by the convertor;
 *  - data that needs buffers: an eager fragment with one segment into
 *    which the data is packed after the reserved bytes.
 * For the two eager layouts the payload is capped at
 * btl_eager_limit - reserve.
 *
 * @param btl          (IN)     BTL module
 * @param endpoint     (IN)     BTL peer addressing (not referenced here)
 * @param registration (IN)     Memory registration (not referenced here)
 * @param convertor    (IN)     Convertor describing the data
 * @param order        (IN)     Not referenced; the descriptor's order is
 *                              always set to MCA_BTL_NO_ORDER
 * @param reserve      (IN)     Bytes to leave free at the start of the segment
 * @param size         (IN/OUT) Bytes requested on entry, bytes described by
 *                              the descriptor on return
 * @param flags        (IN)     Stored in the descriptor's des_flags
 *
 * @return The prepared descriptor, or NULL if no fragment was available.
 */
mca_btl_base_descriptor_t*
mca_btl_mx_prepare_src( struct mca_btl_base_module_t* btl,
                        struct mca_btl_base_endpoint_t* endpoint,
                        struct mca_mpool_base_registration_t* registration,
                        struct ompi_convertor_t* convertor,
                        uint8_t order,
                        size_t reserve,
                        size_t* size,
                        uint32_t flags)
{
    mca_btl_mx_frag_t* frag;
    struct iovec iov;
    uint32_t iov_count = 1;
    size_t max_data;
    int rc;

    /* NOTE(review): assumes reserve <= btl_eager_limit; otherwise this
     * unsigned subtraction wraps - confirm callers guarantee it. */
    max_data = btl->btl_eager_limit - reserve;
    if( (*size) < max_data ) {
        max_data = *size;
    }
    /* If the data is contiguous we can use directly the pointer
     * to the user memory.
     */
    if( 0 == ompi_convertor_need_buffers(convertor) ) {
        /**
         * let the convertor figure out the correct pointer depending
         * on the data layout
         */
        iov.iov_base = NULL;
        if( 0 == reserve ) {
            MCA_BTL_MX_FRAG_ALLOC_USER(btl, frag, rc);
            if( OPAL_UNLIKELY(NULL == frag) ) {
                return NULL;
            }
            /* nothing is copied, so the eager cap does not apply */
            max_data = *size;
            frag->base.des_src_cnt = 1;
        } else {
            /* pass the module we were given: no mx_btl is declared in this
             * function, so the old spelling only built if the macro
             * discarded its first argument */
            MCA_BTL_MX_FRAG_ALLOC_EAGER( btl, frag, rc );
            if( OPAL_UNLIKELY(NULL == frag) ) {
                return NULL;
            }
            frag->base.des_src_cnt = 2;
        }
    } else {
        MCA_BTL_MX_FRAG_ALLOC_EAGER( btl, frag, rc );
        if( OPAL_UNLIKELY(NULL == frag) ) {
            return NULL;
        }
        frag->base.des_src_cnt = 1;
        /* pack right behind the reserved header bytes */
        iov.iov_base = (void*)((unsigned char*)frag->segment[0].seg_addr.pval + reserve);
    }

    iov.iov_len = max_data;
    /* the convertor's return code is deliberately discarded here */
    (void)ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
    *size = max_data;

    if( 1 == frag->base.des_src_cnt ) {
        frag->segment[0].seg_len = reserve + max_data;
        /* user fragment: the single segment points at the user's data */
        if( 0 == reserve )
            frag->segment[0].seg_addr.pval = iov.iov_base;
    } else {
        /* segment 0 carries the header, segment 1 the user's data */
        frag->segment[0].seg_len       = reserve;
        frag->segment[1].seg_len       = max_data;
        frag->segment[1].seg_addr.pval = iov.iov_base;
    }
    frag->base.des_src   = frag->segment;
    frag->base.des_flags = flags;
    frag->base.order     = MCA_BTL_NO_ORDER;

    return &frag->base;
}