/**
 * Convertor advance hook of the pessimist sender-based message logging.
 *
 * The hook puts back the convertor flags and advance function saved in the
 * request's sender-based state, packs through opal_convertor_pack(), copies
 * every iovec entry that was produced to the sender-based cursor, and finally
 * installs itself again as the convertor advance function.
 *
 * @param pConvertor convertor attached to the send request
 * @param iov        iovec array forwarded to opal_convertor_pack()
 * @param out_size   in/out iovec count forwarded to opal_convertor_pack()
 * @param max_data   in/out byte count forwarded to opal_convertor_pack()
 * @return whatever opal_convertor_pack() returned
 */
int32_t vprotocol_pessimist_sender_based_convertor_advance(opal_convertor_t* pConvertor,
                                                            struct iovec* iov,
                                                            uint32_t* out_size,
                                                            size_t* max_data) {
    mca_vprotocol_pessimist_send_request_t *req = VPESSIMIST_CONV_REQ(pConvertor);
    unsigned int idx = 0;
    size_t remaining;
    int rc;

    /* pack with the convertor settings that were saved in the request */
    pConvertor->flags = req->sb.conv_flags;
    pConvertor->fAdvance = req->sb.conv_advance;
    rc = opal_convertor_pack(pConvertor, iov, out_size, max_data);
    V_OUTPUT_VERBOSE(39, "pessimist:\tsb\tpack\t%"PRIsize_t, *max_data);

    /* mirror the packed bytes, one iovec entry at a time, behind the cursor */
    remaining = *max_data;
    while(remaining > 0) {
        assert(idx < *out_size);
        MEMCPY((void *) req->sb.cursor, iov[idx].iov_base, iov[idx].iov_len);
        remaining -= iov[idx].iov_len;
        req->sb.cursor += iov[idx].iov_len;
        idx++;
    }
    assert(remaining == 0);

    /* make sure the next pack goes through this hook again */
    pConvertor->flags &= ~CONVERTOR_NO_OP;
    pConvertor->fAdvance = &vprotocol_pessimist_sender_based_convertor_advance;
    return rc;
}
Example #2
0
/**
 * Build a send descriptor for the SCIF BTL.
 *
 * Two layouts are produced:
 *  - in-place: taken when the module advertises MCA_BTL_FLAGS_SEND_INPLACE,
 *    the convertor needs no buffers and reserve <= 128. The descriptor gets two
 *    segments; the second one points at the convertor's current pointer.
 *  - buffered: the payload is packed behind the reserve in segment 0 and
 *    *size is updated to the number of bytes actually packed.
 *
 * @return the base descriptor of the fragment, or NULL when no fragment could
 *         be allocated or packing failed
 */
static inline struct mca_btl_base_descriptor_t *
mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
                               mca_btl_base_endpoint_t *endpoint,
                               struct opal_convertor_t *convertor,
                               uint8_t order, size_t reserve, size_t *size,
                               uint32_t flags)
{
    mca_btl_scif_base_frag_t *frag = NULL;

    if (OPAL_LIKELY((mca_btl_scif_module.super.btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) &&
                    !opal_convertor_need_buffers (convertor) &&
                    reserve <= 128)) {
        /* in-place layout: segment 0 is sized for the reserve, segment 1 is the user data */
        void *user_data;

        opal_convertor_get_current_pointer (convertor, &user_data);

        (void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        frag->base.des_segment_count    = 2;
        frag->segments[0].seg_len       = reserve;
        frag->segments[1].seg_len       = *size;
        frag->segments[1].seg_addr.pval = user_data;
    } else {
        /* buffered layout: everything lives in segment 0 */
        (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        if (0 != *size) {
            struct iovec pack_iov;
            uint32_t pack_iov_count = 1;
            size_t packed = *size;
            int status;

            /* pack right behind the space reserved for the header */
            pack_iov.iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->segments[0].seg_addr.pval + reserve);
            pack_iov.iov_len  = *size;

            status = opal_convertor_pack (convertor, &pack_iov, &pack_iov_count, &packed);
            if (OPAL_UNLIKELY(status < 0)) {
                mca_btl_scif_frag_return (frag);
                return NULL;
            }
            *size = packed;
        }

        frag->base.des_segment_count = 1;
        frag->segments[0].seg_len    = reserve + *size;
    }

    frag->base.des_flags    = flags;
    frag->base.order        = order;
    frag->base.des_segments = frag->segments;

    return &frag->base;
}
/**
 * Start a buffered send: reserve room in the attached bsend buffer and pack
 * the user data into it.
 *
 * Requests with nothing to pack return OMPI_SUCCESS immediately. Otherwise a
 * region of req_bytes_packed bytes is taken from mca_pml_bsend_allocator
 * (under mca_pml_bsend_mutex), the request convertor is packed into it, and
 * the convertor is then re-prepared to send the packed bytes from that region.
 *
 * @param request the send request (cast to mca_pml_base_send_request_t)
 * @return OMPI_SUCCESS, OMPI_ERR_BUFFER when no buffer is attached or the
 *         allocation fails, OMPI_ERROR when packing fails
 */
int mca_pml_base_bsend_request_start(ompi_request_t* request)
{
    mca_pml_base_send_request_t* sendreq = (mca_pml_base_send_request_t*)request;
    struct iovec pack_iov;
    unsigned int pack_iov_count = 1;
    size_t packed;

    /* nothing to buffer */
    if(!(sendreq->req_bytes_packed > 0)) {
        return OMPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);

    /* the user must have attached a buffer beforehand */
    if(NULL == mca_pml_bsend_addr) {
        sendreq->req_addr = NULL;
        OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
        return OMPI_ERR_BUFFER;
    }

    /* carve out the space that will hold the packed message */
    sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
        mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL);
    if(NULL == sendreq->req_addr) {
        /* release resources when request is freed */
        sendreq->req_base.req_pml_complete = true;
        OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
        return OMPI_ERR_BUFFER;
    }

    OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);

    /* The request convertor is used as-is here; only the packing into the
     * freshly allocated region remains to be done.
     */
    pack_iov.iov_len = sendreq->req_bytes_packed;
    pack_iov.iov_base = (IOVBASE_TYPE*)sendreq->req_addr;
    packed = pack_iov.iov_len;
    /* NOTE(review): on failure the region allocated above is not released
     * here - presumably the request teardown does it; confirm. */
    if(opal_convertor_pack( &sendreq->req_base.req_convertor,
                            &pack_iov,
                            &pack_iov_count,
                            &packed ) < 0) {
        return OMPI_ERROR;
    }

    /* from now on the convertor describes the packed bytes in the bsend buffer */
    opal_convertor_prepare_for_send( &sendreq->req_base.req_convertor, &(ompi_mpi_packed.dt.super),
                                     packed, sendreq->req_addr );

    /* one more buffered request in flight */
    mca_pml_bsend_count++;

    return OMPI_SUCCESS;
}
Example #4
0
/**
 * Pack up to `length` bytes of the convertor's data, starting at byte
 * `offset` of the packed stream, into `buffer`.
 *
 * The return code of opal_convertor_pack() is not inspected; the function
 * returns the byte count the convertor wrote back into `length`.
 */
static size_t mca_pml_yalla_stream_pack(void *buffer, size_t length, size_t offset,
                                        opal_convertor_t *convertor)
{
    struct iovec dest;
    uint32_t dest_count = 1;

    dest.iov_len  = length;
    dest.iov_base = buffer;

    /* move the convertor to the requested offset, then pack from there */
    opal_convertor_set_position(convertor, &offset);
    opal_convertor_pack(convertor, &dest, &dest_count, &length);

    return length;
}
Example #5
0
/**
 * Stream callback: pack up to `length` bytes, starting at byte `offset` of the
 * packed stream, into `buffer`.
 *
 * @param context the opal_convertor_t to pack from, passed as an opaque pointer
 * @return the byte count written back by opal_convertor_pack() (its return
 *         code is not inspected)
 */
static size_t ompi_mtl_mxm_stream_send(void *buffer, size_t length, size_t offset, void *context)
{
    opal_convertor_t *conv = (opal_convertor_t *) context;
    uint32_t dest_count = 1;
    struct iovec dest;

    dest.iov_base = buffer;
    dest.iov_len = length;

    /* reposition first, then pack into the caller's buffer */
    opal_convertor_set_position(conv, &offset);
    opal_convertor_pack(conv, &dest, &dest_count, &length);

    return length;
}
Example #6
0
/**
 * Prepare a send descriptor for the Portals4 BTL by packing the convertor's
 * data behind `reserve` bytes in a freshly allocated fragment.
 *
 * Only the "reserve requested or convertor needs buffers" case has an
 * implementation. For the remaining case (no reserve, no buffers needed) no
 * fragment is ever allocated, so the function now returns NULL instead of
 * dereferencing an uninitialized fragment pointer.
 *
 * @param btl_base   BTL module (a mca_btl_portals4_module_t)
 * @param peer       endpoint forwarded to mca_btl_portals4_alloc()
 * @param convertor  source of the data to pack
 * @param order      not used; the descriptor is always MCA_BTL_NO_ORDER
 * @param reserve    bytes to leave free at the start of the segment
 * @param size       in: bytes requested, out: bytes actually packed
 * @param flags      descriptor flags; MCA_BTL_DES_SEND_ALWAYS_CALLBACK is added
 * @return the prepared descriptor, or NULL on allocation/pack failure or when
 *         the unsupported in-place case is requested
 */
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
                            struct mca_btl_base_endpoint_t* peer,
                            struct opal_convertor_t* convertor,
                            uint8_t order,
                            size_t reserve,
                            size_t* size,
                            uint32_t flags)
{
    struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
    mca_btl_portals4_frag_t* frag = NULL;
    size_t max_data = *size;
    struct iovec iov;
    uint32_t iov_count = 1;
    int ret;

    /* the format uses %ld, so hand it longs rather than raw size_t values */
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
        "mca_btl_portals4_prepare_src NI=%d reserve=%ld size=%ld max_data=%ld\n", portals4_btl->interface_num, (long) reserve, (long) *size, (long) max_data));

    if (0 == reserve && 0 == opal_convertor_need_buffers(convertor)) {
        /* No fragment is set up for this case; bail out rather than touch an
         * unset pointer below. */
        return NULL;
    }

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_prepare_src NEED BUFFERS or RESERVE\n"));
    frag = (mca_btl_portals4_frag_t*) mca_btl_portals4_alloc(btl_base, peer, MCA_BTL_NO_ORDER, max_data + reserve, flags);
    if (NULL == frag)  {
        return NULL;
    }

    /* never pack more than what fits behind the reserve */
    if (max_data + reserve > frag->size) {
        max_data = frag->size - reserve;
    }
    iov.iov_len = max_data;
    iov.iov_base = (unsigned char*) frag->segments[0].base.seg_addr.pval + reserve;
    ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );

    /* *size is updated even when the pack failed, as before */
    *size  = max_data;
    if (ret < 0) {
        mca_btl_portals4_free(btl_base, (mca_btl_base_descriptor_t *) frag);
        return NULL;
    }

    frag->segments[0].base.seg_len = max_data + reserve;
    frag->base.des_segment_count = 1;

    frag->base.des_segments = &frag->segments[0].base;
    frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    frag->base.order = MCA_BTL_NO_ORDER;
    return &frag->base;
}
Example #7
0
/**
 * Pack data into a newly allocated smcuda fragment.
 *
 * An eager fragment is used when reserve + *size fits in the component's
 * eager limit, a max-size fragment otherwise. The payload is packed behind
 * `reserve` bytes and is truncated to what the fragment can hold.
 *
 * @param btl (IN)        BTL module
 * @param endpoint (IN)   peer endpoint (not used by this function)
 * @param convertor (IN)  source of the data to pack
 * @param order (IN)      not used; the descriptor is MCA_BTL_NO_ORDER
 * @param reserve (IN)    bytes to leave free at the start of the segment
 * @param size (IN/OUT)   bytes requested / bytes actually packed
 * @param flags (IN)      descriptor flags
 * @return the descriptor, or NULL if allocation or packing failed
 */
struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    mca_btl_smcuda_frag_t* frag;
    size_t payload = *size;
    uint32_t dest_count = 1;
    struct iovec dest;
    int status;

    /* pick the fragment class from the total footprint */
    if ( reserve + payload > mca_btl_smcuda_component.eager_limit ) {
        MCA_BTL_SMCUDA_FRAG_ALLOC_MAX(frag);
    } else {
        MCA_BTL_SMCUDA_FRAG_ALLOC_EAGER(frag);
    }
    if( OPAL_UNLIKELY(NULL == frag) ) {
        return NULL;
    }

    /* clamp the payload to the room left behind the reserve */
    if( OPAL_UNLIKELY(reserve + payload > frag->size) ) {
        payload = frag->size - reserve;
    }

    dest.iov_base =
        (IOVBASE_TYPE*)(((unsigned char*)(frag->segment.seg_addr.pval)) + reserve);
    dest.iov_len = payload;

    status = opal_convertor_pack(convertor, &dest, &dest_count, &payload );
    if( OPAL_UNLIKELY(status < 0) ) {
        MCA_BTL_SMCUDA_FRAG_RETURN(frag);
        return NULL;
    }

    /* report the packed size and describe the single segment */
    *size = payload;
    frag->segment.seg_len = reserve + payload;
    frag->base.des_segment_count = 1;
    frag->base.des_segments = &frag->segment;
    frag->base.des_flags = flags;
    frag->base.order = MCA_BTL_NO_ORDER;
    return &frag->base;
}
Example #8
0
/**
 * Prepare data for send
 *
 * When the convertor needs buffers, the data is packed behind `reserve`
 * bytes in the fragment and *size is updated to the packed byte count.
 * Otherwise the fragment is allocated with room for the reserve only and a
 * second segment is pointed at the convertor's current pointer.
 *
 * @param btl (IN)        BTL module
 * @param endpoint (IN)   endpoint forwarded to mca_btl_self_alloc()
 * @param convertor (IN)  source of the data
 * @param order (IN)      forwarded to mca_btl_self_alloc()
 * @param reserve (IN)    bytes to leave free at the start of the fragment
 * @param size (IN/OUT)   bytes requested / bytes packed (packing path only)
 * @param flags (IN)      forwarded to mca_btl_self_alloc()
 * @return the descriptor, or NULL if allocation or packing failed
 */
static struct mca_btl_base_descriptor_t *mca_btl_self_prepare_src (struct mca_btl_base_module_t* btl,
                                                                   struct mca_btl_base_endpoint_t *endpoint,
                                                                   struct opal_convertor_t *convertor,
                                                                   uint8_t order, size_t reserve,
                                                                   size_t *size, uint32_t flags)
{
    bool in_place = !opal_convertor_need_buffers(convertor);
    mca_btl_self_frag_t *frag;
    size_t alloc_len = reserve;

    /* only the packing path needs room for the payload itself */
    if (!in_place) {
        alloc_len += *size;
    }

    frag = (mca_btl_self_frag_t *) mca_btl_self_alloc (btl, endpoint, order, alloc_len, flags);
    if (OPAL_UNLIKELY(NULL == frag)) {
        return NULL;
    }

    if (OPAL_UNLIKELY(!in_place)) {
        /* non-contiguous data: pack it behind the reserve */
        struct iovec dest = {.iov_len = *size, .iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->data + reserve)};
        uint32_t dest_count = 1;
        size_t packed = *size;
        int status = opal_convertor_pack (convertor, &dest, &dest_count, &packed);

        if(status < 0) {
            mca_btl_self_free (btl, &frag->base);
            return NULL;
        }

        frag->segments[0].seg_len = reserve + packed;
        *size = packed;
    } else {
        /* contiguous data: reference it directly through a second segment */
        void *user_data;

        opal_convertor_get_current_pointer (convertor, &user_data);

        frag->base.des_segment_count = 2;
        frag->segments[1].seg_len = *size;
        frag->segments[1].seg_addr.pval = user_data;
    }

    return &frag->base;
}
Example #9
0
/**
 * Pack `count` elements of `datatype` from `buffer` into a list of segments,
 * positioning the convertor explicitly before each segment.
 *
 * For every segment the convertor is moved to segments[i].position and
 * segments[i].size bytes are packed into segments[i].buffer. A mismatch in the
 * packed size is only reported; a failure to set the position stops the loop.
 *
 * @param datatype      datatype describing the data in `buffer`
 * @param count         number of elements in `buffer`
 * @param segment_size  not used by this function
 * @param segments      segment descriptors (position, size, destination)
 * @param seg_count     number of entries in `segments`
 * @param buffer        source data
 * @return the index at which the loop stopped: seg_count when every segment
 *         was visited, less when setting a position failed, and 0 when the
 *         convertor could not be created
 */
static int
pack_segments( ompi_datatype_t* datatype, int count,
               size_t segment_size,
               ddt_segment_t* segments, int seg_count,
               void* buffer )
{
    size_t max_size, position;
    opal_convertor_t* convertor;
    struct iovec iov;
    int i;
    uint32_t iov_count;

    convertor = opal_convertor_create( opal_local_arch, 0 );
    if( NULL == convertor ) {
        /* nothing can be packed, and OBJ_RELEASE must not see a NULL object */
        opal_output( 0, "Unable to create the convertor\n" );
        return 0;
    }
    opal_convertor_prepare_for_send( convertor, &(datatype->super), count, buffer );

    for( i = 0; i < seg_count; i++ ) {
        iov.iov_len  = segments[i].size;
        iov.iov_base = segments[i].buffer;
        max_size = iov.iov_len;

        /* set_position writes back the position it actually reached */
        position = segments[i].position;
        opal_convertor_set_position( convertor, &position );
        if( position != segments[i].position ) {
            opal_output( 0, "Setting position failed (%lu != %lu)\n",
                         (unsigned long)segments[i].position, (unsigned long)position );
            break;
        }

        iov_count = 1;
        opal_convertor_pack( convertor, &iov, &iov_count, &max_size );
        if( max_size != segments[i].size ) {
            opal_output( 0, "Amount of packed data do not match (%lu != %lu)\n",
                         (unsigned long)max_size, (unsigned long)segments[i].size );
            /* every %lu argument is cast: size_t is not guaranteed to be unsigned long */
            opal_output( 0, "Segment %d position %lu size %lu\n", i,
                         (unsigned long)segments[i].position, (unsigned long)segments[i].size );
        }
    }
    OBJ_RELEASE(convertor);
    return i;
}
Example #10
0
/**
 * Fill in the data description of an MXM send request from a convertor.
 *
 * Three cases are handled:
 *  - empty message: buffer request with a NULL pointer;
 *  - convertor needs buffers: stream request that will call
 *    ompi_mtl_mxm_stream_send() with the convertor as context;
 *  - otherwise: buffer request whose pointer is whatever
 *    opal_convertor_pack() leaves in iov_base when called with a NULL base.
 *
 * @return OMPI_SUCCESS in all cases
 */
static inline __opal_attribute_always_inline__ int
               ompi_mtl_mxm_choose_send_datatype(mxm_send_req_t *mxm_send_req,
                                           opal_convertor_t *convertor)
{
    /* the packed size is stored straight into the request's buffer length */
    size_t *packed_len = &mxm_send_req->base.data.buffer.length;

    opal_convertor_get_packed_size(convertor, packed_len);

    if (0 == *packed_len) {
        /* nothing to send */
        mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
        mxm_send_req->base.data.buffer.ptr = NULL;
        return OMPI_SUCCESS;
    }

    if (opal_convertor_need_buffers(convertor)) {
        /* let MXM pull the data through the stream callback */
        mxm_send_req->base.data_type = MXM_REQ_DATA_STREAM;
        mxm_send_req->base.context = convertor;
        mxm_send_req->base.data.stream.length = *packed_len;
        mxm_send_req->base.data.stream.cb = ompi_mtl_mxm_stream_send;
        return OMPI_SUCCESS;
    }

    mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;

    {
        struct iovec user_iov;
        uint32_t user_iov_count = 1;

        /* NULL base on input; the base found after the call becomes the send pointer */
        user_iov.iov_len = *packed_len;
        user_iov.iov_base = NULL;

        opal_convertor_pack(convertor, &user_iov, &user_iov_count, packed_len);
        mxm_send_req->base.data.buffer.ptr = user_iov.iov_base;
    }

    return OMPI_SUCCESS;
}
static int local_copy_with_convertor( const opal_datatype_t const* pdt, int count, int chunk )
{
    OPAL_PTRDIFF_TYPE extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t length = 0, done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    opal_datatype_type_extent( pdt, &extent );

    pdst  = malloc( extent * count );
    psrc  = malloc( extent * count );
    ptemp = malloc( chunk );

    {
        int i = 0;
        for( ; i < (count * extent); ((char*)psrc)[i] = i % 128 + 32, i++ );
    }
    memset( pdst, 0, count * extent );

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );
    return OPAL_SUCCESS;
}
Example #12
0
/**
 * Copy `count` elements of datatype `pdt` between two buffers through a send
 * and a receive convertor, at most `chunk` bytes per round, and print timings.
 *
 * Both buffers start with the same byte pattern. Two optional checks are
 * driven by the global outputFlags:
 *  - RESET_CONVERTORS: after every round each unfinished convertor has its
 *    stack saved, is moved to position 0 and then back to `length`, and the
 *    resulting stack is compared entry by entry with the saved one; the first
 *    difference aborts the program.
 *  - VALIDATE_DATA: after the copy the two buffers are compared byte by byte.
 *
 * @param pdt    datatype to copy
 * @param count  number of elements
 * @param chunk  size in bytes of the intermediate buffer
 * @return OPAL_SUCCESS when `errors` is 0, the value of `errors` otherwise
 */
static int local_copy_with_convertor( opal_datatype_t const * const pdt, int count, int chunk )
{
    OPAL_PTRDIFF_TYPE lb, extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst, *osrc;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, malloced_size;
    int32_t done1 = 0, done2 = 0, errors = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    malloced_size = compute_memory_size(pdt, count);
    opal_datatype_get_extent( pdt, &lb, &extent );

    /* NOTE(review): none of the three allocations is checked before use. */
    odst = (char*)malloc( malloced_size );
    osrc = (char*)malloc( malloced_size );
    ptemp = malloc( chunk );

    {
        /* same pattern in source and destination */
        for( size_t i = 0; i < malloced_size; osrc[i] = i % 128 + 32, i++ );
        memcpy(odst, osrc, malloced_size);
    }
    /* the convertors are given the allocation start shifted back by the lower bound */
    pdst  = odst - lb;
    psrc  = osrc - lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        /* each round offers the whole temporary buffer to the pack side */
        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        /* unpack what was just packed; only this half is timed separately */
        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        /* running total of bytes moved so far; used as the position to restore below */
        length += max_data;
        if( outputFlags & RESET_CONVERTORS ) {
            /* NOTE(review): the snapshot array and stack_pos are sized from the
             * SEND convertor, yet both are reused for the receive convertor
             * below - this assumes both stacks have the same depth; confirm. */
            struct dt_stack_t stack[1+send_convertor->stack_pos];
            int i, stack_pos = send_convertor->stack_pos;
            size_t pos;

            if( 0 == done1 ) {
                /* save the stack, rewind to 0, come back to `length`, then compare */
                memcpy(stack, send_convertor->pStack, (1+send_convertor->stack_pos) * sizeof(struct dt_stack_t));
                pos = 0;
                opal_convertor_set_position(send_convertor, &pos);
                pos = length;
                opal_convertor_set_position(send_convertor, &pos);
                assert(pos == length);
                for(i = 0; i <= stack_pos; i++ ) {
                    if( stack[i].index != send_convertor->pStack[i].index )
                        {errors = 1; printf("send stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n",
                                            i, stack[i].index, send_convertor->pStack[i].index,
                                            length, pdt->size * count);}
                    if( stack[i].count != send_convertor->pStack[i].count ) {
                        if( stack[i].type == send_convertor->pStack[i].type ) {
                            {errors = 1; printf("send stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n",
                                                    i, stack[i].count, send_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        } else {
                            /* different types: an error if either one is not a predefined
                             * type, or if count * basic type size differs */
                            if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= send_convertor->pStack[i].type) )
                                {errors = 1; printf("send stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, (int)send_convertor->pStack[i].type,
                                                    length, pdt->size * count);}
                            else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) !=
                                     (send_convertor->pStack[i].count * opal_datatype_basicDatatypes[send_convertor->pStack[i].type]->size) )
                                {errors = 1; printf("send stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, stack[i].count,
                                                    (int)send_convertor->pStack[i].type, send_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        }
                    }
                    if( stack[i].disp != send_convertor->pStack[i].disp )
                        {errors = 1; printf("send stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n",
                                            i, (void*)stack[i].disp, (void*)send_convertor->pStack[i].disp,
                                            length, pdt->size * count);}
                    /* stop at the first stack entry that differs */
                    if(0 != errors) {assert(0); exit(-1);}
                }
            }
            if( 0 == done2 ) {
                /* same save / rewind / restore / compare sequence for the receive side */
                memcpy(stack, recv_convertor->pStack, (1+recv_convertor->stack_pos) * sizeof(struct dt_stack_t));
                pos = 0;
                opal_convertor_set_position(recv_convertor, &pos);
                pos = length;
                opal_convertor_set_position(recv_convertor, &pos);
                assert(pos == length);
                for(i = 0; i <= stack_pos; i++ ) {
                    if( stack[i].index != recv_convertor->pStack[i].index )
                        {errors = 1; printf("recv stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n",
                                            i, stack[i].index, recv_convertor->pStack[i].index,
                                            length, pdt->size * count);}
                    if( stack[i].count != recv_convertor->pStack[i].count ) {
                        if( stack[i].type == recv_convertor->pStack[i].type ) {
                            {errors = 1; printf("recv stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n",
                                                    i, stack[i].count, recv_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        } else {
                            if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= recv_convertor->pStack[i].type) )
                                {errors = 1; printf("recv stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, (int)recv_convertor->pStack[i].type,
                                                    length, pdt->size * count);}
                            else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) !=
                                     (recv_convertor->pStack[i].count * opal_datatype_basicDatatypes[recv_convertor->pStack[i].type]->size) )
                                {errors = 1; printf("recv stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, stack[i].count,
                                                    (int)recv_convertor->pStack[i].type, recv_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        }
                    }
                    if( stack[i].disp != recv_convertor->pStack[i].disp )
                        {errors = 1; printf("recv stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n",
                                            i, (void*)stack[i].disp, (void*)recv_convertor->pStack[i].disp,
                                            length, pdt->size * count);}
                    if(0 != errors) {assert(0); exit(-1);}
                }
            }
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

    if(outputFlags & VALIDATE_DATA) {
        /* byte-wise comparison of the whole allocation; `errors` restarts at 0 */
        for( size_t i = errors = 0; i < malloced_size; i++ ) {
            if( odst[i] != osrc[i] ) {
                printf("error at position %lu (%d != %d)\n",
                       (unsigned long)i, (int)(odst[i]), (int)(osrc[i]));
                errors++;
                if(outputFlags & QUIT_ON_FIRST_ERROR) {
                    opal_datatype_dump(pdt);
                    assert(0); exit(-1);
                }
            }
        }
        if( 0 == errors ) {
            printf("Validation check succesfully passed\n");
        } else {
            printf("Found %d errors. Giving up!\n", errors);
            exit(-1);
        }
    }
 clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != odst ) free( odst );
    if( NULL != osrc ) free( osrc );
    if( NULL != ptemp ) free( ptemp );
    return (0 == errors ? OPAL_SUCCESS : errors);
}
Example #13
0
/**
 * Register a memory region with every BTL known to the yoda SPML and build
 * the array of memory keys describing it.
 *
 * One key is produced per entry of mca_spml_yoda.btl_type_map. BTLs that are
 * not in use are skipped; for the shared-memory BTL with a valid shmid only
 * the id is stored. For the others a context is allocated, the region is
 * optionally registered with the BTL's mpool, and a source descriptor is
 * obtained through btl_prepare_src.
 *
 * @param addr   start of the region
 * @param size   length of the region
 * @param shmid  shared-memory id of the region
 * @param count  out: number of keys in the returned array (0 on failure)
 * @return a calloc'ed array of mca_spml_yoda.n_btls keys, or NULL when an
 *         allocation fails or a BTL cannot prepare the source descriptor
 */
mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
                                        size_t size,
                                        uint64_t shmid,
                                        int *count)
{
    int i;
    mca_btl_base_descriptor_t* des = NULL;
    const opal_datatype_t *datatype = &opal_datatype_wchar;
    opal_convertor_t convertor;
    mca_spml_mkey_t *mkeys;
    struct yoda_btl *ybtl;
    oshmem_proc_t *proc_self;
    mca_spml_yoda_context_t* yoda_context;
    struct iovec iov;
    uint32_t iov_count;


    SPML_VERBOSE(10, "address %p len %llu", addr, (unsigned long long)size);
    *count = 0;
    /* make sure everything is initialized to 0 */
    mkeys = (mca_spml_mkey_t *) calloc(1,
                                       mca_spml_yoda.n_btls * sizeof(*mkeys));
    if (!mkeys) {
        return NULL ;
    }

    proc_self = oshmem_proc_group_find(oshmem_group_all, oshmem_my_proc_id());
    /* create convertor */
    OBJ_CONSTRUCT(&convertor, opal_convertor_t);

    mca_bml.bml_register( MCA_SPML_YODA_PUT,
                          mca_yoda_put_callback,
                          NULL );
    mca_bml.bml_register( MCA_SPML_YODA_GET,
                          mca_yoda_get_callback,
                          NULL );
    mca_bml.bml_register( MCA_SPML_YODA_GET_RESPONSE,
                          mca_yoda_get_response_callback,
                          NULL );
    /* Register proc memory in every rdma BTL. */
    for (i = 0; i < mca_spml_yoda.n_btls; i++) {

        ybtl = &mca_spml_yoda.btl_type_map[i];
        mkeys[i].va_base = addr;

        if (!ybtl->use_cnt) {
            SPML_VERBOSE(10,
                         "%s: present but not in use. SKIP registration",
                         btl_type2str(ybtl->btl_type));
            continue;
        }

        /* If we have shared memory just save its id*/
        if (YODA_BTL_SM == ybtl->btl_type
                && MEMHEAP_SHM_INVALID != (int) MEMHEAP_SHM_GET_ID(shmid)) {
            mkeys[i].u.key = shmid;
            mkeys[i].va_base = 0;
            continue;
        }

        yoda_context = calloc(1, sizeof(*yoda_context));
        if (NULL == yoda_context) {
            SPML_ERROR("%s: failed to allocate the registration context. ",
                       btl_type2str(ybtl->btl_type));
            goto error_out;
        }
        mkeys[i].spml_context = yoda_context;

        yoda_context->registration = NULL;
        if (NULL != ybtl->btl->btl_prepare_src) {
            /* initialize convertor for source descriptor*/
            opal_convertor_copy_and_prepare_for_recv(proc_self->proc_convertor,
                                                     datatype,
                                                     size,
                                                     addr,
                                                     0,
                                                     &convertor);

            if (NULL != ybtl->btl->btl_mpool && NULL != ybtl->btl->btl_mpool->mpool_register) {
                iov.iov_len = size;
                iov.iov_base = NULL;
                /* iov_count is in/out for the pack call: reset it on every pass */
                iov_count = 1;

                /* NOTE(review): this call may change `size`, which the later
                 * BTLs of this loop then reuse - confirm this is intended. */
                opal_convertor_pack(&convertor, &iov, &iov_count, &size);
                ybtl->btl->btl_mpool->mpool_register(ybtl->btl->btl_mpool,
                                                     iov.iov_base, size, 0, &yoda_context->registration);
            }
            /* initialize convertor for source descriptor*/
            opal_convertor_copy_and_prepare_for_recv(proc_self->proc_convertor,
                                                     datatype,
                                                     size,
                                                     addr,
                                                     0,
                                                     &convertor);

            /* register source memory */
            des = ybtl->btl->btl_prepare_src(ybtl->btl,
                                             0,
                                             yoda_context->registration,
                                             &convertor,
                                             MCA_BTL_NO_ORDER,
                                             0,
                                             &size,
                                             0);
            if (NULL == des) {
                SPML_ERROR("%s: failed to register source memory. ",
                           btl_type2str(ybtl->btl_type));
                /* Without a descriptor there is nothing to publish in the key,
                 * and des must not be dereferenced below.
                 * NOTE(review): registrations and descriptors obtained for
                 * earlier BTLs are not unwound here. */
                mkeys[i].spml_context = NULL;
                free(yoda_context);
                goto error_out;
            }

            yoda_context->btl_src_descriptor = des;
            mkeys[i].u.data = des->des_src;
            mkeys[i].len  = ybtl->btl->btl_seg_size;
        }

        SPML_VERBOSE(5,
                     "rank %d btl %s address 0x%p len %llu shmid 0x%X|0x%X",
                     oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), 
                     mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid));
    }
    OBJ_DESTRUCT(&convertor);
    *count = mca_spml_yoda.n_btls;
    return mkeys;

error_out:
    /* *count is still 0 on this path */
    OBJ_DESTRUCT(&convertor);
    free(mkeys);
    return NULL;
}
Example #14
0
/**
 * Build a source descriptor for a send.
 *
 * Data that needs the convertor is packed into a freshly allocated fragment.
 * Otherwise the payload is either described in place (XPMEM single copy,
 * when compiled in and the message exceeds max_inline_send) or copied
 * inline into the fragment, possibly into a reserved fast box.
 *
 * @param btl (IN)        BTL module (not referenced in this function)
 * @param endpoint (IN)   BTL peer addressing
 * @param convertor (IN)  Convertor positioned on the user data
 * @param order (IN)      Ordering value stored in the descriptor
 * @param reserve (IN)    Bytes kept free in front of the payload
 * @param size (IN/OUT)   Payload size; handed to opal_convertor_pack as
 *                        max_data on the packing path
 * @param flags (IN)      Descriptor flags stored in the descriptor
 *
 * @return the fragment's base descriptor, or NULL if no fragment could be
 *         allocated or packing failed
 */
static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl,
                                                            struct mca_btl_base_endpoint_t *endpoint,
                                                            struct opal_convertor_t *convertor,
                                                            uint8_t order, size_t reserve, size_t *size,
                                                            uint32_t flags)
{
    const size_t total_size = reserve + *size;
    mca_btl_vader_frag_t *frag;
    void *data_ptr;

    opal_convertor_get_current_pointer (convertor, &data_ptr);

    if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
        /* the convertor has to pack the data into the fragment */
        struct iovec pack_iov;
        uint32_t pack_iov_count = 1;
        int pack_rc;

        /* an eager fragment is used with XPMEM or when the message fits the
         * eager limit; everything else gets a max-size fragment */
        if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism ||
            total_size <= mca_btl_vader.super.btl_eager_limit) {
            (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
        } else {
            (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
        }

        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        /* pack right behind the reserved header area */
        pack_iov.iov_base =
            (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
                             reserve);
        pack_iov.iov_len = *size;

        pack_rc = opal_convertor_pack (convertor, &pack_iov, &pack_iov_count, size);
        if (OPAL_UNLIKELY(pack_rc < 0)) {
            MCA_BTL_VADER_FRAG_RETURN(frag);
            return NULL;
        }

        frag->segments[0].seg_len = *size + reserve;
    } else {
        if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
            (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);
        } else if (OPAL_LIKELY(total_size <= mca_btl_vader.super.btl_eager_limit)) {
            (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
        } else {
            (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
        }

        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

#if OPAL_BTL_VADER_HAVE_XPMEM
        /* use xpmem to send this segment if it is above the max inline send size */
        if (OPAL_UNLIKELY(MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism &&
                          total_size > (size_t) mca_btl_vader_component.max_inline_send)) {
            /* single copy send: describe the user buffer instead of copying it */
            frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;

            frag->hdr->sc_iov.iov_base = data_ptr;
            frag->hdr->sc_iov.iov_len  = *size;

            frag->segments[0].seg_len = reserve;
            frag->segments[1].seg_len = *size;
            frag->segments[1].seg_addr.pval = data_ptr;
            frag->base.des_segment_count = 2;

            frag->base.order     = order;
            frag->base.des_flags = flags;

            return &frag->base;
        }
#endif

        /* inline send */
        if (OPAL_LIKELY(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP & flags)) {
            /* a fast box is only attempted when the BTL owns the fragment */
            unsigned char *fast_box = mca_btl_vader_reserve_fbox (endpoint, total_size);

            if (OPAL_LIKELY(fast_box)) {
                frag->segments[0].seg_addr.pval = fast_box;
            }

            frag->fbox = fast_box;
        }

        /* NTH: the convertor adds some latency so it is bypassed here */
        memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve), data_ptr, *size);
        frag->segments[0].seg_len = total_size;
    }

    frag->base.order     = order;
    frag->base.des_flags = flags;

    return &frag->base;
}
Example #15
0
/**
 * Copy data from a send datatype to a receive datatype through a pair of
 * convertors, moving at most `chunk` bytes per iteration through a temporary
 * buffer, and print the elapsed time of the whole loop and of the unpack part.
 *
 * When RESET_CONVERTORS is set in outputFlags, both convertors are rewound to
 * position 0 and then repositioned to the current length after every
 * iteration.
 *
 * @param send_type   datatype describing the source layout
 * @param send_count  number of send_type elements
 * @param recv_type   datatype describing the destination layout
 * @param recv_count  number of recv_type elements
 * @param chunk       size in bytes of the temporary pack/unpack buffer
 *
 * @return OPAL_SUCCESS in every case; failures (allocation, convertor
 *         preparation) are only reported on stdout.
 */
static int
local_copy_with_convertor_2datatypes( opal_datatype_t const * const send_type, int send_count,
                                      opal_datatype_t const * const recv_type, int recv_count,
                                      int chunk )
{
    OPAL_PTRDIFF_TYPE send_lb, send_extent, recv_lb, recv_extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst = NULL, *osrc = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, send_malloced_size, recv_malloced_size;
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    send_malloced_size = compute_memory_size(send_type, send_count);
    recv_malloced_size = compute_memory_size(recv_type, recv_count);

    opal_datatype_get_extent( send_type, &send_lb, &send_extent );
    opal_datatype_get_extent( recv_type, &recv_lb, &recv_extent );

    odst = (char*)malloc( recv_malloced_size );
    osrc = (char*)malloc( send_malloced_size );
    ptemp = malloc( chunk );

    /* Bail out before touching any buffer if an allocation failed; the
     * cleanup code releases whatever was obtained. */
    if( (NULL == odst) || (NULL == osrc) || (NULL == ptemp) ) {
        printf( "Unable to allocate the buffers for the copy.\n" );
        goto clean_and_return;
    }

    /* fill the source with a recognizable pattern */
    {
        for( size_t i = 0; i < send_malloced_size; i++ )
            osrc[i] = i % 128 + 32;
    }
    /* fill up the receiver with ZEROS */
    memset( odst, 0, recv_malloced_size );
    /* shift the user pointers by the lower bound of each datatype */
    pdst  = odst - recv_lb;
    psrc  = osrc - send_lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, send_type, send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, recv_type, recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;

        if( outputFlags & RESET_CONVERTORS ) {
            /* rewind each convertor, then move it back to where it was */
            size_t pos = 0;
            opal_convertor_set_position(send_convertor, &pos);
            pos = length;
            opal_convertor_set_position(send_convertor, &pos);
            assert(pos == length);

            pos = 0;
            opal_convertor_set_position(recv_convertor, &pos);
            pos = length;
            opal_convertor_set_position(recv_convertor, &pos);
            assert(pos == length);
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    if( NULL != odst ) free( odst );
    if( NULL != osrc ) free( osrc );
    if( NULL != ptemp ) free( ptemp );
    return OPAL_SUCCESS;
}
Example #16
0
/**
 * Pack data
 *
 * Builds a source descriptor for the shared-memory BTL.  Data is packed into
 * an eager or max-size fragment, unless KNEM/CMA support is compiled in and
 * enabled and no header space was requested (reserve == 0); in that case a
 * user fragment describing the caller's buffer is produced.
 *
 * @param btl (IN)           BTL module
 * @param endpoint (IN)      BTL peer addressing (not referenced here)
 * @param registration (IN)  Memory registration (not referenced here)
 * @param convertor (IN)     Convertor positioned on the user data
 * @param order (IN)         Requested ordering (not referenced here; the
 *                           descriptor is always set to MCA_BTL_NO_ORDER)
 * @param reserve (IN)       Bytes kept free in front of the payload
 * @param size (IN/OUT)      Requested payload size on input; set on return
 *                           to the max_data value left by opal_convertor_pack
 * @param flags (IN)         Descriptor flags stored in the descriptor
 *
 * @return the fragment's base descriptor, or NULL if no fragment could be
 *         allocated, packing failed, or the KNEM region could not be created
 */
struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    mca_mpool_base_registration_t* registration,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    mca_btl_sm_frag_t* frag;
    struct iovec iov;
    uint32_t iov_count = 1;
    size_t max_data = *size;
    int rc;

#if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
    mca_btl_sm_t* sm_btl = (mca_btl_sm_t*)btl; (void)sm_btl;

    if( (0 != reserve) || ( OPAL_UNLIKELY(!mca_btl_sm_component.use_knem)
                            && OPAL_UNLIKELY(!mca_btl_sm_component.use_cma)) ) {
#endif /* OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA */
        /* copy path: pack the data into a shared-memory fragment */
        if ( reserve + max_data <= mca_btl_sm_component.eager_limit ) {
            MCA_BTL_SM_FRAG_ALLOC_EAGER(frag);
        } else {
            MCA_BTL_SM_FRAG_ALLOC_MAX(frag);
        }
        if( OPAL_UNLIKELY(NULL == frag) ) {
            return NULL;
        }

        /* never pack more than what fits behind the reserved area */
        if( OPAL_UNLIKELY(reserve + max_data > frag->size) ) {
            max_data = frag->size - reserve;
        }
        iov.iov_len = max_data;
        iov.iov_base =
            (IOVBASE_TYPE*)(((unsigned char*)(frag->segment.base.seg_addr.pval)) + reserve);

        rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
        if( OPAL_UNLIKELY(rc < 0) ) {
            MCA_BTL_SM_FRAG_RETURN(frag);
            return NULL;
        }
        frag->segment.base.seg_len = reserve + max_data;
#if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
    } else {
#if OMPI_BTL_SM_HAVE_KNEM
        struct knem_cmd_create_region knem_cr;
        struct knem_cmd_param_iovec knem_iov;
#endif /* OMPI_BTL_SM_HAVE_KNEM */
        MCA_BTL_SM_FRAG_ALLOC_USER(frag);
        if( OPAL_UNLIKELY(NULL == frag) ) {
            return NULL;
        }
        /* a NULL base is passed in; the address stored in the segment below
         * is whatever opal_convertor_pack leaves in iov.iov_base */
        iov.iov_len = max_data;
        iov.iov_base = NULL;
        rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
        if( OPAL_UNLIKELY(rc < 0) ) {
            MCA_BTL_SM_FRAG_RETURN(frag);
            return NULL;
        }
        frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
        frag->segment.base.seg_len = max_data;

#if OMPI_BTL_SM_HAVE_KNEM
        if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) {
            knem_iov.base = (uintptr_t)iov.iov_base;
            knem_iov.len = max_data;
            knem_cr.iovec_array = (uintptr_t)&knem_iov;
            knem_cr.iovec_nr = iov_count;
            knem_cr.protection = PROT_READ;
            knem_cr.flags = KNEM_FLAG_SINGLEUSE;
            if (OPAL_UNLIKELY(ioctl(sm_btl->knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
                /* give the fragment back, as the other error paths do,
                 * instead of leaking it */
                MCA_BTL_SM_FRAG_RETURN(frag);
                return NULL;
            }
            frag->segment.key = knem_cr.cookie;
        }
#endif /* OMPI_BTL_SM_HAVE_KNEM */

#if OMPI_BTL_SM_HAVE_CMA
        if (OPAL_LIKELY(mca_btl_sm_component.use_cma)) {
            /* Encode the pid as the key */
            frag->segment.key = getpid();
        }
#endif /* OMPI_BTL_SM_HAVE_CMA */
    }
#endif /* OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA */

    frag->base.des_src = &(frag->segment.base);
    frag->base.des_src_cnt = 1;
    frag->base.order = MCA_BTL_NO_ORDER;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = flags;
    *size = max_data;
    return &frag->base;
}
Example #17
0
/**
 * Initiate an inline (immediate) send to the peer.
 *
 * A fast-box send is attempted first when the payload does not need the
 * convertor.  If that is not possible, a fragment is allocated, header and
 * payload are copied into it, and the fragment is posted on the peer's FIFO.
 *
 * @param btl (IN)           BTL module
 * @param endpoint (IN)      BTL peer addressing
 * @param convertor (IN)     Convertor positioned on the payload
 * @param header (IN)        Header to place in front of the payload
 * @param header_size (IN)   Size of the header in bytes
 * @param payload_size (IN)  Size of the payload in bytes
 * @param order (IN)         Ordering value passed to the fragment allocator
 * @param flags (IN)         Descriptor flags
 * @param tag (IN)           Tag stored in the fragment header
 * @param descriptor (OUT)   Always set to NULL
 *
 * @return OMPI_SUCCESS once the message was handed off, or
 *         OMPI_ERR_OUT_OF_RESOURCE if no fragment was available
 */
int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
                         struct mca_btl_base_endpoint_t *endpoint,
                         struct opal_convertor_t *convertor,
                         void *header, size_t header_size,
                         size_t payload_size, uint8_t order,
                         uint32_t flags, mca_btl_base_tag_t tag,
                         mca_btl_base_descriptor_t **descriptor)
{
    const size_t length = header_size + payload_size;
    mca_btl_vader_frag_t *send_frag;
    void *payload = NULL;

    assert (length < mca_btl_vader_component.eager_limit);
    assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));

    /* we won't ever return a descriptor */
    *descriptor = NULL;

    /* fast path: no payload, or a payload that can be read in place */
    if (OPAL_LIKELY(!payload_size || !opal_convertor_need_buffers (convertor))) {
        if (payload_size) {
            opal_convertor_get_current_pointer (convertor, &payload);
        }

        if (mca_btl_vader_fbox_sendi (endpoint, tag, header, header_size, payload, payload_size)) {
            return OMPI_SUCCESS;
        }
    }

    /* allocate a fragment, giving up if we can't get one */
    send_frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, length,
                                                              flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
    if (OPAL_UNLIKELY(NULL == send_frag)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* fill in fragment fields */
    send_frag->hdr->len = length;
    send_frag->hdr->tag = tag;

    /* write the match header (with MPI comm/tag/etc. info) */
    memcpy (send_frag->segment.seg_addr.pval, header, header_size);

    /* write the message data if there is any */
    /*
      We can add MEMCHECKER calls before and after the packing.
    */
    /* single-copy semantics cannot be used here since the caller will
       consider the send complete as soon as success is returned */
    if (payload_size) {
        void *dest = (void *)((uintptr_t)send_frag->segment.seg_addr.pval + header_size);

        if (OPAL_UNLIKELY(opal_convertor_need_buffers (convertor))) {
            /* pack the data into the fragment, right behind the header */
            struct iovec pack_iov;
            uint32_t pack_iov_count = 1;
            size_t packed = payload_size;

            pack_iov.iov_base = (IOVBASE_TYPE *) dest;
            pack_iov.iov_len  = payload_size;

            (void) opal_convertor_pack (convertor, &pack_iov, &pack_iov_count, &packed);

            assert (packed == payload_size);
        } else {
            /* bypassing the convertor may speed things up a little */
            opal_convertor_get_current_pointer (convertor, &payload);
            memcpy (dest, payload, payload_size);
        }
    }

    opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) send_frag);

    /* write the fragment pointer to the peer's FIFO */
    vader_fifo_write (send_frag->hdr, endpoint->peer_smp_rank);

    /* the progress function will return the fragment */

    return OMPI_SUCCESS;
}
/**
 * Pack data
 *
 * Builds a source descriptor.  With a non-zero reserve the data is sent
 * through a fragment: packed by the convertor when it needs buffers,
 * described in place (single copy) when larger than max_inline_send, or
 * copied inline otherwise.  With reserve == 0 a put/get fragment that only
 * describes the user buffer is produced.
 *
 * @param btl (IN)           BTL module (not referenced here)
 * @param endpoint (IN)      BTL peer addressing; stored in the fragment
 * @param registration (IN)  Memory registration (not referenced here)
 * @param convertor (IN)     Convertor positioned on the user data
 * @param order (IN)         Ordering value stored in the descriptor
 * @param reserve (IN)       Bytes kept free in front of the payload
 * @param size (IN/OUT)      Payload size; handed to opal_convertor_pack as
 *                           max_data on the packing path
 * @param flags (IN)         Descriptor flags stored in the descriptor
 *
 * @return the fragment's base descriptor, or NULL if no fragment could be
 *         allocated or packing failed
 */
static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl,
                                                            struct mca_btl_base_endpoint_t *endpoint,
                                                            mca_mpool_base_registration_t *registration,
                                                            struct opal_convertor_t *convertor,
                                                            uint8_t order, size_t reserve, size_t *size,
                                                            uint32_t flags)
{
    const size_t total_size = reserve + *size;
    struct iovec iov;
    mca_btl_vader_frag_t *frag;
    uint32_t iov_count = 1;
    void *data_ptr, *fbox_ptr;
    int rc;

    opal_convertor_get_current_pointer (convertor, &data_ptr);

    if (OPAL_LIKELY(reserve)) {
        /* in place send fragment */
        if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
            /* non-contiguous data requires the convertor */
            (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag);
            if (OPAL_UNLIKELY(NULL == frag)) {
                return NULL;
            }

            iov.iov_len = *size;
            iov.iov_base =
                (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
                                 reserve);

            rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
            if (OPAL_UNLIKELY(rc < 0)) {
                MCA_BTL_VADER_FRAG_RETURN(frag);
                return NULL;
            }

            /* *size was handed to the convertor by pointer, so the segment
             * length is derived from its current value rather than from the
             * total computed before packing */
            frag->segments[0].seg_len = reserve + *size;
        } else {
            (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
            if (OPAL_UNLIKELY(NULL == frag)) {
                return NULL;
            }

            if (total_size > (size_t) mca_btl_vader_component.max_inline_send) {
                /* single copy send */
                frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;

                /* set up single copy io vector */
                frag->hdr->sc_iov.iov_base = data_ptr;
                frag->hdr->sc_iov.iov_len  = *size;

                frag->segments[0].seg_len = reserve;
                frag->segments[1].seg_len = *size;
                frag->segments[1].seg_addr.pval = data_ptr;
                frag->base.des_src_cnt = 2;
            } else {
                /* inline send */
                /* try to reserve a fast box for this transfer */
                fbox_ptr = mca_btl_vader_reserve_fbox (endpoint, total_size);

                if (fbox_ptr) {
                    frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
                    frag->segments[0].seg_addr.pval = fbox_ptr;
                }

                /* NTH: the convertor adds some latency so we bypass it here */
                vader_memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
                               data_ptr, *size);
                frag->segments[0].seg_len = total_size;
            }
        }
    } else {
        /* put/get fragment */
        (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
        frag->segments[0].seg_len       = total_size;
    }

    frag->base.order       = order;
    frag->base.des_flags   = flags;

    frag->endpoint = endpoint;

    return &frag->base;
}
Example #19
0
/**
 * Start a non-blocking write of `count` elements of `datatype` from `buf`.
 *
 * When the fbtl provides fbtl_ipwritev, the user buffer is decoded into an
 * iovec, the I/O array is built in a single cycle and handed to the fbtl.
 * Otherwise the write is performed with the blocking
 * mca_common_ompio_file_write and the request is completed right away.
 *
 * @param fh        file handle
 * @param buf       user buffer
 * @param count     number of elements to write
 * @param datatype  datatype of the elements
 * @param request   set to the newly allocated request (not set when the file
 *                  was opened read-only)
 *
 * @return MPI_ERR_READ_ONLY for a read-only file, the return code of the
 *         blocking write on the fallback path, OMPI_SUCCESS otherwise
 */
int mca_common_ompio_file_iwrite (ompio_file_t *fh,
				const void *buf,
				int count,
				struct ompi_datatype_t *datatype,
				ompi_request_t **request)
{
    int ret = OMPI_SUCCESS;
    mca_ompio_request_t *ompio_req=NULL;
    size_t spc=0;

    if (fh->f_amode & MPI_MODE_RDONLY){
//      opal_output(10, "Improper use of FILE Mode, Using RDONLY for write!\n");
        ret = MPI_ERR_READ_ONLY;
        return ret;
    }

    mca_common_ompio_request_alloc ( &ompio_req, MCA_OMPIO_REQUEST_WRITE);

    /* nothing to write: hand back an already completed request */
    if ( 0 == count ) {
        ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
        ompio_req->req_ompi.req_status._ucount = 0;
        ompi_request_complete (&ompio_req->req_ompi, false);
        *request = (ompi_request_t *) ompio_req;

        return OMPI_SUCCESS;
    }

    if ( NULL != fh->f_fbtl->fbtl_ipwritev ) {
        /* This fbtl has support for non-blocking operations */

        uint32_t iov_count = 0;
        struct iovec *decoded_iov = NULL;
        size_t max_data = 0;
        size_t total_bytes_written =0;
        int i = 0; /* index into the decoded iovec of the buffer */
        int j = 0; /* index into the file view iovec */

#if OPAL_CUDA_SUPPORT
        int is_gpu, is_managed;
        mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
        if ( is_gpu && !is_managed ) {
            size_t pos=0;
            char *tbuf=NULL;
            opal_convertor_t convertor;

            OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

            opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
            opal_convertor_cleanup (&convertor);

            /* the temporary buffer is recorded in the request */
            ompio_req->req_tbuf = tbuf;
            ompio_req->req_size = max_data;
        }
        else {
            mca_common_ompio_decode_datatype (fh,
                                              datatype,
                                              count,
                                              buf,
                                              &max_data,
                                              &decoded_iov,
                                              &iov_count);
        }
#else
        mca_common_ompio_decode_datatype (fh,
                                          datatype,
                                          count,
                                          buf,
                                          &max_data,
                                          &decoded_iov,
                                          &iov_count);
#endif
        /* data to write but an empty file view: complete with 0 bytes */
        if ( 0 < max_data && 0 == fh->f_iov_count  ) {
            ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
            ompio_req->req_ompi.req_status._ucount = 0;
            ompi_request_complete (&ompio_req->req_ompi, false);
            *request = (ompi_request_t *) ompio_req;
            /* release the decoded iovec here as well, exactly as the regular
             * path below does, so that it is not leaked on this early exit */
            if (NULL != decoded_iov) {
                free (decoded_iov);
                decoded_iov = NULL;
            }
            return OMPI_SUCCESS;
        }

        j = fh->f_index_in_file_view;

        /* Non blocking operations have to occur in a single cycle */
        mca_common_ompio_build_io_array ( fh,
                                          0,         // index of current cycle iteration
                                          1,         // number of cycles
                                          max_data,  // setting bytes_per_cycle to max_data
                                          max_data,
                                          iov_count,
                                          decoded_iov,
                                          &i,
                                          &j,
                                          &total_bytes_written,
                                          &spc);

        if (fh->f_num_of_io_entries) {
            fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
        }

        mca_common_ompio_register_progress ();

        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
        if (NULL != decoded_iov) {
            free (decoded_iov);
            decoded_iov = NULL;
        }
    }
    else {
        // This fbtl does not support non-blocking write operations
        ompi_status_public_t status;
        ret = mca_common_ompio_file_write(fh,buf,count,datatype, &status);

        ompio_req->req_ompi.req_status.MPI_ERROR = ret;
        ompio_req->req_ompi.req_status._ucount = status._ucount;
        ompi_request_complete (&ompio_req->req_ompi, false);
    }

    *request = (ompi_request_t *) ompio_req;
    return ret;
}
Example #20
0
/**
 * Copy data from a send datatype to a receive datatype through a pair of
 * convertors, moving at most `chunk` bytes per iteration through a temporary
 * buffer, and print the elapsed time of the whole loop and of the unpack part.
 *
 * @param send_type   datatype describing the source layout
 * @param send_count  number of send_type elements
 * @param recv_type   datatype describing the destination layout
 * @param recv_count  number of recv_type elements
 * @param chunk       size in bytes of the temporary pack/unpack buffer
 *
 * @return OMPI_SUCCESS in every case; failures (allocation, convertor
 *         preparation) are only reported on stdout.
 */
static int
local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count,
                                      ompi_datatype_t* recv_type, int recv_count,
                                      int chunk )
{
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    size_t length = 0;  /* bytes moved so far; size_t to match max_data */
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;
    size_t slength, rlength;

    rlength = compute_buffer_length(recv_type, recv_count);
    slength = compute_buffer_length(send_type, send_count);
    pdst  = malloc( rlength );
    psrc  = malloc( slength );
    ptemp = malloc( chunk );

    /* Bail out before touching any buffer if an allocation failed; the
     * cleanup code releases whatever was obtained. */
    if( (NULL == pdst) || (NULL == psrc) || (NULL == ptemp) ) {
        printf( "Unable to allocate the buffers for the copy.\n" );
        goto clean_and_return;
    }

    /* initialize the buffers to prevent valgrind from complaining */
    for( size_t i = 0; i < slength; i++ )
            ((char*)psrc)[i] = i % 128 + 32;
    memset(pdst, 0, rlength);

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, &(send_type->super), send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, &(recv_type->super), recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );
    return OMPI_SUCCESS;
}
Example #21
0
/**
 * Allocate the scatter buffer of a linear scatter and, on the root, fill it.
 *
 * The packed size of one block is first obtained from a temporary send
 * convertor.  The root then allocates packed_size * size bytes and packs one
 * block per rank into consecutive slots; any other rank only allocates room
 * for a single packed block.
 *
 * @param comm             communicator the scatter runs on
 * @param request          collective request; its u.scatter fields are read
 *                         and updated (packed_size, scatter_bytes,
 *                         scatter_buf, free_after)
 * @param portals4_module  module (not referenced here)
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the scatter buffer
 *         could not be allocated
 */
static int
setup_scatter_buffers_linear(struct ompi_communicator_t   *comm,
                             ompi_coll_portals4_request_t *request,
                             mca_coll_portals4_module_t   *portals4_module)
{
    int ret, line;

    int8_t i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);

    /* a throw-away convertor is only used to learn the packed size */
    ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter,
            request->u.scatter.pack_src_buf,
            ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
            request->u.scatter.pack_src_count,
            request->u.scatter.pack_src_dtype);
    opal_convertor_get_packed_size(&request->u.scatter.send_converter, &request->u.scatter.packed_size);
    OBJ_DESTRUCT(&request->u.scatter.send_converter);

    /**********************************/
    /* Setup Scatter Buffers           */
    /**********************************/
    if (i_am_root) {

        /*
         * calculate the total size of the packed data
         */
        request->u.scatter.scatter_bytes=request->u.scatter.packed_size * (ptrdiff_t)request->u.scatter.size;

        /* all transfers done using request->u.scatter.sdtype.
         * allocate temp buffer for recv, copy and/or rotate data at the end */
        request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes);
        if (NULL == request->u.scatter.scatter_buf) {
            ret = OMPI_ERR_OUT_OF_RESOURCE;
            line = __LINE__;
            goto err_hdlr;
        }
        request->u.scatter.free_after = 1;

        /* pack the block destined to each rank into its slot */
        for (int32_t i=0; i<request->u.scatter.size; i++) {
            uint32_t iov_count = 1;
            struct iovec iov;
            /* max_data is handed to opal_convertor_pack by pointer; give it a
             * defined value (the slot capacity) instead of an indeterminate one */
            size_t max_data = request->u.scatter.packed_size;

            uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * i;

            /* offset is a uint64_t: print it through an explicit cast so the
             * conversion specifier matches on every platform */
            opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                                "%s:%d:rank(%d): offset(%llu)",
                                __FILE__, __LINE__, request->u.scatter.my_rank,
                                (unsigned long long) offset);

            ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter,
                    request->u.scatter.pack_src_buf + offset,
                    ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
                    request->u.scatter.pack_src_count,
                    request->u.scatter.pack_src_dtype);

            iov.iov_len = request->u.scatter.packed_size;
            iov.iov_base = (IOVBASE_TYPE *) ((char *)request->u.scatter.scatter_buf + (request->u.scatter.packed_size*i));
            opal_convertor_pack(&request->u.scatter.send_converter, &iov, &iov_count, &max_data);

            OBJ_DESTRUCT(&request->u.scatter.send_converter);
        }

        opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                            "%s:%d:rank(%d): root - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld) * size(%d)",
                            __FILE__, __LINE__, request->u.scatter.my_rank,
                            request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes,
                            request->u.scatter.packed_size, request->u.scatter.size);
    } else {
        /* a leaf only needs room for its own packed block */
        request->u.scatter.scatter_bytes=request->u.scatter.packed_size;
        request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes);
        if (NULL == request->u.scatter.scatter_buf) {
            ret = OMPI_ERR_OUT_OF_RESOURCE;
            line = __LINE__;
            goto err_hdlr;
        }
        request->u.scatter.free_after = 1;

        opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                            "%s:%d:rank(%d): leaf - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld)",
                            __FILE__, __LINE__, request->u.scatter.my_rank,
                            request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes,
                            request->u.scatter.packed_size);
    }

    return OMPI_SUCCESS;

err_hdlr:
    /* `line` records where the failure was detected */
    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}
Example #22
0
/**
 * Blocking write of @p count elements of @p datatype from @p buf.
 *
 * The user buffer is decoded into an iovec and then written in one or more
 * cycles: for every cycle mca_common_ompio_build_io_array() fills
 * fh->f_io_array and the array is handed to the fbtl component's
 * fbtl_pwritev().  When CUDA support is compiled in and the buffer is
 * non-managed GPU memory, the data is first packed with a convertor into the
 * buffer set up by OMPIO_CUDA_PREPARE_BUF and that buffer is released again
 * before returning.
 *
 * @param fh        (IN)  file handle
 * @param buf       (IN)  user buffer holding the data to write
 * @param count     (IN)  number of @p datatype elements in @p buf
 * @param datatype  (IN)  datatype describing the layout of @p buf
 * @param status    (OUT) unless MPI_STATUS_IGNORE, its _ucount field receives
 *                        the number of bytes reported as written
 *
 * @return MPI_ERR_READ_ONLY when the file was opened with MPI_MODE_RDONLY,
 *         OMPI_SUCCESS otherwise.
 */
int mca_common_ompio_file_write (ompio_file_t *fh,
                                 const void *buf,
                                 int count,
                                 struct ompi_datatype_t *datatype,
                                 ompi_status_public_t *status)
{
    int ret = OMPI_SUCCESS;
    int index = 0;
    int cycles = 0;

    uint32_t iov_count = 0;
    struct iovec *decoded_iov = NULL;
    size_t bytes_per_cycle = 0;
    size_t total_bytes_written = 0;
    size_t max_data = 0, real_bytes_written = 0;
    ssize_t ret_code = 0;
    size_t spc = 0;
    int i = 0; /* index into the decoded iovec of the buffer */
    int j = 0; /* index into the file view iovec */

    if (fh->f_amode & MPI_MODE_RDONLY) {
        return MPI_ERR_READ_ONLY;
    }

    /* Nothing to write: no buffers have been allocated yet. */
    if ( 0 == count ) {
        if ( MPI_STATUS_IGNORE != status ) {
            status->_ucount = 0;
        }
        return ret;
    }

#if OPAL_CUDA_SUPPORT
    int is_gpu, is_managed;
    mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
    if ( is_gpu && !is_managed ) {
        size_t pos=0;
        char *tbuf=NULL;
        opal_convertor_t convertor;

        OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

        opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
        opal_convertor_cleanup ( &convertor);
    }
    else {
        mca_common_ompio_decode_datatype (fh,
                                          datatype,
                                          count,
                                          buf,
                                          &max_data,
                                          &decoded_iov,
                                          &iov_count);
    }
#else
    mca_common_ompio_decode_datatype (fh,
                                      datatype,
                                      count,
                                      buf,
                                      &max_data,
                                      &decoded_iov,
                                      &iov_count);
#endif

    /*
     * Two cases in which there is nothing to transfer:
     *  - the file view is empty (f_iov_count == 0);
     *  - the decoded buffer is empty (max_data == 0).  Handling it here also
     *    keeps the cycle computation below from evaluating 0/0 when the
     *    cycle buffer size is -1.
     * Both go through the common cleanup so that decoded_iov (and the GPU
     * staging buffer, if any) is released instead of leaked.
     */
    if ( 0 == max_data || 0 == fh->f_iov_count ) {
        goto cleanup;
    }

    if ( -1 == OMPIO_MCA_GET(fh, cycle_buffer_size )) {
        bytes_per_cycle = max_data;
    }
    else {
        bytes_per_cycle = OMPIO_MCA_GET(fh, cycle_buffer_size);
    }
    cycles = ceil((double)max_data/bytes_per_cycle);

    j = fh->f_index_in_file_view;
    for (index = 0; index < cycles; index++) {
        mca_common_ompio_build_io_array ( fh,
                                          index,
                                          cycles,
                                          bytes_per_cycle,
                                          max_data,
                                          iov_count,
                                          decoded_iov,
                                          &i,
                                          &j,
                                          &total_bytes_written,
                                          &spc);

        if (fh->f_num_of_io_entries) {
            ret_code = fh->f_fbtl->fbtl_pwritev (fh);
            /* NOTE(review): a negative return is only excluded from the byte
             * count; it is not propagated to the caller. */
            if ( 0 <= ret_code ) {
                real_bytes_written += (size_t)ret_code;
            }
        }

        /* The io array is rebuilt on every cycle. */
        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
    }

cleanup:
#if OPAL_CUDA_SUPPORT
    if ( is_gpu && !is_managed && NULL != decoded_iov ) {
        mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
    }
#endif

    free (decoded_iov);
    decoded_iov = NULL;

    if ( MPI_STATUS_IGNORE != status ) {
        status->_ucount = real_bytes_written;
    }

    return ret;
}
Example #23
0
/**
 * Build a send descriptor for the TCP BTL.
 *
 * A fragment is taken from the eager pool when the (clamped) payload plus the
 * reserved header space fits into the eager limit, otherwise from the max
 * pool.  Segment 0 always covers the reserved header area that directly
 * follows the fragment structure.  If the convertor needs buffers the payload
 * is packed right behind the header in segment 0; otherwise the convertor is
 * asked for a pointer (iov_base == NULL on input) and the payload is
 * described by a second segment.
 *
 * @param btl (IN)        BTL module
 * @param endpoint (IN)   BTL peer addressing (not used here)
 * @param convertor (IN)  Convertor positioned on the data to send
 * @param order (IN)      Ordering request (not used here)
 * @param reserve (IN)    Bytes to reserve in front of the payload
 * @param size (IN/OUT)   Requested payload size on entry, size actually
 *                        described by the descriptor on return
 * @param flags (IN)      Descriptor flags
 *
 * @return the descriptor, or NULL if no fragment is available or packing
 *         fails.
 */
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    mca_btl_tcp_frag_t* frag;
    struct iovec pack_iov;
    uint32_t pack_iov_cnt = 1;
    size_t nbytes = *size;
    int copy_needed;
    int status;

    /* limit the size to what we support */
    if( OPAL_UNLIKELY(nbytes > UINT32_MAX) ) {
        nbytes = (size_t)UINT32_MAX;
    }

    /* Too big for an eager fragment: use one of max send size instead. */
    if (nbytes + reserve > btl->btl_eager_limit) {
        MCA_BTL_TCP_FRAG_ALLOC_MAX(frag);
    } else {
        MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag);
    }
    if( OPAL_UNLIKELY(NULL == frag) ) {
        return NULL;
    }

    /* Segment 0: the reserved area right after the fragment structure. */
    frag->base.des_segment_count = 1;
    frag->segments[0].seg_len = reserve;
    frag->segments[0].seg_addr.pval = (frag + 1);

    copy_needed = opal_convertor_need_buffers(convertor);
    if (copy_needed) {
        /* The payload has to fit behind the header inside the fragment. */
        if (nbytes + reserve > frag->size) {
            nbytes = frag->size - reserve;
        }
        pack_iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag + 1)) + reserve);
    } else {
        /* No copy: the convertor is expected to fill in the data pointer. */
        pack_iov.iov_base = NULL;
    }
    pack_iov.iov_len = nbytes;

    status = opal_convertor_pack(convertor, &pack_iov, &pack_iov_cnt, &nbytes );
    if( OPAL_UNLIKELY(status < 0) ) {
        mca_btl_tcp_free(btl, &frag->base);
        return NULL;
    }

    if (copy_needed) {
        frag->segments[0].seg_len += nbytes;
    } else {
        frag->segments[1].seg_len = nbytes;
        frag->segments[1].seg_addr.pval = pack_iov.iov_base;
        frag->base.des_segment_count = 2;
    }

    *size = nbytes;
    frag->base.order = MCA_BTL_NO_ORDER;
    frag->base.des_flags = flags;
    frag->base.des_segments = frag->segments;
    return &frag->base;
}
/**
 * Initiate an inline send to the peer.
 *
 * The fast box is tried first whenever the payload does not have to go
 * through the convertor's buffers.  If that does not succeed, a fragment is
 * allocated, the header and the packed payload are copied into it and the
 * fragment is written to the peer's FIFO.
 *
 * @param btl (IN)          BTL module
 * @param endpoint (IN)     BTL peer addressing
 * @param convertor (IN)    Convertor positioned on the payload
 * @param header (IN)       Match header to send in front of the payload
 * @param header_size (IN)  Size of @p header in bytes
 * @param payload_size (IN) Number of payload bytes to send
 * @param order (IN)        Ordering request, forwarded to the allocator
 * @param flags (IN)        Descriptor flags
 * @param tag (IN)          Tag stored in the fragment header
 * @param descriptor (OUT)  Set to NULL when no fragment could be allocated;
 *                          may itself be NULL, in which case it is not
 *                          written
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if no fragment could be
 *         allocated.
 */
int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
                         struct mca_btl_base_endpoint_t *endpoint,
                         struct opal_convertor_t *convertor,
                         void *header, size_t header_size,
                         size_t payload_size, uint8_t order,
                         uint32_t flags, mca_btl_base_tag_t tag,
                         mca_btl_base_descriptor_t **descriptor)
{
    mca_btl_vader_frag_t *frag;
    void *data_ptr = NULL;
    size_t length;

    if (payload_size) {
        opal_convertor_get_current_pointer (convertor, &data_ptr);
    }

    /* the fast box copies straight from data_ptr, so it can only be used
       when the payload does not need to be packed */
    if (!(payload_size && opal_convertor_need_buffers (convertor)) &&
        mca_btl_vader_fbox_sendi (endpoint, tag, header, header_size, data_ptr, payload_size)) {
        return OMPI_SUCCESS;
    }

    length = header_size + payload_size;

    /* allocate a fragment, giving up if we can't get one */
    frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, length,
                                                         flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
    if (OPAL_UNLIKELY(NULL == frag)) {
        /* callers are allowed to pass a NULL descriptor pointer */
        if (NULL != descriptor) {
            *descriptor = NULL;
        }

        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* fill in fragment fields */
    frag->hdr->len = length;
    frag->hdr->tag = tag;

    /* write the match header (with MPI comm/tag/etc. info) */
    memcpy (frag->segments[0].seg_addr.pval, header, header_size);

    /* write the message data if there is any */
    /* we can't use single-copy semantics here since the caller will consider
       the send complete when we return */
    if (payload_size) {
        uint32_t iov_count = 1;
        struct iovec iov;

        /* pack the data into the fragment, right behind the header */
        iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
        iov.iov_len  = length = payload_size;

        (void) opal_convertor_pack (convertor, &iov, &iov_count, &length);

        assert (length == payload_size);
    }

    /* write the fragment pointer to the peer's FIFO. the progress function
       will return the fragment */
    vader_fifo_write_ep (frag->hdr, endpoint);

    return OMPI_SUCCESS;
}
Example #25
0
/**
 * Pack data and return a descriptor that can be
 * used for send/put.
 *
 * @param btl (IN)      BTL module
 * @param peer (IN)     BTL peer addressing
 */
mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags
)
{
    mca_btl_udapl_frag_t* frag = NULL;
    struct iovec iov;
    uint32_t iov_count = 1;
    size_t max_data = *size;
    int rc;
    int pad = 0;

    /* compute pad as needed */
    MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad,
        (max_data + reserve + sizeof(mca_btl_udapl_footer_t)));

    if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) {
        if(registration != NULL || max_data > btl->btl_max_send_size) {

            MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag);
            if(NULL == frag){
                return NULL;
            }

            iov.iov_len = max_data;
            iov.iov_base = NULL;

            opal_convertor_pack(convertor, &iov,
                &iov_count, &max_data );

            *size = max_data;
        
            if(NULL == registration) {
                rc = btl->btl_mpool->mpool_register(btl->btl_mpool, iov.iov_base,
                    max_data, 0,
                    &registration);

                if(rc != OMPI_SUCCESS) {
                    MCA_BTL_UDAPL_FRAG_RETURN_USER(btl,frag);
                    return NULL;
                }
                /* keep track of the registration we did */
                frag->registration = (mca_btl_udapl_reg_t*)registration;
            }

            frag->segment.base.seg_len = max_data;
            frag->segment.base.seg_addr.pval = iov.iov_base;
            frag->triplet.segment_length = max_data;
            frag->triplet.virtual_address = (DAT_VADDR)(uintptr_t)iov.iov_base;
            frag->triplet.lmr_context =
                ((mca_btl_udapl_reg_t*)registration)->lmr_triplet.lmr_context;

            /* initialize base descriptor */
            frag->base.des_src = &frag->segment;
            frag->base.des_src_cnt = 1;
            frag->base.des_dst = NULL;
            frag->base.des_dst_cnt = 0;
            frag->base.des_flags = flags;
            frag->base.order = MCA_BTL_NO_ORDER;
            return &frag->base;
        }
    }

    if(max_data + pad + reserve <= btl->btl_eager_limit) {
        /* the data is small enough to fit in the eager frag and
         * memory is not prepinned */
        MCA_BTL_UDAPL_FRAG_ALLOC_EAGER(btl, frag);
    }

    if(NULL == frag) {
        /* the data doesn't fit into eager frag or eager frag is
         * not available */
        MCA_BTL_UDAPL_FRAG_ALLOC_MAX(btl, frag);
        if(NULL == frag) {
            return NULL;
        }
        if(max_data + reserve > btl->btl_max_send_size) {
            max_data = btl->btl_max_send_size - reserve;
        }
    }
    
    iov.iov_len = max_data;
    iov.iov_base = (char *) frag->segment.base.seg_addr.pval + reserve;
    
    rc = opal_convertor_pack(convertor,
        &iov, &iov_count, &max_data );
    if(rc < 0) {
        MCA_BTL_UDAPL_FRAG_RETURN_MAX(btl, frag);
        return NULL;
    }

    *size = max_data;

    /* setup lengths and addresses to send out data */
    frag->segment.base.seg_len = max_data + reserve;
    frag->triplet.segment_length =
        max_data + reserve + sizeof(mca_btl_udapl_footer_t);
    frag->triplet.virtual_address =
        (DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval;

    /* initialize base descriptor */
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = flags;
    frag->base.order = MCA_BTL_NO_ORDER;
    return &frag->base;
}
/*
 * ompi_datatype_sndrcv
 *
 * Function: - copy the content of one local buffer into another, converting
 *             between the send and the receive datatype
 * Accepts:  - send buffer
 *           - send count
 *           - send datatype
 *           - receive buffer
 *           - receive count
 *           - receive datatype
 * Returns:  - MPI_SUCCESS or MPI_ERR_TRUNCATE
 *
 * Four cases are handled:
 *  - nothing to receive: success only if there is nothing to send either;
 *  - identical datatypes: direct copy of min(scount, rcount) elements;
 *  - one side is MPI_PACKED: a single pack (or unpack) straight into (or out
 *    of) the packed buffer, whose count is a size in bytes;
 *  - otherwise: pack/unpack through a temporary 64KB buffer until one of the
 *    convertors reports completion.
 */
int32_t ompi_datatype_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype,
                              void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype)
{
    opal_convertor_t send_convertor, recv_convertor;
    struct iovec iov;
    int length, completed;
    uint32_t iov_count;
    size_t max_data;

    /* First check if we really have something to do */
    if (0 == rcount || 0 == rdtype->super.size) {
        return ((0 == scount || 0 == sdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE);
    }

    /* If same datatypes used, just copy. */
    if (sdtype == rdtype) {
        int32_t count = ( scount < rcount ? scount : rcount );
        opal_datatype_copy_content_same_ddt(&(rdtype->super), count, (char*)rbuf, (char*)sbuf);
        return ((scount > rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS);
    }

    /* If receive packed. */
    if (rdtype->id == OMPI_DATATYPE_MPI_PACKED) {
        OBJ_CONSTRUCT( &send_convertor, opal_convertor_t );
        opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor,
                                                  &(sdtype->super), scount, sbuf, 0,
                                                  &send_convertor );

        iov_count = 1;
        iov.iov_base = (IOVBASE_TYPE*)rbuf;
        iov.iov_len = scount * sdtype->super.size;
        /* Clamp to the receive buffer size.  The comparison is done in
         * size_t: narrowing iov_len to int32_t would wrap for sizes above
         * INT32_MAX and skip the clamp. */
        if( iov.iov_len > (size_t)rcount ) iov.iov_len = (size_t)rcount;
        max_data = iov.iov_len;

        opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data );
        OBJ_DESTRUCT( &send_convertor );
        /* NOTE(review): this reports truncation when fewer than rcount bytes
         * were packed - confirm that this is the intended condition. */
        return ((max_data < (size_t)rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS);
    }

    /* If send packed. */
    if (sdtype->id == OMPI_DATATYPE_MPI_PACKED) {
        OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t );
        opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor,
                                                  &(rdtype->super), rcount, rbuf, 0,
                                                  &recv_convertor );

        iov_count = 1;
        iov.iov_base = (IOVBASE_TYPE*)sbuf;
        iov.iov_len = rcount * rdtype->super.size;
        /* Never read more than the scount bytes available in the packed
         * send buffer (size_t comparison, see above). */
        if( iov.iov_len > (size_t)scount ) iov.iov_len = (size_t)scount;
        max_data = iov.iov_len;

        opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data );
        OBJ_DESTRUCT( &recv_convertor );
        return (((size_t)scount > max_data) ? MPI_ERR_TRUNCATE : MPI_SUCCESS);
    }

    /* General case: go through an intermediate packed buffer. */
    iov.iov_len = length = 64 * 1024;
    /* NOTE(review): the allocation result is not checked. */
    iov.iov_base = (IOVBASE_TYPE*)malloc( length * sizeof(char) );

    OBJ_CONSTRUCT( &send_convertor, opal_convertor_t );
    opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor,
                                              &(sdtype->super), scount, sbuf, 0,
                                              &send_convertor );
    OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t );
    opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor,
                                              &(rdtype->super), rcount, rbuf, 0,
                                              &recv_convertor );

    completed = 0;
    while( !completed ) {
        /* pack may have shrunk iov_len on the previous round */
        iov.iov_len = length;
        iov_count = 1;
        max_data = length;
        completed |= opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data );
        completed |= opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data );
    }
    free( iov.iov_base );
    OBJ_DESTRUCT( &send_convertor );
    OBJ_DESTRUCT( &recv_convertor );

    return ( (scount * sdtype->super.size) <= (rcount * rdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE );
}
Example #27
0
/**
 * Prepare data for send/put
 *
 * Contiguous data of at least btl_max_send_size bytes with no reserved
 * header space is described in place through an RDMA fragment; everything
 * else is packed behind the reserved area of a send fragment.
 *
 * @param btl (IN)           BTL module
 * @param endpoint (IN)      BTL peer addressing (not used here)
 * @param registration (IN)  Memory registration (not used here)
 * @param convertor (IN)     Convertor positioned on the data to send
 * @param order (IN)         Ordering request (not used here)
 * @param reserve (IN)       Bytes to reserve in front of the payload
 * @param size (IN/OUT)      Requested payload size on entry, size actually
 *                           described by the descriptor on return
 * @param flags (IN)         Descriptor flags
 *
 * @return the descriptor, or NULL if no fragment is available or packing
 *         fails.
 */
struct mca_btl_base_descriptor_t*
mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
                          struct mca_btl_base_endpoint_t* endpoint,
                          mca_mpool_base_registration_t* registration,
                          struct opal_convertor_t* convertor,
                          uint8_t order,
                          size_t reserve,
                          size_t* size,
                          uint32_t flags )
{
    mca_btl_self_frag_t* frag;
    struct iovec pack_iov;
    uint32_t pack_iov_cnt = 1;
    size_t nbytes = *size;
    int status;

    if( !opal_convertor_need_buffers(convertor) &&
        nbytes >= mca_btl_self.btl_max_send_size &&
        0 == reserve ) {
        /* large contiguous data: reference the user's buffer directly */
        MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag);
        if(OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        /* convertor should return offset into users buffer */
        pack_iov.iov_base = NULL;
        pack_iov.iov_len = nbytes;
        status = opal_convertor_pack(convertor, &pack_iov, &pack_iov_cnt, &nbytes );
        if(status < 0) {
            MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
            return NULL;
        }

        frag->segment.seg_len = nbytes;
        frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) pack_iov.iov_base;
    } else {
        /* non-contiguous data, small data or reserved header: copy */
        MCA_BTL_SELF_FRAG_ALLOC_SEND(frag);
        if(OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        /* never pack more than fits behind the reserved area */
        if(reserve + nbytes > frag->size) {
            nbytes = frag->size - reserve;
        }
        pack_iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)(frag+1) + reserve);
        pack_iov.iov_len = nbytes;

        status = opal_convertor_pack(convertor, &pack_iov, &pack_iov_cnt, &nbytes );
        if(status < 0) {
            MCA_BTL_SELF_FRAG_RETURN_SEND(frag);
            return NULL;
        }

        frag->segment.seg_len = reserve + nbytes;
        frag->segment.seg_addr.pval = frag+1;
    }

    *size = nbytes;
    frag->base.des_src_cnt      = 1;
    frag->base.des_src          = &frag->segment;
    frag->base.des_flags = flags;

    return &frag->base;
}
Example #28
0
/**
 * Build a send/put descriptor for the SCTP BTL.
 *
 * A fragment is taken from the eager pool when payload plus reserved header
 * space fits into the eager limit, otherwise from the max pool.  Segment 0
 * always starts right after the fragment structure.  Depending on the data:
 *  - no payload: segment 0 covers only the reserved area;
 *  - convertor needs buffers: the payload is packed behind the reserved
 *    area, segment 0 covers both;
 *  - otherwise: segment 0 covers the reserved area and segment 1 references
 *    the pointer returned by the convertor.
 *
 * @param btl (IN)           BTL module
 * @param endpoint (IN)      BTL peer addressing (not used here)
 * @param registration (IN)  Memory registration (not used here)
 * @param convertor (IN)     Convertor positioned on the data to send
 * @param order (IN)         Ordering request (not used here)
 * @param reserve (IN)       Bytes to reserve in front of the payload
 * @param size (IN/OUT)      Requested payload size on entry, size actually
 *                           described by the descriptor on return
 * @param flags (IN)         Descriptor flags
 *
 * @return the descriptor, or NULL if no fragment is available or packing
 *         fails.
 */
mca_btl_base_descriptor_t* mca_btl_sctp_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    mca_btl_sctp_frag_t* frag;
    struct iovec pack_iov;
    uint32_t pack_iov_cnt = 1;
    size_t nbytes = *size;
    size_t first_seg_len = reserve;
    size_t seg_count = 1;
    int rc;

    /* Too big for an eager fragment: pack as much as fits into a fragment
     * of the max send size instead. */
    if (nbytes + reserve > btl->btl_eager_limit) {
        MCA_BTL_SCTP_FRAG_ALLOC_MAX(frag, rc);
    } else {
        MCA_BTL_SCTP_FRAG_ALLOC_EAGER(frag, rc);
    }
    if(NULL == frag) {
        return NULL;
    }

    if(0 != nbytes) {
        const int copy_needed = opal_convertor_need_buffers(convertor);

        if(copy_needed) {
            /* the payload has to fit behind the reserved area */
            if (nbytes + reserve > frag->size) {
                nbytes = frag->size - reserve;
            }
            pack_iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag+1)) + reserve);
        } else {
            /* no copy: the convertor is expected to fill in the pointer */
            pack_iov.iov_base = NULL;
        }
        pack_iov.iov_len = nbytes;

        rc = opal_convertor_pack(convertor, &pack_iov, &pack_iov_cnt, &nbytes );
        if( rc < 0 ) {
            mca_btl_sctp_free(btl, &frag->base);
            return NULL;
        }

        if(copy_needed) {
            first_seg_len = nbytes + reserve;
        } else {
            frag->segments[1].seg_len = nbytes;
            frag->segments[1].seg_addr.pval = pack_iov.iov_base;
            seg_count = 2;
        }
    }

    frag->segments[0].seg_addr.pval = (frag + 1);
    frag->segments[0].seg_len = first_seg_len;
    frag->base.des_src_cnt = seg_count;

    frag->base.des_src = frag->segments;
    frag->base.des_dst_cnt = 0;
    frag->base.des_dst = NULL;
    frag->base.des_flags = flags;
    *size = nbytes;
    return &frag->base;
}