Example #1
static size_t mca_pml_yalla_stream_unpack(void *buffer, size_t length, size_t offset,
                                          opal_convertor_t *convertor)
{
    uint32_t iov_count;
    struct iovec iov;

    iov_count    = 1;
    iov.iov_base = buffer;
    iov.iov_len  = length;

    opal_convertor_set_position(convertor, &offset);
    opal_convertor_unpack(convertor, &iov, &iov_count, &length);
    return length;
}
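Examples #1 and #3 are near-identical stream-unpack callbacks handed to a transport library. The final size argument of opal_convertor_unpack is in/out: on entry it caps how much may be unpacked, and on return it holds the number of bytes actually consumed, which is what the callback returns. A caller-side sketch of driving such a callback (hypothetical driver; in reality the MXM library invokes it):

/* hypothetical driver: feed a packed stream to the callback in two halves.
 * 'convertor' is assumed to have been prepared earlier with
 * opal_convertor_prepare_for_recv() against the receive buffer. */
char packed[8192];                    /* filled by the sender's pack side */
size_t half = sizeof(packed) / 2;
size_t n0 = mca_pml_yalla_stream_unpack(packed, half, 0, convertor);
size_t n1 = mca_pml_yalla_stream_unpack(packed + half, half, half, convertor);
/* n0 + n1 bytes have landed in the receive buffer described by the convertor */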
Example #2
/* completion of an accumulate get operation */
static void ompi_osc_rdma_acc_get_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                            void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                            void *context, void *data, int status)
{
    ompi_osc_rdma_request_t *request = (ompi_osc_rdma_request_t *) context;
    intptr_t source = (intptr_t) local_address + request->offset;
    ompi_osc_rdma_sync_t *sync = request->sync;
    ompi_osc_rdma_module_t *module = sync->module;

    assert (OMPI_SUCCESS == status);

    if (OMPI_SUCCESS == status && OMPI_OSC_RDMA_TYPE_GET_ACC == request->type) {
        if (NULL == request->result_addr) {
            /* result buffer is not necessarily contiguous. use the opal datatype engine to
             * copy the data over in this case */
            struct iovec iov = {.iov_base = (void *) source, .iov_len = request->len};
            uint32_t iov_count = 1;
            size_t size = request->len;

            opal_convertor_unpack (&request->convertor, &iov, &iov_count, &size);
            opal_convertor_cleanup (&request->convertor);
        } else {
            /* copy contiguous data to the result buffer */
            ompi_datatype_sndrcv ((void *) source, request->len, MPI_BYTE, request->result_addr,
                                  request->result_count, request->result_dt);
        }

        if (&ompi_mpi_op_no_op.op == request->op) {
            /* this is a no-op. nothing more to do except release resources and the accumulate lock */
            ompi_osc_rdma_acc_put_complete (btl, endpoint, local_address, local_handle, context, data, status);

            return;
        }
    }

    /* accumulate the data */
    if (&ompi_mpi_op_replace.op != request->op) {
        ompi_op_reduce (request->op, request->origin_addr, (void *) source, request->origin_count, request->origin_dt);
    }

    /* initiate the put of the accumulated data */
    status = module->selected_btl->btl_put (module->selected_btl, endpoint, (void *) source,
                                            request->target_address, local_handle,
                                            (mca_btl_base_registration_handle_t *) request->ctx,
                                            request->len, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_acc_put_complete,
                                            request, NULL);
    /* TODO -- we can do better. probably should queue up the next step and handle it in progress */
    assert (OPAL_SUCCESS == status);
}
Example #3
static size_t ompi_mtl_mxm_stream_unpack(void *buffer, size_t length,
                                         size_t offset, void *context)
{
    struct iovec iov;
    uint32_t iov_count = 1;

    mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) context;
    opal_convertor_t *convertor = mtl_mxm_request->convertor;

    iov.iov_len = length;
    iov.iov_base = buffer;

    opal_convertor_set_position(convertor, &offset);
    opal_convertor_unpack(convertor, &iov, &iov_count, &length);

    return length;
}
Example #4
static int
unpack_segments( ompi_datatype_t* datatype, int count,
                 size_t segment_size,
                 ddt_segment_t* segments, int seg_count,
                 void* buffer )
{
    opal_convertor_t* convertor;
    size_t max_size, position;
    int i;
    uint32_t iov_count;
    struct iovec iov;

    convertor = opal_convertor_create( opal_local_arch, 0 );
    opal_convertor_prepare_for_recv( convertor, &(datatype->super), count, buffer );

    for( i = 0; i < seg_count; i++ ) {
        iov.iov_len = segments[i].size;
        iov.iov_base = segments[i].buffer;
        max_size = iov.iov_len;

        position = segments[i].position;
        opal_convertor_set_position( convertor, &position );
        if( position != segments[i].position ) {
            opal_output( 0, "Setting position failed (%lu != %lu)\n",
                         (unsigned long)segments[i].position, (unsigned long)position );
            break;
        }

        iov_count = 1;
        opal_convertor_unpack( convertor, &iov, &iov_count, &max_size );
        if( max_size != segments[i].size ) {
            opal_output( 0, "Amount of unpacked data do not match (%lu != %lu)\n",
                         (unsigned long)max_size, (unsigned long)segments[i].size );
            opal_output( 0, "Segment %d position %lu size %lu\n", i,
                         (unsigned long)segments[i].position, segments[i].size );
        }
    }
    OBJ_RELEASE(convertor);
    return 0;
}
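Example #4 adds a defensive detail the simpler callbacks skip: opal_convertor_set_position writes back the position it actually reached, so a caller can verify the seek before unpacking. A minimal form of that check (hypothetical wrapper name):

/* returns 0 if the convertor could be positioned exactly at 'want' */
static int seek_convertor(opal_convertor_t *convertor, size_t want)
{
    size_t pos = want;
    opal_convertor_set_position(convertor, &pos);
    return (pos == want) ? 0 : -1;
}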
Example #5
void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl, 
                                          mca_btl_base_tag_t tag,
                                          mca_btl_base_descriptor_t* des,
                                          void* cbdata ) { 
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_csum_match_hdr_t* hdr = (mca_pml_csum_match_hdr_t*)segments->seg_addr.pval;
    ompi_communicator_t *comm_ptr;
    mca_pml_csum_recv_request_t *match = NULL;
    mca_pml_csum_comm_t *comm;
    mca_pml_csum_comm_proc_t *proc;
    mca_pml_csum_recv_frag_t* frag = NULL;
    size_t num_segments = des->des_dst_cnt;
    size_t bytes_received = 0;
    uint16_t csum_received, csum=0;
    uint32_t csum_data;
    
    if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_CSUM_MATCH_HDR_LEN) ) {
        return;
    }
    csum_hdr_ntoh(((mca_pml_csum_hdr_t*) hdr), MCA_PML_CSUM_HDR_TYPE_MATCH);

    csum_received = hdr->hdr_common.hdr_csum;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    csum = opal_csum16(hdr, OMPI_PML_CSUM_MATCH_HDR_LEN);
    hdr->hdr_common.hdr_csum = csum_received;
    
    OPAL_OUTPUT_VERBOSE((5, mca_pml_base_output,
                         "%s:%s:%d common_hdr: %02x:%02x:%04x   match_hdr:  %04x:%04x:%08x:%08x:%08x",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__,
                         hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_csum,
                         hdr->hdr_ctx, hdr->hdr_seq, hdr->hdr_src, hdr->hdr_tag, hdr->hdr_csum));
    
    if (csum_received != csum) {
        opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x  != computed csum:0x%04x\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
        orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
                          "Checksum header violation: job %s file %s line %d",
                          (NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
                          __FILE__, __LINE__);
        dump_csum_error_data(segments, 1);
        orte_errmgr.abort(-1,NULL);
    }
    
    /* communicator pointer */
    comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
    if(OPAL_UNLIKELY(NULL == comm_ptr)) {
        /* This is a special case. A message for a communicator that
         * does not yet exist can arrive. Instead of doing a match we
         * temporarily add it to a pending queue in the PML. Later on,
         * when the communicator is completely instantiated, this
         * pending queue will be searched and all matching fragments
         * moved to the right communicator.
         */
        append_frag_to_list( &mca_pml_csum.non_existing_communicator_pending,
                             btl, hdr, segments, num_segments, frag );
        return;
    }
    comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm;
    
    /* source sequence number */
    proc = &comm->procs[hdr->hdr_src];
 
    /* We generate the MSG_ARRIVED event as soon as the PML is aware
     * of a matching fragment arrival, independent of whether it is
     * received in the correct order or not. This allows the tools to
     * figure out when messages are not received in the correct
     * order (e.g. with multiple network interfaces).
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
 
    /* get next expected message sequence number - if threaded
     * run, lock to make sure that if another thread is processing
     * a frag from the same message a match is made only once.
     * Also, this prevents other posted receives (for a pair of
     * end points) from being processed, and potentially "losing"
     * the fragment.
     */
    OPAL_THREAD_LOCK(&comm->matching_lock);
    
     /* get sequence number of next message that can be processed */
    if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) ||
                     (opal_list_get_size(&proc->frags_cant_match) > 0 ))) {
        goto slow_path;
    }
    
    /* This is the sequence number we were expecting, so we can try
     * matching it to already posted receives.
     */
    
    /* We're now expecting the next sequence number. */
    proc->expected_sequence++;

    /* We generate the SEARCH_POSTED_QUEUE only when the message is
     * received in the correct sequence. Otherwise, we delay the event
     * generation until we reach the correct sequence number.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
                            hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
    
    match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag);
    
    /* The match is over. We generate the SEARCH_POSTED_Q_END here,
     * before going into the mca_pml_csum_check_cantmatch_for_match so
     * we can make a difference for the searching time for all
     * messages.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
    
    /* release matching lock before processing fragment */
    OPAL_THREAD_UNLOCK(&comm->matching_lock);
    
    if(OPAL_LIKELY(match)) {
        bytes_received = segments->seg_len - OMPI_PML_CSUM_MATCH_HDR_LEN;
        match->req_recv.req_bytes_packed = bytes_received;
        
        MCA_PML_CSUM_RECV_REQUEST_MATCHED(match, hdr);
        if(bytes_received > 0) { 
            struct iovec iov[2];
            uint32_t iov_count = 1;
            
            /*
             *  Make the user buffer accessible (defined) before unpacking.
             */
            MEMCHECKER(
                       memchecker_call(&opal_memchecker_base_mem_defined,
                                       match->req_recv.req_base.req_addr,
                                       match->req_recv.req_base.req_count,
                                       match->req_recv.req_base.req_datatype);
                       );
            
            iov[0].iov_len = bytes_received;
            iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval +
                                              OMPI_PML_CSUM_MATCH_HDR_LEN);
            while (iov_count < num_segments) {
                bytes_received += segments[iov_count].seg_len;
                iov[iov_count].iov_len = segments[iov_count].seg_len;
                iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval);
                iov_count++;
            }
            opal_convertor_unpack( &match->req_recv.req_base.req_convertor,
                                   iov,
                                   &iov_count,
                                   &bytes_received );
            match->req_bytes_received = bytes_received;
            /*
             *  Unpacking finished, make the user buffer inaccessible again.
             */
            MEMCHECKER(
                       memchecker_call(&opal_memchecker_base_mem_noaccess,
                                       match->req_recv.req_base.req_addr,
                                       match->req_recv.req_base.req_count,
                                       match->req_recv.req_base.req_datatype);
                       );
        }
Example #6
static int test_upper( unsigned int length )
{
    double *mat1, *mat2, *inbuf;
    opal_datatype_t *pdt;
    opal_convertor_t * pConv;
    char *ptr;
    int rc;
    unsigned int i, j, iov_count, split_chunk, total_length;
    size_t max_data;
    struct iovec a;
    TIMER_DATA_TYPE start, end;
    long total_time;

    printf( "test upper matrix\n" );
    pdt = upper_matrix( length );
    opal_datatype_dump( pdt );

    mat1 = malloc( length * length * sizeof(double) );
    init_random_upper_matrix( length, mat1 );
    mat2 = calloc( length * length, sizeof(double) );

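    /* bytes in the upper triangle incl. the diagonal:
       length*(length+1)/2 doubles (sizeof(double)/2 == 4 keeps this integral) */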
    total_length = length * (length + 1) * ( sizeof(double) / 2);
    inbuf = (double*)malloc( total_length );
    ptr = (char*)inbuf;
    /* copy upper matrix in the array simulating the input buffer */
    for( i = 0; i < length; i++ ) {
        uint32_t pos = i * length + i;
        for( j = i; j < length; j++, pos++ ) {
            *inbuf = mat1[pos];
            inbuf++;
        }
    }
    inbuf = (double*)ptr;
    pConv = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, pdt, 1, mat2 ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        return OPAL_ERROR;
    }

    GET_TIME( start );
    split_chunk = (length + 1) * sizeof(double);
    /*    split_chunk = (total_length + 1) * sizeof(double); */
    for( i = total_length; i > 0; ) {
        if( i <= split_chunk ) {  /* equal test just to be able to set a breakpoint */
            split_chunk = i;
        }
        a.iov_base = ptr;
        a.iov_len = split_chunk;
        iov_count = 1;
        max_data = split_chunk;
        opal_convertor_unpack( pConv, &a, &iov_count, &max_data );
        ptr += max_data;
        i -= max_data;
        if( mat2[0] != inbuf[0] ) assert(0);
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "complete unpacking in %ld microsec\n", total_time );
    free( inbuf );
    rc = check_diag_matrix( length, mat1, mat2 );
    free( mat1 );
    free( mat2 );

    /* test the automatic destruction of the data */
    opal_datatype_destroy( &pdt );
    assert( pdt == NULL );

    OBJ_RELEASE( pConv );
    return rc;
}
Example #7
static int local_copy_with_convertor( const opal_datatype_t* pdt, int count, int chunk )
{
    OPAL_PTRDIFF_TYPE extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t length = 0, done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    opal_datatype_type_extent( pdt, &extent );

    pdst  = malloc( extent * count );
    psrc  = malloc( extent * count );
    ptemp = malloc( chunk );

    {
        int i = 0;
        for( ; i < (count * extent); ((char*)psrc)[i] = i % 128 + 32, i++ );
    }
    memset( pdst, 0, count * extent );

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );
    return OPAL_SUCCESS;
}
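Examples #7, #8, #9 and #13 all drive a send convertor and a receive convertor in lock-step through a bounce buffer of chunk bytes; the timing, validation and convertor-reset code varies, but the core loop is the same. Stripped to its essentials (a sketch, assuming the convertors and ptemp are set up as above):

/* pack up to 'chunk' bytes into the bounce buffer, then unpack those same
 * bytes; both calls return nonzero once the whole datatype is processed,
 * and max_data is updated with the byte count actually moved. */
int done1 = 0, done2 = 0;
while (!done1 || !done2) {
    struct iovec iov = { .iov_base = ptemp, .iov_len = chunk };
    uint32_t iov_count = 1;
    size_t max_data = chunk;

    if (!done1)
        done1 = opal_convertor_pack(send_convertor, &iov, &iov_count, &max_data);
    if (!done2)
        done2 = opal_convertor_unpack(recv_convertor, &iov, &iov_count, &max_data);
}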
Example #8
static int local_copy_with_convertor( opal_datatype_t const * const pdt, int count, int chunk )
{
    OPAL_PTRDIFF_TYPE lb, extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst, *osrc;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, malloced_size;
    int32_t done1 = 0, done2 = 0, errors = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    malloced_size = compute_memory_size(pdt, count);
    opal_datatype_get_extent( pdt, &lb, &extent );

    odst = (char*)malloc( malloced_size );
    osrc = (char*)malloc( malloced_size );
    ptemp = malloc( chunk );

    {
        for( size_t i = 0; i < malloced_size; osrc[i] = i % 128 + 32, i++ );
        memcpy(odst, osrc, malloced_size);
    }
    pdst  = odst - lb;
    psrc  = osrc - lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
        if( outputFlags & RESET_CONVERTORS ) {
            struct dt_stack_t stack[1+send_convertor->stack_pos];
            int i, stack_pos = send_convertor->stack_pos;
            size_t pos;

            if( 0 == done1 ) {
                memcpy(stack, send_convertor->pStack, (1+send_convertor->stack_pos) * sizeof(struct dt_stack_t));
                pos = 0;
                opal_convertor_set_position(send_convertor, &pos);
                pos = length;
                opal_convertor_set_position(send_convertor, &pos);
                assert(pos == length);
                for(i = 0; i <= stack_pos; i++ ) {
                    if( stack[i].index != send_convertor->pStack[i].index )
                        {errors = 1; printf("send stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n",
                                            i, stack[i].index, send_convertor->pStack[i].index,
                                            length, pdt->size * count);}
                    if( stack[i].count != send_convertor->pStack[i].count ) {
                        if( stack[i].type == send_convertor->pStack[i].type ) {
                            {errors = 1; printf("send stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n",
                                                    i, stack[i].count, send_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        } else {
                            if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= send_convertor->pStack[i].type) )
                                {errors = 1; printf("send stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, (int)send_convertor->pStack[i].type,
                                                    length, pdt->size * count);}
                            else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) !=
                                     (send_convertor->pStack[i].count * opal_datatype_basicDatatypes[send_convertor->pStack[i].type]->size) )
                                {errors = 1; printf("send stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, stack[i].count,
                                                    (int)send_convertor->pStack[i].type, send_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        }
                    }
                    if( stack[i].disp != send_convertor->pStack[i].disp )
                        {errors = 1; printf("send stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n",
                                            i, (void*)stack[i].disp, (void*)send_convertor->pStack[i].disp,
                                            length, pdt->size * count);}
                    if(0 != errors) {assert(0); exit(-1);}
                }
            }
            if( 0 == done2 ) {
                memcpy(stack, recv_convertor->pStack, (1+recv_convertor->stack_pos) * sizeof(struct dt_stack_t));
                pos = 0;
                opal_convertor_set_position(recv_convertor, &pos);
                pos = length;
                opal_convertor_set_position(recv_convertor, &pos);
                assert(pos == length);
                for(i = 0; i <= stack_pos; i++ ) {
                    if( stack[i].index != recv_convertor->pStack[i].index )
                        {errors = 1; printf("recv stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n",
                                            i, stack[i].index, recv_convertor->pStack[i].index,
                                            length, pdt->size * count);}
                    if( stack[i].count != recv_convertor->pStack[i].count ) {
                        if( stack[i].type == recv_convertor->pStack[i].type ) {
                            {errors = 1; printf("recv stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n",
                                                    i, stack[i].count, recv_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        } else {
                            if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= recv_convertor->pStack[i].type) )
                                {errors = 1; printf("recv stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, (int)recv_convertor->pStack[i].type,
                                                    length, pdt->size * count);}
                            else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) !=
                                     (recv_convertor->pStack[i].count * opal_datatype_basicDatatypes[recv_convertor->pStack[i].type]->size) )
                                {errors = 1; printf("recv stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, stack[i].count,
                                                    (int)recv_convertor->pStack[i].type, recv_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        }
                    }
                    if( stack[i].disp != recv_convertor->pStack[i].disp )
                        {errors = 1; printf("recv stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n",
                                            i, (void*)stack[i].disp, (void*)recv_convertor->pStack[i].disp,
                                            length, pdt->size * count);}
                    if(0 != errors) {assert(0); exit(-1);}
                }
            }
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

    if(outputFlags & VALIDATE_DATA) {
        for( size_t i = errors = 0; i < malloced_size; i++ ) {
            if( odst[i] != osrc[i] ) {
                printf("error at position %lu (%d != %d)\n",
                       (unsigned long)i, (int)(odst[i]), (int)(osrc[i]));
                errors++;
                if(outputFlags & QUIT_ON_FIRST_ERROR) {
                    opal_datatype_dump(pdt);
                    assert(0); exit(-1);
                }
            }
        }
        if( 0 == errors ) {
            printf("Validation check succesfully passed\n");
        } else {
            printf("Found %d errors. Giving up!\n", errors);
            exit(-1);
        }
    }
 clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != odst ) free( odst );
    if( NULL != osrc ) free( osrc );
    if( NULL != ptemp ) free( ptemp );
    return (0 == errors ? OPAL_SUCCESS : errors);
}
Example #9
static int
local_copy_with_convertor_2datatypes( opal_datatype_t const * const send_type, int send_count,
                                      opal_datatype_t const * const recv_type, int recv_count,
                                      int chunk )
{
    OPAL_PTRDIFF_TYPE send_lb, send_extent, recv_lb, recv_extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst, *osrc;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, send_malloced_size, recv_malloced_size;
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    send_malloced_size = compute_memory_size(send_type, send_count);
    recv_malloced_size = compute_memory_size(recv_type, recv_count);

    opal_datatype_get_extent( send_type, &send_lb, &send_extent );
    opal_datatype_get_extent( recv_type, &recv_lb, &recv_extent );

    odst = (char*)malloc( recv_malloced_size );
    osrc = (char*)malloc( send_malloced_size );
    ptemp = malloc( chunk );

    /* fill the source with a pattern and the receiver with ZEROs */
    for( size_t i = 0; i < send_malloced_size; i++ )
        osrc[i] = i % 128 + 32;
    memset( odst, 0, recv_malloced_size );
    pdst  = odst - recv_lb;
    psrc  = osrc - send_lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, send_type, send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, recv_type, recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;

        if( outputFlags & RESET_CONVERTORS ) {
            size_t pos = 0;
            opal_convertor_set_position(send_convertor, &pos);
            pos = length;
            opal_convertor_set_position(send_convertor, &pos);
            assert(pos == length);

            pos = 0;
            opal_convertor_set_position(recv_convertor, &pos);
            pos = length;
            opal_convertor_set_position(recv_convertor, &pos);
            assert(pos == length);
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    if( NULL != odst ) free( odst );
    if( NULL != osrc ) free( osrc );
    if( NULL != ptemp ) free( ptemp );
    return OPAL_SUCCESS;
}
Example #10
void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, 
                                          mca_btl_base_tag_t tag,
                                          mca_btl_base_descriptor_t* des,
                                          void* cbdata )
{
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_ob1_match_hdr_t* hdr = (mca_pml_ob1_match_hdr_t*)segments->seg_addr.pval;
    ompi_communicator_t *comm_ptr;
    mca_pml_ob1_recv_request_t *match = NULL;
    mca_pml_ob1_comm_t *comm;
    mca_pml_ob1_comm_proc_t *proc;
    size_t num_segments = des->des_dst_cnt;
    size_t bytes_received = 0;
    
    if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_OB1_MATCH_HDR_LEN) ) {
        return;
    }
    ob1_hdr_ntoh(((mca_pml_ob1_hdr_t*) hdr), MCA_PML_OB1_HDR_TYPE_MATCH);
    
    /* communicator pointer */
    comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
    if(OPAL_UNLIKELY(NULL == comm_ptr)) {
        /* This is a special case. A message for a communicator that
         * does not yet exist can arrive. Instead of doing a match we
         * temporarily add it to a pending queue in the PML. Later on,
         * when the communicator is completely instantiated, this
         * pending queue will be searched and all matching fragments
         * moved to the right communicator.
         */
        append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending,
                             btl, hdr, segments, num_segments, NULL );
        return;
    }
    comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
    
    /* source sequence number */
    proc = &comm->procs[hdr->hdr_src];
 
    /* We generate the MSG_ARRIVED event as soon as the PML is aware
     * of a matching fragment arrival, independent of whether it is
     * received in the correct order or not. This allows the tools to
     * figure out when messages are not received in the correct
     * order (e.g. with multiple network interfaces).
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
 
    /* get next expected message sequence number - if threaded
     * run, lock to make sure that if another thread is processing
     * a frag from the same message a match is made only once.
     * Also, this prevents other posted receives (for a pair of
     * end points) from being processed, and potentially "losing"
     * the fragment.
     */
    OPAL_THREAD_LOCK(&comm->matching_lock);
    
     /* get sequence number of next message that can be processed */
    if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) ||
                     (opal_list_get_size(&proc->frags_cant_match) > 0 ))) {
        goto slow_path;
    }
    
    /* This is the sequence number we were expecting, so we can try
     * matching it to already posted receives.
     */
    
    /* We're now expecting the next sequence number. */
    proc->expected_sequence++;

    /* We generate the SEARCH_POSTED_QUEUE only when the message is
     * received in the correct sequence. Otherwise, we delay the event
     * generation until we reach the correct sequence number.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
                            hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
    
    match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL);
    
    /* The match is over. We generate the SEARCH_POSTED_Q_END here,
     * before going into the mca_pml_ob1_check_cantmatch_for_match so
     * we can make a difference for the searching time for all
     * messages.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
    
    /* release matching lock before processing fragment */
    OPAL_THREAD_UNLOCK(&comm->matching_lock);

    if(OPAL_LIKELY(match)) {
        bytes_received = segments->seg_len - OMPI_PML_OB1_MATCH_HDR_LEN;
        match->req_recv.req_bytes_packed = bytes_received;
        
        MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr);
        if(match->req_bytes_expected > 0) { 
            struct iovec iov[2];
            uint32_t iov_count = 1;
            
            /*
             *  Make the user buffer accessible (defined) before unpacking.
             */
            MEMCHECKER(
                       memchecker_call(&opal_memchecker_base_mem_defined,
                                       match->req_recv.req_base.req_addr,
                                       match->req_recv.req_base.req_count,
                                       match->req_recv.req_base.req_datatype);
                       );
            
            iov[0].iov_len = bytes_received;
            iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval +
                                              OMPI_PML_OB1_MATCH_HDR_LEN);
            while (iov_count < num_segments) {
                bytes_received += segments[iov_count].seg_len;
                iov[iov_count].iov_len = segments[iov_count].seg_len;
                iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval);
                iov_count++;
            }
            opal_convertor_unpack( &match->req_recv.req_base.req_convertor,
                                   iov,
                                   &iov_count,
                                   &bytes_received );
            match->req_bytes_received = bytes_received;
            /*
             *  Unpacking finished, make the user buffer inaccessible again.
             */
            MEMCHECKER(
                       memchecker_call(&opal_memchecker_base_mem_noaccess,
                                       match->req_recv.req_base.req_addr,
                                       match->req_recv.req_base.req_count,
                                       match->req_recv.req_base.req_datatype);
                       );
        }
Example #11
/*
 * ompi_datatype_sndrcv
 *
 * Function: - copy an MPI message from one buffer into another
 *           - send/recv done if cannot optimize
 * Accepts:  - send buffer
 *           - send count
 *           - send datatype
 *           - receive buffer
 *           - receive count
 *           - receive datatype
 * Returns:  - MPI_SUCCESS or error code
 */
int32_t ompi_datatype_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype,
                              void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype)
{
    opal_convertor_t send_convertor, recv_convertor;
    struct iovec iov;
    int length, completed;
    uint32_t iov_count;
    size_t max_data;

    /* First check if we really have something to do */
    if (0 == rcount || 0 == rdtype->super.size) {
        return ((0 == scount || 0 == sdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE);
    }

    /* If same datatypes used, just copy. */
    if (sdtype == rdtype) {
        int32_t count = ( scount < rcount ? scount : rcount );
        opal_datatype_copy_content_same_ddt(&(rdtype->super), count, (char*)rbuf, (char*)sbuf);
        return ((scount > rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS);
    }

    /* If receive packed. */
    if (rdtype->id == OMPI_DATATYPE_MPI_PACKED) {
        OBJ_CONSTRUCT( &send_convertor, opal_convertor_t );
        opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor,
                                                  &(sdtype->super), scount, sbuf, 0,
                                                  &send_convertor );

        iov_count = 1;
        iov.iov_base = (IOVBASE_TYPE*)rbuf;
        iov.iov_len = scount * sdtype->super.size;
        if( (int32_t)iov.iov_len > rcount ) iov.iov_len = rcount;

        opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data );
        OBJ_DESTRUCT( &send_convertor );
        return ((max_data < (size_t)rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS);
    }

    /* If send packed. */
    if (sdtype->id == OMPI_DATATYPE_MPI_PACKED) {
        OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t );
        opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor,
                                                  &(rdtype->super), rcount, rbuf, 0,
                                                  &recv_convertor );

        iov_count = 1;
        iov.iov_base = (IOVBASE_TYPE*)sbuf;
        iov.iov_len = rcount * rdtype->super.size;
        if( (int32_t)iov.iov_len > scount ) iov.iov_len = scount;

        opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data );
        OBJ_DESTRUCT( &recv_convertor );
        return (((size_t)scount > max_data) ? MPI_ERR_TRUNCATE : MPI_SUCCESS);
    }

    iov.iov_len = length = 64 * 1024;
    iov.iov_base = (IOVBASE_TYPE*)malloc( length * sizeof(char) );

    OBJ_CONSTRUCT( &send_convertor, opal_convertor_t );
    opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor,
                                              &(sdtype->super), scount, sbuf, 0,
                                              &send_convertor );
    OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t );
    opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor,
                                              &(rdtype->super), rcount, rbuf, 0,
                                              &recv_convertor );

    completed = 0;
    while( !completed ) {
        iov.iov_len = length;
        iov_count = 1;
        max_data = length;
        completed |= opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data );
        completed |= opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data );
    }
    free( iov.iov_base );
    OBJ_DESTRUCT( &send_convertor );
    OBJ_DESTRUCT( &recv_convertor );

    return ( (scount * sdtype->super.size) <= (rcount * rdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE );
}
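A typical call site for ompi_datatype_sndrcv copies between two datatype descriptions in local memory, e.g. when a collective has to repack a user buffer. A hypothetical usage sketch (buffer names and count are assumptions; inside Open MPI, MPI_INT resolves to an ompi_datatype_t*):

/* same datatype on both sides takes the fast copy path; different but
 * compatible types go through the pack/unpack loop above. */
int rc = ompi_datatype_sndrcv(src_buf, n, MPI_INT, dst_buf, n, MPI_INT);
if (MPI_SUCCESS != rc) {
    /* MPI_ERR_TRUNCATE: the receive description was smaller than the send */
}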
Example #12
/* called when a receive should be progressed */
static int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request =
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    /* as soon as we've seen any event associated with a request, it's
       started */
    ptl_request->req_started = true;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate expected: %ld %ld",
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
            /* If it's not a short message and we're doing rndv, we
               only have the first part of the message.  Issue the get
               to pull the second part of the message. */
            ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
                           ((msg_length > ptl_request->delivery_len) ?
                            ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
                           ev->initiator,
                           ev->hdr_data,
                           ompi_mtl_portals4.eager_limit,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }

        } else {
            /* If we're either using the eager protocol or were a
               short message, all data has been received, so complete
               the message. */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_REPLY:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got reply event",
                             ptl_request->opcount, ptl_request->hdr_data));

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            PtlMDRelease(ptl_request->md_h);
            goto callback_error;
        }

        /* set the received length in the status, now that we know
           exactly how much data was sent. */
        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (ompi_mtl_portals4.protocol == rndv) {
            ptl_request->super.super.ompi_req->req_status._ucount +=
                ompi_mtl_portals4.eager_limit;
        }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif

        /* make sure the data is in the right place.  Use _ucount for
           the total length because it will be set correctly for all
           three protocols. mlength is only correct for eager, and
           delivery_len is the length of the buffer, not the length of
           the send. */
        ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                       ptl_request->delivery_ptr,
                                       ptl_request->super.super.ompi_req->req_status._ucount);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                __FILE__, __LINE__, ret);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
        }
        PtlMDRelease(ptl_request->md_h);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) completed, reply",
                             ptl_request->opcount, ptl_request->hdr_data));
        ptl_request->super.super.completion_callback(&ptl_request->super.super);
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate unexpected: %ld %ld %d",
                                msg_length, ptl_request->delivery_len,
                                MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* overflow case.  Short messages have the buffer stashed
           somewhere.  Long messages left in buffer at the source */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor,
                                            &iov, &iov_count,
                                            &max_data );
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (OPAL_UNLIKELY(ret < 0)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                     "Recv %lu (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (OPAL_UNLIKELY(PTL_OK != ret)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {
            if (ev->mlength > 0) {
                /* if rndv or triggered, copy the eager part to the right place */
                memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
            }

            ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength,
                           ((msg_length > ptl_request->delivery_len) ?
                            ptl_request->delivery_len : msg_length) - ev->mlength,
                           ev->initiator,
                           ev->hdr_data,
                           ev->mlength,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }
        }

        break;

    case PTL_EVENT_LINK:
        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}
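Example #12 calls ompi_mtl_datatype_unpack on the expected paths but inlines the convertor calls on the overflow path; the two have the same shape. A sketch of what such an MTL unpack helper reduces to (assumed structure, mirroring the PTL_EVENT_PUT_OVERFLOW branch above):

static int mtl_unpack_sketch(opal_convertor_t *convertor, void *start, size_t mlength)
{
    struct iovec iov = { .iov_base = start, .iov_len = mlength };
    uint32_t iov_count = 1;
    size_t max_data = mlength;
    int ret = opal_convertor_unpack(convertor, &iov, &iov_count, &max_data);

    return (ret < 0) ? OMPI_ERROR : OMPI_SUCCESS;
}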
Example #13
static int
local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count,
                                      ompi_datatype_t* recv_type, int recv_count,
                                      int chunk )
{
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t length = 0, done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;
    size_t slength, rlength;

    rlength = compute_buffer_length(recv_type, recv_count);
    slength = compute_buffer_length(send_type, send_count);
    pdst  = malloc( rlength );
    psrc  = malloc( slength );
    ptemp = malloc( chunk );

    /* initialize the buffers to prevent valgrind from complaining */
    for( size_t i = 0; i < slength; i++ )
        ((char*)psrc)[i] = i % 128 + 32;
    memset(pdst, 0, rlength);

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, &(send_type->super), send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, &(recv_type->super), recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );
    return OMPI_SUCCESS;
}
Example #14
File: unpack_ooo.c Project: Slbomber/ompi
static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) {
    int i, j, errors = 0;
    struct iovec a;
    unsigned int iov_count;
    size_t max_data;
    size_t pos;
    opal_convertor_t * pConv;

    for (j = 0; j < N; ++j) {
        pbar[j].i[0] = 123+j;
        pbar[j].i[1] = 789+j;
        pbar[j].d[0] = 123.456+j;
        pbar[j].d[1] = 789.123+j;
        memset(&bar[j].i[0], 0xFF, sizeof(int));
        memset(&bar[j].i[2], 0xFF, sizeof(int));
        bar[j].i[1] = 0;
        memset(&bar[j].d[0], 0xFF, sizeof(double));
        memset(&bar[j].d[2], 0xFF, sizeof(double));
        bar[j].d[1] = 0.0;
    }

    pConv = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &(newtype->super), N, bar ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        return OMPI_ERROR;
    }

    for (i=0; arr[i][0] != 0; i++) {
        /* add some garbage before and after the source data */
        a.iov_base = malloc(arr[i][0]+2048);
        if (NULL == a.iov_base) {
            printf("cannot malloc iov_base\n");
            return 1;
        }
        memset(a.iov_base, 0xAA, 1024);
        memcpy((char*)a.iov_base+1024, (char *)pbar + arr[i][1], arr[i][0]);
        memset((char*)a.iov_base+1024+arr[i][0], 0xAA, 1024);
        a.iov_base = (char*)a.iov_base + 1024;
        a.iov_len = arr[i][0];
        iov_count = 1;
        max_data = a.iov_len;
        pos = arr[i][1];
        opal_convertor_set_position(pConv, &pos);
        assert(arr[i][1] == pos);
        opal_convertor_unpack( pConv, &a, &iov_count, &max_data );
        a.iov_base = (char*)a.iov_base - 1024;
        free(a.iov_base);
    }

    for (j = 0; j < N; ++j) {
        if (bar[j].i[0] != pbar[j].i[0] ||
            bar[j].i[1] != 0 ||
            bar[j].i[2] != pbar[j].i[1] ||
            bar[j].d[0] != pbar[j].d[0] ||
            bar[j].d[1] != 0.0 ||
            bar[j].d[2] != pbar[j].d[1]) {
            if(0 == errors) {
                fprintf(stderr, "ERROR ! count=%d, position=%d, ptr = %p"
                        " got (%d,%d,%d,%g,%g,%g) expected (%d,%d,%d,%g,%g,%g)\n", 
                        N, j, (void*)&bar[j],
                        bar[j].i[0],
                        bar[j].i[1],
                        bar[j].i[2],
                        bar[j].d[0],
                        bar[j].d[1],
                        bar[j].d[2],
                        pbar[j].i[0],
                        0,
                        pbar[j].i[1],
                        pbar[j].d[0],
                        0.0,
                        pbar[j].d[1]);
                print_bar_pbar(&bar[j], &pbar[j]);
            }
            errors++;
        }
    }
    OBJ_RELEASE( pConv );
    return errors;
}
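The key technique in this out-of-order test is repositioning the convertor inside the packed stream before each unpack. A minimal sketch of that step, assuming a convertor already prepared with opal_convertor_prepare_for_recv(); unpack_fragment is an illustrative name, not OMPI API.

/* Sketch: unpack a fragment that starts at an arbitrary packed offset,
 * as the out-of-order test above does.  Illustrative, not OMPI API. */
static int32_t unpack_fragment(opal_convertor_t *pConv, void *frag,
                               size_t frag_len, size_t packed_offset)
{
    struct iovec iov = { .iov_base = frag, .iov_len = frag_len };
    uint32_t iov_count = 1;
    size_t max_data = frag_len;
    size_t pos = packed_offset;

    /* move the convertor to the fragment's offset in the packed stream */
    opal_convertor_set_position(pConv, &pos);
    return opal_convertor_unpack(pConv, &iov, &iov_count, &max_data);
}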
Example #15
0
static int
ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
        ompi_coll_portals4_request_t *request)
{
    int ret, line;

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank));

    ret = cleanup_scatter_handles(request);
    if (MPI_SUCCESS != ret) {
        line = __LINE__;
        goto err_hdlr;
    }

    ret = cleanup_sync_handles(request);
    if (MPI_SUCCESS != ret) {
        line = __LINE__;
        goto err_hdlr;
    }

    if (NULL != request->u.scatter.unpack_dst_buf) {
        uint32_t iov_count = 1;
        struct iovec iov;
        size_t max_data;

        ompi_coll_portals4_create_recv_converter (&request->u.scatter.recv_converter,
                request->u.scatter.unpack_dst_buf,
                ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
                request->u.scatter.unpack_dst_count,
                request->u.scatter.unpack_dst_dtype);

        iov.iov_len = request->u.scatter.packed_size;
        if (request->u.scatter.my_rank == request->u.scatter.root_rank) {
            /* unpack my data from the location in scatter_buf where it was packed */
            uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * request->u.scatter.my_rank;
            iov.iov_base = (IOVBASE_TYPE *)((char *)request->u.scatter.scatter_buf + offset);
        } else {
            iov.iov_base = (IOVBASE_TYPE *)request->u.scatter.scatter_buf;
        }
        opal_convertor_unpack(&request->u.scatter.recv_converter, &iov, &iov_count, &max_data);

        OBJ_DESTRUCT(&request->u.scatter.recv_converter);
    }

    if (request->u.scatter.free_after)
        free(request->u.scatter.scatter_buf);

    request->super.req_status.MPI_ERROR = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&ompi_request_lock);
    ompi_request_complete(&request->super, true);
    OPAL_THREAD_UNLOCK(&ompi_request_lock);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank));

    return OMPI_SUCCESS;

err_hdlr:
    request->super.req_status.MPI_ERROR = ret;

    if (request->u.scatter.free_after)
        free(request->u.scatter.scatter_buf);

    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}
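When the root unpacks its own contribution, the root-rank branch above seeks to its slice of the packed scatter buffer, which is laid out rank by rank. A hedged sketch of that offset computation; rank_slice is an illustrative helper, not OMPI API.

/* Illustrative helper, not OMPI API: locate one rank's packed slice.
 * Assumes contributions were packed contiguously, rank by rank, each
 * occupying (datatype extent * count) bytes. */
static void *rank_slice(void *scatter_buf, size_t src_extent,
                        size_t src_count, int rank)
{
    return (char *)scatter_buf + src_extent * src_count * (size_t)rank;
}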
Example #16
0
/* called when a receive should be progressed */
static int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request =
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    /* as soon as we've seen any event associated with a request, it's
       started */
    ptl_request->req_started = true;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            ret = PTL_FAIL;
            goto callback_error;
        }

        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate expected: %ld %ld",
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

        if (ev->mlength < msg_length) {
            OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output,
                                 "Truncated message, some PtlGet are required (protocol = %d)",
                                 ompi_mtl_portals4.protocol));
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && msg_length > ev->mlength) {
            /* This is a non-short rendezvous message that is not yet
               complete; we only have the first part.  Issue the get to
               pull the remainder of the message. */
            ret = read_msg((char*)ptl_request->delivery_ptr + ev->mlength,
                           ((msg_length > ptl_request->delivery_len) ? ptl_request->delivery_len : msg_length) - ev->mlength,
                           ev->initiator,
                           ev->hdr_data,
                           ev->mlength,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }
        } else {
            /* If we're either using the eager protocol or were a
               short message, all data has been received, so complete
               the message. */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            ret = PTL_FAIL;
            goto callback_error;
        }

        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate unexpected: %ld %ld %d",
                                msg_length, ptl_request->delivery_len,
                                MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* overflow case.  Short messages have the buffer stashed
           somewhere.  Long messages left in buffer at the source */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor,
                                            &iov, &iov_count,
                                            &max_data );
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (OPAL_UNLIKELY(ret < 0)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                     "Recv %lu (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (OPAL_UNLIKELY(PTL_OK != ret)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {

            /* For long messages in the overflow list, ev->mlength = 0 */
            ptl_request->super.super.ompi_req->req_status._ucount = 0;

            ret = read_msg((char*)ptl_request->delivery_ptr,
                           (msg_length > ptl_request->delivery_len) ? ptl_request->delivery_len : msg_length,
                           ev->initiator,
                           ev->hdr_data,
                           0,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }
        }

        break;

    case PTL_EVENT_LINK:
        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}
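Both read_msg() calls above clamp the amount fetched to the receive buffer, having already flagged MPI_ERR_TRUNCATE when the sender's message is longer. That clamping rule written out as a standalone helper; deliver_len is an illustrative name, not OMPI API.

/* Illustrative helper, not OMPI API: bytes actually delivered to the
 * receiver.  Anything beyond delivery_len was already reported as
 * MPI_ERR_TRUNCATE by the caller. */
static inline size_t deliver_len(size_t msg_length, size_t delivery_len)
{
    return (msg_length > delivery_len) ? delivery_len : msg_length;
}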
Example #17
0
int mca_common_ompio_file_read (ompio_file_t *fh,
			      void *buf,
			      int count,
			      struct ompi_datatype_t *datatype,
			      ompi_status_public_t *status)
{
    int ret = OMPI_SUCCESS;

    size_t total_bytes_read = 0;       /* total bytes that have been read*/
    size_t bytes_per_cycle = 0;        /* total read in each cycle by each process*/
    int index = 0;
    int cycles = 0;

    uint32_t iov_count = 0;
    struct iovec *decoded_iov = NULL;

    size_t max_data=0, real_bytes_read=0;
    size_t spc=0;
    ssize_t ret_code=0;
    int i = 0; /* index into the decoded iovec of the buffer */
    int j = 0; /* index into the file view iovec */

    if (fh->f_amode & MPI_MODE_WRONLY) {
        /* improper use of the file mode: reading a file opened write-only */
        ret = MPI_ERR_ACCESS;
        return ret;
    }

    if ( 0 == count ) {
        if ( MPI_STATUS_IGNORE != status ) {
            status->_ucount = 0;
        }
        return ret;
    }


#if OPAL_CUDA_SUPPORT
    int is_gpu, is_managed;
    opal_convertor_t convertor;
    mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
    if ( is_gpu && !is_managed ) {
        char *tbuf=NULL;

        OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);
    }
    else {
        mca_common_ompio_decode_datatype (fh,
                                          datatype,
                                          count,
                                          buf,
                                          &max_data,
                                          &decoded_iov,
                                          &iov_count);
    }
#else
    mca_common_ompio_decode_datatype (fh,
                                      datatype,
                                      count,
                                      buf,
                                      &max_data,
                                      &decoded_iov,
                                      &iov_count);
#endif
    if ( 0 < max_data && 0 == fh->f_iov_count  ) {
        if ( MPI_STATUS_IGNORE != status ) {
            status->_ucount = 0;
        }
        return OMPI_SUCCESS;
    }

    if ( -1 == OMPIO_MCA_GET(fh, cycle_buffer_size) ) {
        bytes_per_cycle = max_data;
    }
    else {
        bytes_per_cycle = OMPIO_MCA_GET(fh, cycle_buffer_size);
    }
    cycles = ceil((double)max_data/bytes_per_cycle);

#if 0
    printf ("Bytes per Cycle: %zu   Cycles: %d   max_data: %zu\n", bytes_per_cycle, cycles, max_data);
#endif

    j = fh->f_index_in_file_view;

    for (index = 0; index < cycles; index++) {

        mca_common_ompio_build_io_array ( fh,
                                          index,
                                          cycles,
                                          bytes_per_cycle,
                                          max_data,
                                          iov_count,
                                          decoded_iov,
                                          &i,
                                          &j,
                                          &total_bytes_read,
                                          &spc,
                                          &fh->f_io_array,
                                          &fh->f_num_of_io_entries);

        if (fh->f_num_of_io_entries) {
            ret_code = fh->f_fbtl->fbtl_preadv (fh);
            if ( 0 <= ret_code ) {
                real_bytes_read += (size_t)ret_code;
            }
        }

        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
    }

#if OPAL_CUDA_SUPPORT
    if ( is_gpu && !is_managed ) {
        size_t pos=0;

        opal_convertor_unpack (&convertor, decoded_iov, &iov_count, &pos );
        opal_convertor_cleanup (&convertor);
        mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
    }
#endif
    if (NULL != decoded_iov) {
        free (decoded_iov);
        decoded_iov = NULL;
    }

    if ( MPI_STATUS_IGNORE != status ) {
        status->_ucount = real_bytes_read;
    }

    return ret;
}
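The cycle count above rounds up with floating-point ceil(). An equivalent integer-only form avoids pulling in floating-point rounding for very large sizes; a sketch, assuming bytes_per_cycle > 0 (cycle_count is an illustrative name, not OMPI API).

/* Illustrative helper, not OMPI API: integer ceiling division for the
 * number of read cycles.  Assumes bytes_per_cycle > 0. */
static inline int cycle_count(size_t max_data, size_t bytes_per_cycle)
{
    return (int)((max_data + bytes_per_cycle - 1) / bytes_per_cycle);
}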
Example #18
0
/* called when a receive should be progressed */
int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request = 
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            ret = PTL_FAIL; /* give callback_error a defined error code */
            goto callback_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate expected: %ld %ld", 
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
            ptl_md_t md;

            md.start = (char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ompi_mtl_portals4.eager_limit,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                PtlMDRelease(ptl_request->md_h);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }

        } else {
            /* make sure the data is in the right place */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            if (OMPI_SUCCESS != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_REPLY:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got reply event",
                             ptl_request->opcount, ptl_request->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            PtlMDRelease(ptl_request->md_h);
            ret = PTL_FAIL; /* give callback_error a defined error code */
            goto callback_error;
        }
        /* set the status - most of this filled in right after issuing
           the PtlGet */
        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (ompi_mtl_portals4.protocol == rndv) {
            ptl_request->super.super.ompi_req->req_status._ucount +=
                ompi_mtl_portals4.eager_limit;
        }

        /* make sure the data is in the right place.  Use _ucount for
           the total length because it will be set correctly for all
           three protocols. mlength is only correct for eager, and
           delivery_len is the length of the buffer, not the length of
           the send. */
        ret = ompi_mtl_datatype_unpack(ptl_request->convertor, 
                                       ptl_request->delivery_ptr, 
                                       ptl_request->super.super.ompi_req->req_status._ucount);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                __FILE__, __LINE__, ret);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
        }
        PtlMDRelease(ptl_request->md_h);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, reply",
                             ptl_request->opcount, ptl_request->hdr_data));
        ptl_request->super.super.completion_callback(&ptl_request->super.super);
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            ret = PTL_FAIL; /* give callback_error a defined error code */
            goto callback_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate unexpected: %ld %ld %d", 
                                msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* overflow case.  Short messages have the buffer stashed
           somewhere.  Long messages left in buffer at the source */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor, 
                                            &iov, &iov_count,
                                            &max_data );
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (ret < 0) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (PTL_OK != ret) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {
            ptl_md_t md;

            if (ev->mlength > 0) {
                /* if rndv or triggered, copy the eager part to the right place */
                memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
            }

            md.start = (char*) ptl_request->delivery_ptr + ev->mlength;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ev->mlength;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) getting long data",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ev->mlength,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                PtlMDRelease(ptl_request->md_h);
                goto callback_error;
            }
        }

        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = 
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}