/**
 * Convertor advance hook used by the pessimist sender-based logging.
 *
 * Temporarily restores the flags and advance function saved in the request's
 * sender-based state, packs through the regular convertor, copies every
 * produced iovec entry to the sender-based cursor, and finally re-installs
 * this function as the convertor's advance routine.
 *
 * @param pConvertor convertor attached to the send request
 * @param iov        iovec array filled by the pack
 * @param out_size   in/out number of iovec entries
 * @param max_data   in/out number of bytes packed
 * @return the value returned by opal_convertor_pack()
 */
int32_t vprotocol_pessimist_sender_based_convertor_advance(opal_convertor_t* pConvertor,
                                                           struct iovec* iov,
                                                           uint32_t* out_size,
                                                           size_t* max_data)
{
    mca_vprotocol_pessimist_send_request_t *req = VPESSIMIST_CONV_REQ(pConvertor);
    unsigned int idx = 0;
    size_t remaining;
    int rc;

    /* put back the original convertor behaviour for the duration of the pack */
    pConvertor->flags = req->sb.conv_flags;
    pConvertor->fAdvance = req->sb.conv_advance;
    rc = opal_convertor_pack(pConvertor, iov, out_size, max_data);
    V_OUTPUT_VERBOSE(39, "pessimist:\tsb\tpack\t%"PRIsize_t, *max_data);

    /* mirror everything that was just packed at the sender-based cursor */
    remaining = *max_data;
    while (remaining > 0) {
        assert(idx < *out_size);
        MEMCPY((void *) req->sb.cursor, iov[idx].iov_base, iov[idx].iov_len);
        remaining -= iov[idx].iov_len;
        req->sb.cursor += iov[idx].iov_len;
        idx++;
    }
    assert(remaining == 0);

    /* hook ourselves back in for the next pack */
    pConvertor->flags &= ~CONVERTOR_NO_OP;
    pConvertor->fAdvance = &vprotocol_pessimist_sender_based_convertor_advance;
    return rc;
}
/**
 * Build a send descriptor for the SCIF BTL.
 *
 * When in-place sends are enabled in the module flags, the convertor does not
 * need buffers and reserve <= 128, a DMA fragment is allocated and its second
 * segment points directly at the convertor's current data pointer. Otherwise
 * an eager fragment is allocated and the data is packed behind the reserved
 * bytes of segment 0.
 *
 * @param size in: bytes requested; out (buffered path): bytes actually packed
 * @return the fragment's base descriptor, or NULL if no fragment could be
 *         allocated or the pack failed
 */
static inline struct mca_btl_base_descriptor_t *
mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
                               struct opal_convertor_t *convertor, uint8_t order, size_t reserve,
                               size_t *size, uint32_t flags)
{
    mca_btl_scif_base_frag_t *frag = NULL;

    if (OPAL_LIKELY((mca_btl_scif_module.super.btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) &&
                    !opal_convertor_need_buffers (convertor) && reserve <= 128)) {
        /* inplace send: segment 1 references the user data directly */
        void *user_ptr;

        opal_convertor_get_current_pointer (convertor, &user_ptr);

        (void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        frag->segments[0].seg_len = reserve;
        frag->segments[1].seg_addr.pval = user_ptr;
        frag->segments[1].seg_len = *size;
        frag->base.des_segment_count = 2;
    } else {
        /* buffered send: pack the payload right after the reserved area */
        (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        if (0 != *size) {
            size_t packed = *size;
            uint32_t niov = 1;
            struct iovec vec;

            vec.iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->segments[0].seg_addr.pval + reserve);
            vec.iov_len = *size;

            if (OPAL_UNLIKELY(opal_convertor_pack (convertor, &vec, &niov, &packed) < 0)) {
                mca_btl_scif_frag_return (frag);
                return NULL;
            }

            *size = packed;
        }

        frag->segments[0].seg_len = reserve + *size;
        frag->base.des_segment_count = 1;
    }

    frag->base.des_segments = frag->segments;
    frag->base.order = order;
    frag->base.des_flags = flags;

    return &frag->base;
}
/**
 * Start a buffered send request.
 *
 * For a non-empty message, allocates room from the bsend allocator (under
 * mca_pml_bsend_mutex), packs the user data into it, re-targets the request's
 * convertor at the packed copy and bumps the pending bsend counter.
 *
 * @param request the send request (cast to mca_pml_base_send_request_t)
 * @return OMPI_SUCCESS, OMPI_ERR_BUFFER when no buffer is attached or the
 *         allocation fails, OMPI_ERROR when packing fails
 */
int mca_pml_base_bsend_request_start(ompi_request_t* request)
{
    mca_pml_base_send_request_t* bsend_req = (mca_pml_base_send_request_t*)request;
    struct iovec pack_iov;
    unsigned int pack_iov_count = 1;
    size_t packed_bytes;
    int rc;

    /* empty messages need no buffering at all */
    if( !(bsend_req->req_bytes_packed > 0) ) {
        return OMPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&mca_pml_bsend_mutex);
    if( NULL == mca_pml_bsend_addr ) {
        /* no buffer has been provided */
        bsend_req->req_addr = NULL;
    } else {
        /* carve out room for the packed message */
        bsend_req->req_addr = mca_pml_bsend_allocator->alc_alloc(
            mca_pml_bsend_allocator, bsend_req->req_bytes_packed, 0, NULL);
        if( NULL == bsend_req->req_addr ) {
            /* release resources when request is freed */
            bsend_req->req_base.req_pml_complete = true;
        }
    }
    OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);

    if( NULL == bsend_req->req_addr ) {
        return OMPI_ERR_BUFFER;
    }

    /* the convertor was set up earlier; just pack into the new buffer */
    pack_iov.iov_base = (IOVBASE_TYPE*)bsend_req->req_addr;
    pack_iov.iov_len = bsend_req->req_bytes_packed;
    packed_bytes = pack_iov.iov_len;
    rc = opal_convertor_pack( &bsend_req->req_base.req_convertor,
                              &pack_iov, &pack_iov_count, &packed_bytes );
    if( rc < 0 ) {
        return OMPI_ERROR;
    }

    /* from now on the convertor describes the packed copy, at position zero */
    opal_convertor_prepare_for_send( &bsend_req->req_base.req_convertor,
                                     &(ompi_mpi_packed.dt.super),
                                     packed_bytes, bsend_req->req_addr );

    /* one more pending buffered request */
    mca_pml_bsend_count++;

    return OMPI_SUCCESS;
}
/**
 * Stream callback: pack up to 'length' bytes of the convertor's data,
 * starting at byte 'offset', into 'buffer'.
 *
 * @return the number of bytes reported by opal_convertor_pack()
 */
static size_t mca_pml_yalla_stream_pack(void *buffer, size_t length, size_t offset,
                                        opal_convertor_t *convertor)
{
    struct iovec dest;
    uint32_t dest_count = 1;

    dest.iov_len  = length;
    dest.iov_base = buffer;

    /* seek first, then pack; 'length' is updated in place by the pack */
    opal_convertor_set_position(convertor, &offset);
    opal_convertor_pack(convertor, &dest, &dest_count, &length);

    return length;
}
/**
 * MXM stream callback: 'context' is the opal convertor of the send; pack up to
 * 'length' bytes starting at byte 'offset' into 'buffer'.
 *
 * @return the number of bytes reported by opal_convertor_pack()
 */
static size_t ompi_mtl_mxm_stream_send(void *buffer, size_t length, size_t offset, void *context)
{
    opal_convertor_t *conv = (opal_convertor_t *) context;
    uint32_t dest_count = 1;
    struct iovec dest;

    dest.iov_base = buffer;
    dest.iov_len  = length;

    /* reposition the convertor, then let the pack update 'length' in place */
    opal_convertor_set_position(conv, &offset);
    opal_convertor_pack(conv, &dest, &dest_count, &length);

    return length;
}
mca_btl_base_descriptor_t* mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base, struct mca_btl_base_endpoint_t* peer, struct opal_convertor_t* convertor, uint8_t order, size_t reserve, size_t* size, uint32_t flags) { struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; mca_btl_portals4_frag_t* frag; size_t max_data = *size; struct iovec iov; uint32_t iov_count = 1; int ret; OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_prepare_src NI=%d reserve=%ld size=%ld max_data=%ld\n", portals4_btl->interface_num, reserve, *size, max_data)); if (0 != reserve || 0 != opal_convertor_need_buffers(convertor)) { OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "mca_btl_portals4_prepare_src NEED BUFFERS or RESERVE\n")); frag = (mca_btl_portals4_frag_t*) mca_btl_portals4_alloc(btl_base, peer, MCA_BTL_NO_ORDER, max_data + reserve, flags); if (NULL == frag) { return NULL; } if (max_data + reserve > frag->size) { max_data = frag->size - reserve; } iov.iov_len = max_data; iov.iov_base = (unsigned char*) frag->segments[0].base.seg_addr.pval + reserve; ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); *size = max_data; if (ret < 0) { mca_btl_portals4_free(btl_base, (mca_btl_base_descriptor_t *) frag); return NULL; } frag->segments[0].base.seg_len = max_data + reserve; frag->base.des_segment_count = 1; } frag->base.des_segments = &frag->segments[0].base; frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK; frag->base.order = MCA_BTL_NO_ORDER; return &frag->base; }
/** * Pack data * * @param btl (IN) BTL module */ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, struct opal_convertor_t* convertor, uint8_t order, size_t reserve, size_t* size, uint32_t flags) { mca_btl_smcuda_frag_t* frag; struct iovec iov; uint32_t iov_count = 1; size_t max_data = *size; int rc; if ( reserve + max_data <= mca_btl_smcuda_component.eager_limit ) { MCA_BTL_SMCUDA_FRAG_ALLOC_EAGER(frag); } else { MCA_BTL_SMCUDA_FRAG_ALLOC_MAX(frag); } if( OPAL_UNLIKELY(NULL == frag) ) { return NULL; } if( OPAL_UNLIKELY(reserve + max_data > frag->size) ) { max_data = frag->size - reserve; } iov.iov_len = max_data; iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag->segment.seg_addr.pval)) + reserve); rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if( OPAL_UNLIKELY(rc < 0) ) { MCA_BTL_SMCUDA_FRAG_RETURN(frag); return NULL; } frag->segment.seg_len = reserve + max_data; frag->base.des_segments = &frag->segment; frag->base.des_segment_count = 1; frag->base.order = MCA_BTL_NO_ORDER; frag->base.des_flags = flags; *size = max_data; return &frag->base; }
/** * Prepare data for send * * @param btl (IN) BTL module */ static struct mca_btl_base_descriptor_t *mca_btl_self_prepare_src (struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, struct opal_convertor_t *convertor, uint8_t order, size_t reserve, size_t *size, uint32_t flags) { bool inline_send = !opal_convertor_need_buffers(convertor); size_t buffer_len = reserve + (inline_send ? 0 : *size); mca_btl_self_frag_t *frag; frag = (mca_btl_self_frag_t *) mca_btl_self_alloc (btl, endpoint, order, buffer_len, flags); if (OPAL_UNLIKELY(NULL == frag)) { return NULL; } /* non-contigous data */ if (OPAL_UNLIKELY(!inline_send)) { struct iovec iov = {.iov_len = *size, .iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->data + reserve)}; size_t max_data = *size; uint32_t iov_count = 1; int rc; rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_data); if(rc < 0) { mca_btl_self_free (btl, &frag->base); return NULL; } *size = max_data; frag->segments[0].seg_len = reserve + max_data; } else { void *data_ptr; opal_convertor_get_current_pointer (convertor, &data_ptr); frag->segments[1].seg_addr.pval = data_ptr; frag->segments[1].seg_len = *size; frag->base.des_segment_count = 2; } return &frag->base; }
/**
 * Pack 'count' elements of 'datatype' from 'buffer' segment by segment.
 *
 * For every entry of 'segments', the convertor is moved to the segment's
 * position and the segment's buffer is filled with segments[i].size bytes.
 * A position mismatch stops the loop; a size mismatch is only reported.
 *
 * @param segment_size unused by this function
 * @return the index of the first segment that could not be positioned, or
 *         seg_count when every segment was processed
 */
static int pack_segments( ompi_datatype_t* datatype, int count,
                          size_t segment_size,
                          ddt_segment_t* segments, int seg_count,
                          void* buffer )
{
    size_t max_size, position;
    opal_convertor_t* convertor;
    struct iovec iov;
    int i;
    uint32_t iov_count;

    convertor = opal_convertor_create( opal_local_arch, 0 );
    opal_convertor_prepare_for_send( convertor, &(datatype->super), count, buffer );

    for( i = 0; i < seg_count; i++ ) {
        iov.iov_len  = segments[i].size;
        iov.iov_base = segments[i].buffer;
        max_size = iov.iov_len;

        /* set_position updates 'position' with the location actually reached */
        position = segments[i].position;
        opal_convertor_set_position( convertor, &position );
        if( position != segments[i].position ) {
            opal_output( 0, "Setting position failed (%lu != %lu)\n",
                         (unsigned long)segments[i].position, (unsigned long)position );
            break;
        }

        iov_count = 1;
        opal_convertor_pack( convertor, &iov, &iov_count, &max_size );
        if( max_size != segments[i].size ) {
            opal_output( 0, "Amount of packed data do not match (%lu != %lu)\n",
                         (unsigned long)max_size, (unsigned long)segments[i].size );
            /* every %lu argument is cast: the size was previously passed as-is */
            opal_output( 0, "Segment %d position %lu size %lu\n", i,
                         (unsigned long)segments[i].position, (unsigned long)segments[i].size );
        }
    }
    OBJ_RELEASE(convertor);
    return i;
}
/**
 * Select how an MXM send request describes its payload.
 *
 * The packed size of the convertor is stored in the request's buffer length.
 * An empty message becomes a NULL buffer; data that needs buffers is sent as
 * a stream using ompi_mtl_mxm_stream_send with the convertor as context;
 * otherwise opal_convertor_pack() is called with a NULL iov_base and the
 * address it leaves in iov_base becomes the buffer pointer.
 *
 * @return OMPI_SUCCESS in all cases
 */
static inline __opal_attribute_always_inline__ int
ompi_mtl_mxm_choose_send_datatype(mxm_send_req_t *mxm_send_req, opal_convertor_t *convertor)
{
    size_t *buffer_len = &mxm_send_req->base.data.buffer.length;

    opal_convertor_get_packed_size(convertor, buffer_len);

    if (0 == *buffer_len) {
        /* nothing to send */
        mxm_send_req->base.data.buffer.ptr = NULL;
        mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
    } else if (opal_convertor_need_buffers(convertor)) {
        /* data must go through the convertor: hand it over as a stream */
        mxm_send_req->base.context = convertor;
        mxm_send_req->base.data_type = MXM_REQ_DATA_STREAM;
        mxm_send_req->base.data.stream.length = *buffer_len;
        mxm_send_req->base.data.stream.cb = ompi_mtl_mxm_stream_send;
    } else {
        /* a NULL iov_base lets the pack report the data address back */
        uint32_t niov = 1;
        struct iovec vec;

        mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;

        vec.iov_base = NULL;
        vec.iov_len = *buffer_len;
        opal_convertor_pack(convertor, &vec, &niov, buffer_len);

        mxm_send_req->base.data.buffer.ptr = vec.iov_base;
    }

    return OMPI_SUCCESS;
}
static int local_copy_with_convertor( const opal_datatype_t const* pdt, int count, int chunk ) { OPAL_PTRDIFF_TYPE extent; void *pdst = NULL, *psrc = NULL, *ptemp = NULL; opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL; struct iovec iov; uint32_t iov_count; size_t max_data; int32_t length = 0, done1 = 0, done2 = 0; TIMER_DATA_TYPE start, end, unpack_start, unpack_end; long total_time, unpack_time = 0; opal_datatype_type_extent( pdt, &extent ); pdst = malloc( extent * count ); psrc = malloc( extent * count ); ptemp = malloc( chunk ); { int i = 0; for( ; i < (count * extent); ((char*)psrc)[i] = i % 128 + 32, i++ ); } memset( pdst, 0, count * extent ); send_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) { printf( "Unable to create the send convertor. Is the datatype committed ?\n" ); goto clean_and_return; } recv_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) { printf( "Unable to create the recv convertor. Is the datatype committed ?\n" ); goto clean_and_return; } cache_trash(); /* make sure the cache is useless */ GET_TIME( start ); while( (done1 & done2) != 1 ) { /* They are supposed to finish in exactly the same time. */ if( done1 | done2 ) { printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n", (done1 ? "finish" : "not finish"), (done2 ? 
"finish" : "not finish") ); } max_data = chunk; iov_count = 1; iov.iov_base = ptemp; iov.iov_len = chunk; if( done1 == 0 ) { done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data ); } if( done2 == 0 ) { GET_TIME( unpack_start ); done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data ); GET_TIME( unpack_end ); unpack_time += ELAPSED_TIME( unpack_start, unpack_end ); } length += max_data; } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "copying same data-type using convertors in %ld microsec\n", total_time ); printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time, total_time - unpack_time ); clean_and_return: if( NULL != send_convertor ) OBJ_RELEASE( send_convertor ); if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor ); if( NULL != pdst ) free( pdst ); if( NULL != psrc ) free( psrc ); if( NULL != ptemp ) free( ptemp ); return OPAL_SUCCESS; }
static int local_copy_with_convertor( opal_datatype_t const * const pdt, int count, int chunk ) { OPAL_PTRDIFF_TYPE lb, extent; void *pdst = NULL, *psrc = NULL, *ptemp = NULL; char *odst, *osrc; opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL; struct iovec iov; uint32_t iov_count; size_t max_data, length = 0, malloced_size; int32_t done1 = 0, done2 = 0, errors = 0; TIMER_DATA_TYPE start, end, unpack_start, unpack_end; long total_time, unpack_time = 0; malloced_size = compute_memory_size(pdt, count); opal_datatype_get_extent( pdt, &lb, &extent ); odst = (char*)malloc( malloced_size ); osrc = (char*)malloc( malloced_size ); ptemp = malloc( chunk ); { for( size_t i = 0; i < malloced_size; osrc[i] = i % 128 + 32, i++ ); memcpy(odst, osrc, malloced_size); } pdst = odst - lb; psrc = osrc - lb; send_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) { printf( "Unable to create the send convertor. Is the datatype committed ?\n" ); goto clean_and_return; } recv_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) { printf( "Unable to create the recv convertor. Is the datatype committed ?\n" ); goto clean_and_return; } cache_trash(); /* make sure the cache is useless */ GET_TIME( start ); while( (done1 & done2) != 1 ) { /* They are supposed to finish in exactly the same time. */ if( done1 | done2 ) { printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n", (done1 ? "finish" : "not finish"), (done2 ? 
"finish" : "not finish") ); } max_data = chunk; iov_count = 1; iov.iov_base = ptemp; iov.iov_len = chunk; if( done1 == 0 ) { done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data ); } if( done2 == 0 ) { GET_TIME( unpack_start ); done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data ); GET_TIME( unpack_end ); unpack_time += ELAPSED_TIME( unpack_start, unpack_end ); } length += max_data; if( outputFlags & RESET_CONVERTORS ) { struct dt_stack_t stack[1+send_convertor->stack_pos]; int i, stack_pos = send_convertor->stack_pos; size_t pos; if( 0 == done1 ) { memcpy(stack, send_convertor->pStack, (1+send_convertor->stack_pos) * sizeof(struct dt_stack_t)); pos = 0; opal_convertor_set_position(send_convertor, &pos); pos = length; opal_convertor_set_position(send_convertor, &pos); assert(pos == length); for(i = 0; i <= stack_pos; i++ ) { if( stack[i].index != send_convertor->pStack[i].index ) {errors = 1; printf("send stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n", i, stack[i].index, send_convertor->pStack[i].index, length, pdt->size * count);} if( stack[i].count != send_convertor->pStack[i].count ) { if( stack[i].type == send_convertor->pStack[i].type ) { {errors = 1; printf("send stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n", i, stack[i].count, send_convertor->pStack[i].count, length, pdt->size * count);} } else { if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= send_convertor->pStack[i].type) ) {errors = 1; printf("send stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n", i, (int)stack[i].type, (int)send_convertor->pStack[i].type, length, pdt->size * count);} else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) != (send_convertor->pStack[i].count * opal_datatype_basicDatatypes[send_convertor->pStack[i].type]->size) ) {errors = 1; printf("send stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed 
%lu/%lu)\n", i, (int)stack[i].type, stack[i].count, (int)send_convertor->pStack[i].type, send_convertor->pStack[i].count, length, pdt->size * count);} } } if( stack[i].disp != send_convertor->pStack[i].disp ) {errors = 1; printf("send stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n", i, (void*)stack[i].disp, (void*)send_convertor->pStack[i].disp, length, pdt->size * count);} if(0 != errors) {assert(0); exit(-1);} } } if( 0 == done2 ) { memcpy(stack, recv_convertor->pStack, (1+recv_convertor->stack_pos) * sizeof(struct dt_stack_t)); pos = 0; opal_convertor_set_position(recv_convertor, &pos); pos = length; opal_convertor_set_position(recv_convertor, &pos); assert(pos == length); for(i = 0; i <= stack_pos; i++ ) { if( stack[i].index != recv_convertor->pStack[i].index ) {errors = 1; printf("recv stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n", i, stack[i].index, recv_convertor->pStack[i].index, length, pdt->size * count);} if( stack[i].count != recv_convertor->pStack[i].count ) { if( stack[i].type == recv_convertor->pStack[i].type ) { {errors = 1; printf("recv stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n", i, stack[i].count, recv_convertor->pStack[i].count, length, pdt->size * count);} } else { if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= recv_convertor->pStack[i].type) ) {errors = 1; printf("recv stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n", i, (int)stack[i].type, (int)recv_convertor->pStack[i].type, length, pdt->size * count);} else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) != (recv_convertor->pStack[i].count * opal_datatype_basicDatatypes[recv_convertor->pStack[i].type]->size) ) {errors = 1; printf("recv stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n", i, (int)stack[i].type, stack[i].count, (int)recv_convertor->pStack[i].type, recv_convertor->pStack[i].count, length, pdt->size * 
count);} } } if( stack[i].disp != recv_convertor->pStack[i].disp ) {errors = 1; printf("recv stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n", i, (void*)stack[i].disp, (void*)recv_convertor->pStack[i].disp, length, pdt->size * count);} if(0 != errors) {assert(0); exit(-1);} } } } } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "copying same data-type using convertors in %ld microsec\n", total_time ); printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time, total_time - unpack_time ); if(outputFlags & VALIDATE_DATA) { for( size_t i = errors = 0; i < malloced_size; i++ ) { if( odst[i] != osrc[i] ) { printf("error at position %lu (%d != %d)\n", (unsigned long)i, (int)(odst[i]), (int)(osrc[i])); errors++; if(outputFlags & QUIT_ON_FIRST_ERROR) { opal_datatype_dump(pdt); assert(0); exit(-1); } } } if( 0 == errors ) { printf("Validation check succesfully passed\n"); } else { printf("Found %d errors. Giving up!\n", errors); exit(-1); } } clean_and_return: if( NULL != send_convertor ) OBJ_RELEASE( send_convertor ); if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor ); if( NULL != odst ) free( odst ); if( NULL != osrc ) free( osrc ); if( NULL != ptemp ) free( ptemp ); return (0 == errors ? OPAL_SUCCESS : errors); }
mca_spml_mkey_t *mca_spml_yoda_register(void* addr, size_t size, uint64_t shmid, int *count) { int i; mca_btl_base_descriptor_t* des = NULL; const opal_datatype_t *datatype = &opal_datatype_wchar; opal_convertor_t convertor; mca_spml_mkey_t *mkeys; struct yoda_btl *ybtl; oshmem_proc_t *proc_self; mca_spml_yoda_context_t* yoda_context; struct iovec iov; uint32_t iov_count = 1; SPML_VERBOSE(10, "address %p len %llu", addr, (unsigned long long)size); *count = 0; /* make sure everything is initialized to 0 */ mkeys = (mca_spml_mkey_t *) calloc(1, mca_spml_yoda.n_btls * sizeof(*mkeys)); if (!mkeys) { return NULL ; } proc_self = oshmem_proc_group_find(oshmem_group_all, oshmem_my_proc_id()); /* create convertor */ OBJ_CONSTRUCT(&convertor, opal_convertor_t); mca_bml.bml_register( MCA_SPML_YODA_PUT, mca_yoda_put_callback, NULL ); mca_bml.bml_register( MCA_SPML_YODA_GET, mca_yoda_get_callback, NULL ); mca_bml.bml_register( MCA_SPML_YODA_GET_RESPONSE, mca_yoda_get_response_callback, NULL ); /* Register proc memory in every rdma BTL. */ for (i = 0; i < mca_spml_yoda.n_btls; i++) { ybtl = &mca_spml_yoda.btl_type_map[i]; mkeys[i].va_base = addr; if (!ybtl->use_cnt) { SPML_VERBOSE(10, "%s: present but not in use. 
SKIP registration", btl_type2str(ybtl->btl_type)); continue; } /* If we have shared memory just save its id*/ if (YODA_BTL_SM == ybtl->btl_type && MEMHEAP_SHM_INVALID != (int) MEMHEAP_SHM_GET_ID(shmid)) { mkeys[i].u.key = shmid; mkeys[i].va_base = 0; continue; } yoda_context = calloc(1, sizeof(*yoda_context)); mkeys[i].spml_context = yoda_context; yoda_context->registration = NULL; if (NULL != ybtl->btl->btl_prepare_src) { /* initialize convertor for source descriptor*/ opal_convertor_copy_and_prepare_for_recv(proc_self->proc_convertor, datatype, size, addr, 0, &convertor); if (NULL != ybtl->btl->btl_mpool && NULL != ybtl->btl->btl_mpool->mpool_register) { iov.iov_len = size; iov.iov_base = NULL; opal_convertor_pack(&convertor, &iov, &iov_count, &size); ybtl->btl->btl_mpool->mpool_register(ybtl->btl->btl_mpool, iov.iov_base, size, 0, &yoda_context->registration); } /* initialize convertor for source descriptor*/ opal_convertor_copy_and_prepare_for_recv(proc_self->proc_convertor, datatype, size, addr, 0, &convertor); /* register source memory */ des = ybtl->btl->btl_prepare_src(ybtl->btl, 0, yoda_context->registration, &convertor, MCA_BTL_NO_ORDER, 0, &size, 0); if (NULL == des) { SPML_ERROR("%s: failed to register source memory. ", btl_type2str(ybtl->btl_type)); } yoda_context->btl_src_descriptor = des; mkeys[i].u.data = des->des_src; mkeys[i].len = ybtl->btl->btl_seg_size; } SPML_VERBOSE(5, "rank %d btl %s address 0x%p len %llu shmid 0x%X|0x%X", oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid)); } OBJ_DESTRUCT(&convertor); *count = mca_spml_yoda.n_btls; return mkeys; }
/**
 * Pack data
 *
 * Builds a send descriptor holding 'reserve' bytes followed by (or pointing
 * at) *size bytes of the convertor's data. Three cases are handled:
 *  - the convertor needs buffers: the data is packed into a fragment;
 *  - XPMEM builds, XPMEM single-copy mechanism and a total size above
 *    max_inline_send: the user buffer is referenced through a second segment;
 *  - otherwise: the data is copied with memcpy into the fragment, or into a
 *    fast box when one can be reserved.
 *
 * @param btl       (IN)     BTL module
 * @param endpoint  (IN)     destination endpoint, used to allocate the fragment
 * @param convertor (IN)     data to send
 * @param order     (IN)     stored in the descriptor
 * @param reserve   (IN)     bytes to keep free before the payload
 * @param size      (IN/OUT) bytes requested; updated by the pack on the
 *                           convertor path
 * @param flags     (IN)     descriptor flags
 * @return the fragment's base descriptor, or NULL if no fragment could be
 *         allocated or the pack failed
 */
static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl,
                                                            struct mca_btl_base_endpoint_t *endpoint,
                                                            struct opal_convertor_t *convertor,
                                                            uint8_t order, size_t reserve, size_t *size,
                                                            uint32_t flags)
{
    const size_t total_size = reserve + *size;
    mca_btl_vader_frag_t *frag;
    unsigned char *fbox;
    void *data_ptr;
    int rc;

    /* fetched up front; only the contiguous branches below use it */
    opal_convertor_get_current_pointer (convertor, &data_ptr);

    /* packed send: the data cannot be sent in place */
    if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
        uint32_t iov_count = 1;
        struct iovec iov;

        /* non-contiguous data requires the convertor */
        if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism &&
            total_size > mca_btl_vader.super.btl_eager_limit) {
            (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
        } else
            (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);

        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        /* pack right after the reserved bytes of the first segment */
        iov.iov_len = *size;
        iov.iov_base =
            (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
                             reserve);

        /* the pack writes the number of bytes it produced back into *size */
        rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
        if (OPAL_UNLIKELY(rc < 0)) {
            MCA_BTL_VADER_FRAG_RETURN(frag);
            return NULL;
        }

        frag->segments[0].seg_len = *size + reserve;
    } else {
        /* contiguous data: choose the fragment type from the copy mechanism
         * and the total size */
        if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
            if (OPAL_LIKELY(total_size <= mca_btl_vader.super.btl_eager_limit)) {
                (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
            } else {
                (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
            }
        } else
            (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);

        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

#if OPAL_BTL_VADER_HAVE_XPMEM
        /* use xpmem to send this segment if it is above the max inline send size */
        if (OPAL_UNLIKELY(MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism &&
                          total_size > (size_t) mca_btl_vader_component.max_inline_send)) {
            /* single copy send */
            frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;

            /* set up single copy io vector */
            frag->hdr->sc_iov.iov_base = data_ptr;
            frag->hdr->sc_iov.iov_len  = *size;

            /* segment 0 carries the reserved bytes, segment 1 the user data */
            frag->segments[0].seg_len = reserve;
            frag->segments[1].seg_len = *size;
            frag->segments[1].seg_addr.pval = data_ptr;
            frag->base.des_segment_count = 2;
        } else {
#endif
            /* inline send */
            if (OPAL_LIKELY(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP & flags)) {
                /* try to reserve a fast box for this transfer only if the
                 * fragment does not belong to the caller */
                fbox = mca_btl_vader_reserve_fbox (endpoint, total_size);
                if (OPAL_LIKELY(fbox)) {
                    /* redirect the first segment so the copy below lands in the fast box */
                    frag->segments[0].seg_addr.pval = fbox;
                }

                /* recorded even when NULL, i.e. when no fast box was available */
                frag->fbox = fbox;
            }

            /* NTH: the convertor adds some latency so we bypass it here */
            memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve), data_ptr, *size);
            frag->segments[0].seg_len = total_size;
#if OPAL_BTL_VADER_HAVE_XPMEM
        }
#endif
    }

    frag->base.order       = order;
    frag->base.des_flags   = flags;

    return &frag->base;
}
/**
 * Copy send_count elements of send_type into recv_count elements of recv_type
 * by packing into a temporary buffer of `chunk` bytes and unpacking from it,
 * and print the elapsed pack/unpack times.
 *
 * When RESET_CONVERTORS is set in outputFlags, both convertors are rewound to
 * position 0 and moved back to the current position after every chunk.
 *
 * @param send_type   datatype describing the source layout
 * @param send_count  number of send_type elements
 * @param recv_type   datatype describing the destination layout
 * @param recv_count  number of recv_type elements
 * @param chunk       size in bytes of the intermediate pack buffer
 *
 * @return OPAL_SUCCESS in every case; failures are only reported on stdout.
 */
static int local_copy_with_convertor_2datatypes( opal_datatype_t const * const send_type, int send_count,
                                                 opal_datatype_t const * const recv_type, int recv_count,
                                                 int chunk )
{
    OPAL_PTRDIFF_TYPE send_lb, send_extent, recv_lb, recv_extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst = NULL, *osrc = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, send_malloced_size, recv_malloced_size;
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    send_malloced_size = compute_memory_size(send_type, send_count);
    recv_malloced_size = compute_memory_size(recv_type, recv_count);

    opal_datatype_get_extent( send_type, &send_lb, &send_extent );
    opal_datatype_get_extent( recv_type, &recv_lb, &recv_extent );

    odst = (char*)malloc( recv_malloced_size );
    osrc = (char*)malloc( send_malloced_size );
    ptemp = malloc( chunk );

    /* Never touch the buffers if one of the allocations failed. */
    if( (NULL == odst) || (NULL == osrc) || (NULL == ptemp) ) {
        printf( "Unable to allocate the buffers for the copy\n" );
        goto clean_and_return;
    }

    /* fill up the sender with a recognizable pattern */
    for( size_t i = 0; i < send_malloced_size; i++ ) {
        osrc[i] = i % 128 + 32;
    }
    /* fill up the receiver with ZEROS */
    memset( odst, 0, recv_malloced_size );

    /* the convertors expect the address of the datatype origin, so shift the
     * raw allocations by the lower bounds */
    pdst = odst - recv_lb;
    psrc = osrc - send_lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, send_type, send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, recv_type, recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;

        if( outputFlags & RESET_CONVERTORS ) {
            /* rewind each convertor, then move it back to where it was */
            size_t pos = 0;
            opal_convertor_set_position(send_convertor, &pos);
            pos = length;
            opal_convertor_set_position(send_convertor, &pos);
            assert(pos == length);

            pos = 0;
            opal_convertor_set_position(recv_convertor, &pos);
            pos = length;
            opal_convertor_set_position(recv_convertor, &pos);
            assert(pos == length);
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    /* free(NULL) is a no-op, no guards needed */
    free( odst );
    free( osrc );
    free( ptemp );
    return OPAL_SUCCESS;
}
/**
 * Pack data into (or describe user data with) a shared-memory fragment.
 *
 * When `reserve` is non-zero, or neither KNEM nor CMA is enabled, the payload
 * is packed behind `reserve` bytes in an eager or max-size fragment. Otherwise
 * a user fragment is set up to reference the user buffer directly, with a
 * KNEM region cookie or the local pid stored as the segment key.
 *
 * @param btl (IN)           BTL module
 * @param endpoint (IN)      peer endpoint (not referenced by this function)
 * @param registration (IN)  memory registration (not referenced by this function)
 * @param convertor (IN)     convertor describing the payload
 * @param order (IN)         requested ordering (the descriptor is always
 *                           tagged MCA_BTL_NO_ORDER)
 * @param reserve (IN)       bytes reserved in front of the payload
 * @param size (IN/OUT)      requested payload size; set to the number of
 *                           bytes actually described on success
 * @param flags (IN)         descriptor flags
 *
 * @return the prepared descriptor, or NULL on allocation, pack or KNEM
 *         region-creation failure (the fragment is returned in those cases)
 */
struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    mca_mpool_base_registration_t* registration,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    mca_btl_sm_frag_t* frag;
    struct iovec iov;
    uint32_t iov_count = 1;
    size_t max_data = *size;
    int rc;

#if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
    mca_btl_sm_t* sm_btl = (mca_btl_sm_t*)btl;
    (void)sm_btl;

    if( (0 != reserve) || ( OPAL_UNLIKELY(!mca_btl_sm_component.use_knem)
                            && OPAL_UNLIKELY(!mca_btl_sm_component.use_cma)) ) {
#endif /* OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA */
        if ( reserve + max_data <= mca_btl_sm_component.eager_limit ) {
            MCA_BTL_SM_FRAG_ALLOC_EAGER(frag);
        } else {
            MCA_BTL_SM_FRAG_ALLOC_MAX(frag);
        }
        if( OPAL_UNLIKELY(NULL == frag) ) {
            return NULL;
        }

        /* never pack more than the fragment can hold behind the header */
        if( OPAL_UNLIKELY(reserve + max_data > frag->size) ) {
            max_data = frag->size - reserve;
        }
        iov.iov_len = max_data;
        iov.iov_base =
            (IOVBASE_TYPE*)(((unsigned char*)(frag->segment.base.seg_addr.pval)) + reserve);

        rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
        if( OPAL_UNLIKELY(rc < 0) ) {
            MCA_BTL_SM_FRAG_RETURN(frag);
            return NULL;
        }
        frag->segment.base.seg_len = reserve + max_data;
#if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
    } else {
#if OMPI_BTL_SM_HAVE_KNEM
        struct knem_cmd_create_region knem_cr;
        struct knem_cmd_param_iovec knem_iov;
#endif /* OMPI_BTL_SM_HAVE_KNEM */
        MCA_BTL_SM_FRAG_ALLOC_USER(frag);
        if( OPAL_UNLIKELY(NULL == frag) ) {
            return NULL;
        }

        /* a NULL iov_base asks the convertor for a pointer into the user
         * buffer instead of a copy */
        iov.iov_len = max_data;
        iov.iov_base = NULL;
        rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
        if( OPAL_UNLIKELY(rc < 0) ) {
            MCA_BTL_SM_FRAG_RETURN(frag);
            return NULL;
        }
        frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
        frag->segment.base.seg_len = max_data;

#if OMPI_BTL_SM_HAVE_KNEM
        if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) {
            knem_iov.base = (uintptr_t)iov.iov_base;
            knem_iov.len = max_data;
            knem_cr.iovec_array = (uintptr_t)&knem_iov;
            knem_cr.iovec_nr = iov_count;
            knem_cr.protection = PROT_READ;
            knem_cr.flags = KNEM_FLAG_SINGLEUSE;
            if (OPAL_UNLIKELY(ioctl(sm_btl->knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
                /* give the fragment back, otherwise it is lost for good */
                MCA_BTL_SM_FRAG_RETURN(frag);
                return NULL;
            }
            frag->segment.key = knem_cr.cookie;
        }
#endif /* OMPI_BTL_SM_HAVE_KNEM */

#if OMPI_BTL_SM_HAVE_CMA
        if (OPAL_LIKELY(mca_btl_sm_component.use_cma)) {
            /* Encode the pid as the key */
            frag->segment.key = getpid();
        }
#endif /* OMPI_BTL_SM_HAVE_CMA */
    }
#endif /* OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA */

    frag->base.des_src = &(frag->segment.base);
    frag->base.des_src_cnt = 1;
    frag->base.order = MCA_BTL_NO_ORDER;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = flags;
    *size = max_data;
    return &frag->base;
}
/**
 * Send a header plus optional payload to the peer immediately.
 *
 * A fast-box send is attempted first when the payload does not need the
 * convertor. Otherwise a fragment is allocated, filled with header and
 * payload, appended to the component's active_sends list and written to the
 * peer's FIFO.
 *
 * @param btl (IN)          BTL module
 * @param endpoint (IN)     BTL peer addressing
 * @param convertor (IN)    convertor describing the payload
 * @param header (IN)       header bytes copied in front of the payload
 * @param header_size (IN)  size of the header in bytes
 * @param payload_size (IN) size of the payload in bytes
 * @param order (IN)        ordering value passed to the fragment allocator
 * @param flags (IN)        descriptor flags (BTL ownership is added)
 * @param tag (IN)          tag stored in the fragment header
 * @param descriptor (OUT)  always set to NULL
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if no fragment is available
 */
int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
                         struct mca_btl_base_endpoint_t *endpoint,
                         struct opal_convertor_t *convertor,
                         void *header, size_t header_size,
                         size_t payload_size, uint8_t order,
                         uint32_t flags, mca_btl_base_tag_t tag,
                         mca_btl_base_descriptor_t **descriptor)
{
    const size_t total_len = header_size + payload_size;
    mca_btl_vader_frag_t *frag;
    void *src = NULL;

    assert (total_len < mca_btl_vader_component.eager_limit);
    assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));

    /* no descriptor is ever handed back to the caller */
    *descriptor = NULL;

    if (OPAL_LIKELY(!(payload_size && opal_convertor_need_buffers (convertor)))) {
        if (payload_size) {
            opal_convertor_get_current_pointer (convertor, &src);
        }

        if (mca_btl_vader_fbox_sendi (endpoint, tag, header, header_size, src, payload_size)) {
            return OMPI_SUCCESS;
        }
    }

    /* fall back to a regular fragment; give up if none is available */
    frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, total_len,
                                                         flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
    if (OPAL_UNLIKELY(NULL == frag)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    frag->hdr->len = total_len;
    frag->hdr->tag = tag;

    /* match header (MPI comm/tag/etc. info) goes first */
    memcpy (frag->segment.seg_addr.pval, header, header_size);

    /* the payload must be copied now: the caller considers the send complete
     * as soon as success is returned, so single-copy cannot be used */
    if (payload_size) {
        void *payload_dst = (void *)((uintptr_t)frag->segment.seg_addr.pval + header_size);

        if (OPAL_UNLIKELY(opal_convertor_need_buffers (convertor))) {
            struct iovec pack_iov;
            uint32_t pack_count = 1;
            size_t packed = payload_size;

            pack_iov.iov_base = (IOVBASE_TYPE *) payload_dst;
            pack_iov.iov_len = payload_size;

            (void) opal_convertor_pack (convertor, &pack_iov, &pack_count, &packed);

            assert (packed == payload_size);
        } else {
            /* contiguous payload: skipping the convertor may be a bit faster */
            opal_convertor_get_current_pointer (convertor, &src);
            memcpy (payload_dst, src, payload_size);
        }
    }

    opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);

    /* publish the fragment in the peer's FIFO; the progress function will
     * return the fragment */
    vader_fifo_write (frag->hdr, endpoint->peer_smp_rank);

    return OMPI_SUCCESS;
}
/**
 * Pack data
 *
 * With a non-zero `reserve` this builds a send fragment: non-contiguous data
 * is packed through the convertor, contiguous data is either described for a
 * single-copy transfer (above max_inline_send) or copied inline, preferably
 * into a fast box. With `reserve == 0` the fragment simply references the
 * user buffer for put/get.
 *
 * @param btl (IN)           BTL module (not referenced by this function)
 * @param endpoint (IN)      peer endpoint, stored in the fragment
 * @param registration (IN)  memory registration (not referenced by this function)
 * @param convertor (IN)     convertor describing the payload
 * @param order (IN)         ordering value stored in the descriptor
 * @param reserve (IN)       bytes reserved in front of the payload
 * @param size (IN/OUT)      payload size; updated by opal_convertor_pack()
 *                           on the packed path
 * @param flags (IN)         descriptor flags
 *
 * @return the prepared descriptor, or NULL on allocation or pack failure
 */
static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl,
                                                            struct mca_btl_base_endpoint_t *endpoint,
                                                            mca_mpool_base_registration_t *registration,
                                                            struct opal_convertor_t *convertor,
                                                            uint8_t order, size_t reserve, size_t *size,
                                                            uint32_t flags)
{
    const size_t total_size = reserve + *size;
    struct iovec iov;
    mca_btl_vader_frag_t *frag;
    uint32_t iov_count = 1;
    void *data_ptr, *fbox_ptr;
    int rc;

    opal_convertor_get_current_pointer (convertor, &data_ptr);

    if (OPAL_LIKELY(reserve)) {
        /* in place send fragment */
        if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
            /* non-contiguous data requires the convertor */
            (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag);
            if (OPAL_UNLIKELY(NULL == frag)) {
                return NULL;
            }

            iov.iov_len = *size;
            iov.iov_base =
                (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
                                 reserve);

            rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
            if (OPAL_UNLIKELY(rc < 0)) {
                MCA_BTL_VADER_FRAG_RETURN(frag);
                return NULL;
            }

            /* the convertor may have packed less than requested: use the
             * updated *size rather than the pre-pack total_size */
            frag->segments[0].seg_len = reserve + *size;
        } else {
            (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
            if (OPAL_UNLIKELY(NULL == frag)) {
                return NULL;
            }

            if (total_size > (size_t) mca_btl_vader_component.max_inline_send) {
                /* single copy send */
                frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;

                /* set up single copy io vector */
                frag->hdr->sc_iov.iov_base = data_ptr;
                frag->hdr->sc_iov.iov_len = *size;

                frag->segments[0].seg_len = reserve;
                frag->segments[1].seg_len = *size;
                frag->segments[1].seg_addr.pval = data_ptr;
                frag->base.des_src_cnt = 2;
            } else {
                /* inline send */

                /* try to reserve a fast box for this transfer */
                fbox_ptr = mca_btl_vader_reserve_fbox (endpoint, total_size);

                if (fbox_ptr) {
                    frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
                    frag->segments[0].seg_addr.pval = fbox_ptr;
                }

                /* NTH: the convertor adds some latency so we bypass it here */
                vader_memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
                               data_ptr, *size);
                frag->segments[0].seg_len = total_size;
            }
        }
    } else {
        /* put/get fragment */
        (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
        frag->segments[0].seg_len = total_size;
    }

    frag->base.order = order;
    frag->base.des_flags = flags;

    frag->endpoint = endpoint;

    return &frag->base;
}
/**
 * Start a non-blocking write of `count` elements of `datatype` from `buf`.
 *
 * If the fbtl provides fbtl_ipwritev the request is posted through it in a
 * single cycle; otherwise the blocking mca_common_ompio_file_write() is used
 * and the request is completed before returning.
 *
 * @param fh (IN)        file handle
 * @param buf (IN)       user buffer
 * @param count (IN)     number of elements
 * @param datatype (IN)  element datatype
 * @param request (OUT)  request representing the operation; not set when the
 *                       file is read-only
 *
 * @return MPI_ERR_READ_ONLY if the file was opened with MPI_MODE_RDONLY,
 *         otherwise OMPI_SUCCESS or the result of the blocking write
 */
int mca_common_ompio_file_iwrite (ompio_file_t *fh,
                                  const void *buf,
                                  int count,
                                  struct ompi_datatype_t *datatype,
                                  ompi_request_t **request)
{
    int ret = OMPI_SUCCESS;
    mca_ompio_request_t *ompio_req=NULL;
    size_t spc=0;

    if (fh->f_amode & MPI_MODE_RDONLY){
//      opal_output(10, "Improper use of FILE Mode, Using RDONLY for write!\n");
        ret = MPI_ERR_READ_ONLY;
        return ret;
    }

    mca_common_ompio_request_alloc ( &ompio_req, MCA_OMPIO_REQUEST_WRITE);

    /* nothing to write: hand back an already completed request */
    if ( 0 == count ) {
        ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
        ompio_req->req_ompi.req_status._ucount = 0;
        ompi_request_complete (&ompio_req->req_ompi, false);
        *request = (ompi_request_t *) ompio_req;

        return OMPI_SUCCESS;
    }

    if ( NULL != fh->f_fbtl->fbtl_ipwritev ) {
        /* This fbtl has support for non-blocking operations */

        uint32_t iov_count = 0;
        struct iovec *decoded_iov = NULL;
        size_t max_data = 0;
        size_t total_bytes_written =0;
        int i = 0; /* index into the decoded iovec of the buffer */
        int j = 0; /* index into the file view iovec */

#if OPAL_CUDA_SUPPORT
        int is_gpu, is_managed;
        mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
        if ( is_gpu && !is_managed ) {
            size_t pos=0;
            char *tbuf=NULL;
            opal_convertor_t convertor;

            OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

            opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
            opal_convertor_cleanup (&convertor);

            /* the request keeps track of the staging buffer */
            ompio_req->req_tbuf = tbuf;
            ompio_req->req_size = max_data;
        }
        else {
            mca_common_ompio_decode_datatype (fh,
                                              datatype,
                                              count,
                                              buf,
                                              &max_data,
                                              &decoded_iov,
                                              &iov_count);
        }
#else
        mca_common_ompio_decode_datatype (fh,
                                          datatype,
                                          count,
                                          buf,
                                          &max_data,
                                          &decoded_iov,
                                          &iov_count);
#endif
        /* data to write but an empty file view: complete with 0 bytes */
        if ( 0 < max_data && 0 == fh->f_iov_count ) {
            ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
            ompio_req->req_ompi.req_status._ucount = 0;
            ompi_request_complete (&ompio_req->req_ompi, false);
            *request = (ompi_request_t *) ompio_req;

            /* the decoded iovec is no longer needed; do not leak it */
            free (decoded_iov);
            decoded_iov = NULL;

            return OMPI_SUCCESS;
        }

        j = fh->f_index_in_file_view;

        /* Non blocking operations have to occur in a single cycle */
        mca_common_ompio_build_io_array ( fh,
                                          0,         // index of current cycle iteration
                                          1,         // number of cycles
                                          max_data,  // setting bytes_per_cycle to max_data
                                          max_data,
                                          iov_count,
                                          decoded_iov,
                                          &i,
                                          &j,
                                          &total_bytes_written,
                                          &spc);

        if (fh->f_num_of_io_entries) {
            fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
        }

        mca_common_ompio_register_progress ();

        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
        if (NULL != decoded_iov) {
            free (decoded_iov);
            decoded_iov = NULL;
        }
    }
    else {
        // This fbtl does not support non-blocking write operations
        ompi_status_public_t status;
        ret = mca_common_ompio_file_write(fh,buf,count,datatype, &status);

        ompio_req->req_ompi.req_status.MPI_ERROR = ret;
        ompio_req->req_ompi.req_status._ucount = status._ucount;
        ompi_request_complete (&ompio_req->req_ompi, false);
    }

    *request = (ompi_request_t *) ompio_req;
    return ret;
}
/**
 * Copy send_count elements of send_type into recv_count elements of recv_type
 * by packing into a temporary buffer of `chunk` bytes and unpacking from it,
 * and print the elapsed pack/unpack times.
 *
 * @param send_type   datatype describing the source layout
 * @param send_count  number of send_type elements
 * @param recv_type   datatype describing the destination layout
 * @param recv_count  number of recv_type elements
 * @param chunk       size in bytes of the intermediate pack buffer
 *
 * @return OMPI_SUCCESS in every case; failures are only reported on stdout.
 */
static int local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count,
                                                 ompi_datatype_t* recv_type, int recv_count,
                                                 int chunk )
{
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    size_t length = 0;   /* byte counter: size_t so it cannot overflow like int32_t */
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;
    size_t slength, rlength;

    rlength = compute_buffer_length(recv_type, recv_count);
    slength = compute_buffer_length(send_type, send_count);
    pdst  = malloc( rlength );
    psrc  = malloc( slength );
    ptemp = malloc( chunk );

    /* Never touch the buffers if one of the allocations failed. */
    if( (NULL == pdst) || (NULL == psrc) || (NULL == ptemp) ) {
        printf( "Unable to allocate the buffers for the copy\n" );
        goto clean_and_return;
    }

    /* initialize the buffers to prevent valgrind from complaining */
    for( size_t i = 0; i < slength; i++ ) {
        ((char*)psrc)[i] = i % 128 + 32;
    }
    memset(pdst, 0, rlength);

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, &(send_type->super), send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, &(recv_type->super), recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    /* free(NULL) is a no-op, no guards needed */
    free( pdst );
    free( psrc );
    free( ptemp );
    return OMPI_SUCCESS;
}
/**
 * Allocate and fill the scatter staging buffer for the linear algorithm.
 *
 * The packed size of one contribution is queried through a temporary send
 * convertor. The root then allocates packed_size * size bytes and packs one
 * contribution per rank into consecutive slots; every other rank only
 * allocates packed_size bytes.
 *
 * @param comm             communicator used to look up the local peer
 * @param request          request holding the scatter state (u.scatter)
 * @param portals4_module  collective module (not referenced by this function)
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the buffer cannot be
 *         allocated
 */
static int setup_scatter_buffers_linear(struct ompi_communicator_t *comm,
                                        ompi_coll_portals4_request_t *request,
                                        mca_coll_portals4_module_t *portals4_module)
{
    int ret, line;
    const int is_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);

    /* a throw-away convertor tells us how large one packed contribution is */
    ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter,
                                              request->u.scatter.pack_src_buf,
                                              ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
                                              request->u.scatter.pack_src_count,
                                              request->u.scatter.pack_src_dtype);
    opal_convertor_get_packed_size(&request->u.scatter.send_converter,
                                   &request->u.scatter.packed_size);
    OBJ_DESTRUCT(&request->u.scatter.send_converter);

    if (!is_root) {
        /* leaf: room for exactly one packed contribution */
        request->u.scatter.scatter_bytes=request->u.scatter.packed_size;
        request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes);
        if (NULL == request->u.scatter.scatter_buf) {
            ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr;
        }
        request->u.scatter.free_after = 1;

        opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                            "%s:%d:rank(%d): leaf - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld)",
                            __FILE__, __LINE__, request->u.scatter.my_rank,
                            request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes,
                            request->u.scatter.packed_size);

        return OMPI_SUCCESS;
    }

    /* root: one slot of packed_size bytes per rank */
    request->u.scatter.scatter_bytes=request->u.scatter.packed_size * (ptrdiff_t)request->u.scatter.size;

    request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes);
    if (NULL == request->u.scatter.scatter_buf) {
        ret = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hdlr;
    }
    request->u.scatter.free_after = 1;

    for (int32_t peer = 0; peer < request->u.scatter.size; peer++) {
        struct iovec slot;
        uint32_t slot_count = 1;
        size_t packed;
        /* start of this rank's contribution inside the user send buffer */
        uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * peer;

        opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                            "%s:%d:rank(%d): offset(%lu)",
                            __FILE__, __LINE__, request->u.scatter.my_rank,
                            offset);

        ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter,
                                                  request->u.scatter.pack_src_buf + offset,
                                                  ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
                                                  request->u.scatter.pack_src_count,
                                                  request->u.scatter.pack_src_dtype);

        slot.iov_len = request->u.scatter.packed_size;
        slot.iov_base = (IOVBASE_TYPE *) ((char *)request->u.scatter.scatter_buf + (request->u.scatter.packed_size*peer));
        opal_convertor_pack(&request->u.scatter.send_converter, &slot, &slot_count, &packed);

        OBJ_DESTRUCT(&request->u.scatter.send_converter);
    }

    opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                        "%s:%d:rank(%d): root - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld) * size(%d)",
                        __FILE__, __LINE__, request->u.scatter.my_rank,
                        request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes,
                        request->u.scatter.packed_size, request->u.scatter.size);

    return OMPI_SUCCESS;

err_hdlr:
    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}
/**
 * Blocking write of `count` elements of `datatype` from `buf`.
 *
 * The buffer is decoded into an iovec (through a staging buffer for
 * non-managed GPU memory when CUDA support is compiled in) and written in
 * cycles of at most cycle_buffer_size bytes through fbtl_pwritev.
 *
 * @param fh (IN)        file handle
 * @param buf (IN)       user buffer
 * @param count (IN)     number of elements
 * @param datatype (IN)  element datatype
 * @param status (OUT)   unless MPI_STATUS_IGNORE, _ucount receives the number
 *                       of bytes reported written by the fbtl
 *
 * @return MPI_ERR_READ_ONLY if the file was opened with MPI_MODE_RDONLY,
 *         OMPI_SUCCESS otherwise. NOTE(review): a negative fbtl_pwritev
 *         result is not propagated; it only leaves the byte count unchanged.
 */
int mca_common_ompio_file_write (ompio_file_t *fh,
                                 const void *buf,
                                 int count,
                                 struct ompi_datatype_t *datatype,
                                 ompi_status_public_t *status)
{
    int ret = OMPI_SUCCESS;
    int index = 0;
    int cycles = 0;

    uint32_t iov_count = 0;
    struct iovec *decoded_iov = NULL;
    size_t bytes_per_cycle=0;
    size_t total_bytes_written = 0;
    size_t max_data=0, real_bytes_written=0;
    ssize_t ret_code=0;
    size_t spc=0;
    int i = 0; /* index into the decoded iovec of the buffer */
    int j = 0; /* index into the file view iovec */

    if (fh->f_amode & MPI_MODE_RDONLY){
//      opal_output(10, "Improper use of FILE Mode, Using RDONLY for write!\n");
        ret = MPI_ERR_READ_ONLY;
        return ret;
    }

    if ( 0 == count ) {
        if ( MPI_STATUS_IGNORE != status ) {
            status->_ucount = 0;
        }
        return ret;
    }

#if OPAL_CUDA_SUPPORT
    int is_gpu, is_managed;
    mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed);
    if ( is_gpu && !is_managed ) {
        size_t pos=0;
        char *tbuf=NULL;
        opal_convertor_t convertor;

        OMPIO_CUDA_PREPARE_BUF(fh,buf,count,datatype,tbuf,&convertor,max_data,decoded_iov,iov_count);

        opal_convertor_pack (&convertor, decoded_iov, &iov_count, &pos );
        opal_convertor_cleanup ( &convertor);
    }
    else {
        mca_common_ompio_decode_datatype (fh,
                                          datatype,
                                          count,
                                          buf,
                                          &max_data,
                                          &decoded_iov,
                                          &iov_count);
    }
#else
    mca_common_ompio_decode_datatype (fh,
                                      datatype,
                                      count,
                                      buf,
                                      &max_data,
                                      &decoded_iov,
                                      &iov_count);
#endif

    /* data to write but an empty file view: report 0 bytes, but still run
     * the common cleanup so the decoded iovec (and GPU staging buffer) are
     * not leaked */
    if ( 0 < max_data && 0 == fh->f_iov_count ) {
        goto cleanup;
    }

    if ( -1 == OMPIO_MCA_GET(fh, cycle_buffer_size )) {
        bytes_per_cycle = max_data;
    }
    else {
        bytes_per_cycle = OMPIO_MCA_GET(fh, cycle_buffer_size);
    }
    /* a zero cycle size would divide by zero below: fall back to a single
     * cycle covering everything, and to no cycle at all if there is no data */
    if ( 0 == bytes_per_cycle ) {
        bytes_per_cycle = max_data;
    }
    cycles = (0 == bytes_per_cycle) ? 0 : (int) ceil((double)max_data/bytes_per_cycle);

#if 0
    printf ("Bytes per Cycle: %d   Cycles: %d\n", bytes_per_cycle, cycles);
#endif

    j = fh->f_index_in_file_view;
    for (index = 0; index < cycles; index++) {
        mca_common_ompio_build_io_array ( fh,
                                          index,
                                          cycles,
                                          bytes_per_cycle,
                                          max_data,
                                          iov_count,
                                          decoded_iov,
                                          &i,
                                          &j,
                                          &total_bytes_written,
                                          &spc);

        if (fh->f_num_of_io_entries) {
            ret_code =fh->f_fbtl->fbtl_pwritev (fh);
            if ( 0<= ret_code ) {
                real_bytes_written+= (size_t)ret_code;
            }
        }

        fh->f_num_of_io_entries = 0;
        if (NULL != fh->f_io_array) {
            free (fh->f_io_array);
            fh->f_io_array = NULL;
        }
    }

cleanup:
#if OPAL_CUDA_SUPPORT
    if ( is_gpu && !is_managed ) {
        mca_common_ompio_release_buf (fh, decoded_iov->iov_base);
    }
#endif

    if (NULL != decoded_iov) {
        free (decoded_iov);
        decoded_iov = NULL;
    }

    if ( MPI_STATUS_IGNORE != status ) {
        status->_ucount = real_bytes_written;
    }

    return ret;
}
/** * Pack data and return a descriptor that can be * used for send/put. * * @param btl (IN) BTL module * @param peer (IN) BTL peer addressing */ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, struct opal_convertor_t* convertor, uint8_t order, size_t reserve, size_t* size, uint32_t flags) { mca_btl_tcp_frag_t* frag; struct iovec iov; uint32_t iov_count = 1; size_t max_data = *size; int rc; if( OPAL_UNLIKELY(max_data > UINT32_MAX) ) { /* limit the size to what we support */ max_data = (size_t)UINT32_MAX; } /* * if we aren't pinning the data and the requested size is less * than the eager limit pack into a fragment from the eager pool */ if (max_data+reserve <= btl->btl_eager_limit) { MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag); } else { /* * otherwise pack as much data as we can into a fragment * that is the max send size. */ MCA_BTL_TCP_FRAG_ALLOC_MAX(frag); } if( OPAL_UNLIKELY(NULL == frag) ) { return NULL; } frag->segments[0].seg_addr.pval = (frag + 1); frag->segments[0].seg_len = reserve; frag->base.des_segment_count = 1; if(opal_convertor_need_buffers(convertor)) { if (max_data + reserve > frag->size) { max_data = frag->size - reserve; } iov.iov_len = max_data; iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag->segments[0].seg_addr.pval)) + reserve); rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if( OPAL_UNLIKELY(rc < 0) ) { mca_btl_tcp_free(btl, &frag->base); return NULL; } frag->segments[0].seg_len += max_data; } else { iov.iov_len = max_data; iov.iov_base = NULL; rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if( OPAL_UNLIKELY(rc < 0) ) { mca_btl_tcp_free(btl, &frag->base); return NULL; } frag->segments[1].seg_addr.pval = iov.iov_base; frag->segments[1].seg_len = max_data; frag->base.des_segment_count = 2; } frag->base.des_segments = frag->segments; frag->base.des_flags = flags; frag->base.order = MCA_BTL_NO_ORDER; *size = max_data; return 
&frag->base; }
/**
 * Send a header plus optional payload to the peer immediately.
 *
 * A fast-box send is attempted first when the payload does not need the
 * convertor. Otherwise a fragment is allocated, filled with header and
 * payload (always through the convertor) and written to the peer's FIFO.
 *
 * @param btl (IN)          BTL module
 * @param endpoint (IN)     BTL peer addressing
 * @param convertor (IN)    convertor describing the payload
 * @param header (IN)       header bytes copied in front of the payload
 * @param header_size (IN)  size of the header in bytes
 * @param payload_size (IN) size of the payload in bytes
 * @param order (IN)        ordering value passed to the fragment allocator
 * @param flags (IN)        descriptor flags (BTL ownership is added)
 * @param tag (IN)          tag stored in the fragment header
 * @param descriptor (OUT)  set to NULL when no fragment could be allocated;
 *                          left untouched otherwise
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if no fragment is available
 */
int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
                         struct mca_btl_base_endpoint_t *endpoint,
                         struct opal_convertor_t *convertor,
                         void *header, size_t header_size,
                         size_t payload_size, uint8_t order,
                         uint32_t flags, mca_btl_base_tag_t tag,
                         mca_btl_base_descriptor_t **descriptor)
{
    mca_btl_vader_frag_t *frag;
    void *src = NULL;
    size_t length;

    if (payload_size) {
        opal_convertor_get_current_pointer (convertor, &src);
    }

    /* fast path: contiguous (or empty) payload through a fast box */
    if (!(payload_size && opal_convertor_need_buffers (convertor))) {
        if (mca_btl_vader_fbox_sendi (endpoint, tag, header, header_size, src, payload_size)) {
            return OMPI_SUCCESS;
        }
    }

    length = header_size + payload_size;

    /* fall back to a regular fragment; give up if none is available */
    frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, length,
                                                         flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
    if (OPAL_UNLIKELY(NULL == frag)) {
        *descriptor = NULL;

        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    frag->hdr->len = length;
    frag->hdr->tag = tag;

    /* match header (MPI comm/tag/etc. info) goes first */
    memcpy (frag->segments[0].seg_addr.pval, header, header_size);

    /* the payload must be copied now: the caller considers the send complete
     * when we return, so single-copy cannot be used */
    if (payload_size) {
        struct iovec pack_iov;
        uint32_t pack_count = 1;

        pack_iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
        pack_iov.iov_len = payload_size;
        length = payload_size;

        (void) opal_convertor_pack (convertor, &pack_iov, &pack_count, &length);

        assert (length == payload_size);
    }

    /* publish the fragment in the peer's FIFO; the progress function will
     * return the fragment */
    vader_fifo_write_ep (frag->hdr, endpoint);

    return OMPI_SUCCESS;
}
/** * Pack data and return a descriptor that can be * used for send/put. * * @param btl (IN) BTL module * @param peer (IN) BTL peer addressing */ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, struct mca_mpool_base_registration_t* registration, struct opal_convertor_t* convertor, uint8_t order, size_t reserve, size_t* size, uint32_t flags ) { mca_btl_udapl_frag_t* frag = NULL; struct iovec iov; uint32_t iov_count = 1; size_t max_data = *size; int rc; int pad = 0; /* compute pad as needed */ MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad, (max_data + reserve + sizeof(mca_btl_udapl_footer_t))); if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) { if(registration != NULL || max_data > btl->btl_max_send_size) { MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag); if(NULL == frag){ return NULL; } iov.iov_len = max_data; iov.iov_base = NULL; opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); *size = max_data; if(NULL == registration) { rc = btl->btl_mpool->mpool_register(btl->btl_mpool, iov.iov_base, max_data, 0, ®istration); if(rc != OMPI_SUCCESS) { MCA_BTL_UDAPL_FRAG_RETURN_USER(btl,frag); return NULL; } /* keep track of the registration we did */ frag->registration = (mca_btl_udapl_reg_t*)registration; } frag->segment.base.seg_len = max_data; frag->segment.base.seg_addr.pval = iov.iov_base; frag->triplet.segment_length = max_data; frag->triplet.virtual_address = (DAT_VADDR)(uintptr_t)iov.iov_base; frag->triplet.lmr_context = ((mca_btl_udapl_reg_t*)registration)->lmr_triplet.lmr_context; /* initialize base descriptor */ frag->base.des_src = &frag->segment; frag->base.des_src_cnt = 1; frag->base.des_dst = NULL; frag->base.des_dst_cnt = 0; frag->base.des_flags = flags; frag->base.order = MCA_BTL_NO_ORDER; return &frag->base; } } if(max_data + pad + reserve <= btl->btl_eager_limit) { /* the data is small enough to fit in the eager frag and * memory is not prepinned */ 
MCA_BTL_UDAPL_FRAG_ALLOC_EAGER(btl, frag); } if(NULL == frag) { /* the data doesn't fit into eager frag or eager frag is * not available */ MCA_BTL_UDAPL_FRAG_ALLOC_MAX(btl, frag); if(NULL == frag) { return NULL; } if(max_data + reserve > btl->btl_max_send_size) { max_data = btl->btl_max_send_size - reserve; } } iov.iov_len = max_data; iov.iov_base = (char *) frag->segment.base.seg_addr.pval + reserve; rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if(rc < 0) { MCA_BTL_UDAPL_FRAG_RETURN_MAX(btl, frag); return NULL; } *size = max_data; /* setup lengths and addresses to send out data */ frag->segment.base.seg_len = max_data + reserve; frag->triplet.segment_length = max_data + reserve + sizeof(mca_btl_udapl_footer_t); frag->triplet.virtual_address = (DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval; /* initialize base descriptor */ frag->base.des_src = &frag->segment; frag->base.des_src_cnt = 1; frag->base.des_dst = NULL; frag->base.des_dst_cnt = 0; frag->base.des_flags = flags; frag->base.order = MCA_BTL_NO_ORDER; return &frag->base; }
/* * opal_datatype_sndrcv * * Function: - copy MPI message from buffer into another * - send/recv done if cannot optimize * Accepts: - send buffer * - send count * - send datatype * - receive buffer * - receive count * - receive datatype * - tag * - communicator * Returns: - MPI_SUCCESS or error code */ int32_t ompi_datatype_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype) { opal_convertor_t send_convertor, recv_convertor; struct iovec iov; int length, completed; uint32_t iov_count; size_t max_data; /* First check if we really have something to do */ if (0 == rcount || 0 == rdtype->super.size) { return ((0 == scount || 0 == sdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE); } /* If same datatypes used, just copy. */ if (sdtype == rdtype) { int32_t count = ( scount < rcount ? scount : rcount ); opal_datatype_copy_content_same_ddt(&(rdtype->super), count, (char*)rbuf, (char*)sbuf); return ((scount > rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } /* If receive packed. */ if (rdtype->id == OMPI_DATATYPE_MPI_PACKED) { OBJ_CONSTRUCT( &send_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor, &(sdtype->super), scount, sbuf, 0, &send_convertor ); iov_count = 1; iov.iov_base = (IOVBASE_TYPE*)rbuf; iov.iov_len = scount * sdtype->super.size; if( (int32_t)iov.iov_len > rcount ) iov.iov_len = rcount; opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data ); OBJ_DESTRUCT( &send_convertor ); return ((max_data < (size_t)rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } /* If send packed. 
*/ if (sdtype->id == OMPI_DATATYPE_MPI_PACKED) { OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(rdtype->super), rcount, rbuf, 0, &recv_convertor ); iov_count = 1; iov.iov_base = (IOVBASE_TYPE*)sbuf; iov.iov_len = rcount * rdtype->super.size; if( (int32_t)iov.iov_len > scount ) iov.iov_len = scount; opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data ); OBJ_DESTRUCT( &recv_convertor ); return (((size_t)scount > max_data) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } iov.iov_len = length = 64 * 1024; iov.iov_base = (IOVBASE_TYPE*)malloc( length * sizeof(char) ); OBJ_CONSTRUCT( &send_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor, &(sdtype->super), scount, sbuf, 0, &send_convertor ); OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(rdtype->super), rcount, rbuf, 0, &recv_convertor ); completed = 0; while( !completed ) { iov.iov_len = length; iov_count = 1; max_data = length; completed |= opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data ); completed |= opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data ); } free( iov.iov_base ); OBJ_DESTRUCT( &send_convertor ); OBJ_DESTRUCT( &recv_convertor ); return ( (scount * sdtype->super.size) <= (rcount * rdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE ); }
/**
 * Build a source descriptor for a send/put through the self BTL.
 *
 * The data is packed into a send fragment when the convertor needs
 * buffers, when the request is smaller than the module's maximum send
 * size, or when header space is reserved.  Otherwise an RDMA fragment is
 * used and its segment records the address left in the iovec by the
 * convertor.
 *
 * @param btl          (IN)     BTL module (not referenced by this routine)
 * @param endpoint     (IN)     BTL endpoint (not referenced by this routine)
 * @param registration (IN)     memory registration (not referenced by this routine)
 * @param convertor    (IN/OUT) convertor the data is packed from
 * @param order        (IN)     ordering value (not referenced by this routine)
 * @param reserve      (IN)     bytes left free at the start of the fragment payload
 * @param size         (IN/OUT) bytes requested on entry, bytes packed on return
 * @param flags        (IN)     value stored in the descriptor's des_flags
 * @return the prepared descriptor, or NULL if no fragment could be
 *         allocated or the convertor reported an error
 */
struct mca_btl_base_descriptor_t* mca_btl_self_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    mca_mpool_base_registration_t* registration,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags )
{
    mca_btl_self_frag_t* frag;
    struct iovec pack_iov;
    uint32_t pack_iov_cnt = 1;
    size_t packed = *size;
    int rc;
    /* non-zero when the payload has to be copied into the fragment itself */
    const int copy_into_frag = opal_convertor_need_buffers(convertor) ||
                               packed < mca_btl_self.btl_max_send_size ||
                               reserve != 0;

    if( !copy_into_frag ) {
        MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag);
        if( OPAL_UNLIKELY(NULL == frag) ) {
            return NULL;
        }

        /* With a NULL base the convertor is expected to hand back a
         * pointer into the user's buffer instead of copying. */
        pack_iov.iov_base = NULL;
        pack_iov.iov_len = packed;

        rc = opal_convertor_pack(convertor, &pack_iov, &pack_iov_cnt, &packed);
        if( rc < 0 ) {
            MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
            return NULL;
        }

        frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) pack_iov.iov_base;
        frag->segment.seg_len = packed;
    } else {
        unsigned char* frag_payload;

        MCA_BTL_SELF_FRAG_ALLOC_SEND(frag);
        if( OPAL_UNLIKELY(NULL == frag) ) {
            return NULL;
        }

        /* the payload area starts right behind the fragment header */
        frag_payload = (unsigned char*)(frag + 1);

        /* clamp the request to what fits behind the reserved bytes */
        if( reserve + packed > frag->size ) {
            packed = frag->size - reserve;
        }
        pack_iov.iov_base = (IOVBASE_TYPE*)(frag_payload + reserve);
        pack_iov.iov_len = packed;

        rc = opal_convertor_pack(convertor, &pack_iov, &pack_iov_cnt, &packed);
        if( rc < 0 ) {
            MCA_BTL_SELF_FRAG_RETURN_SEND(frag);
            return NULL;
        }

        frag->segment.seg_addr.pval = frag + 1;
        frag->segment.seg_len = reserve + packed;
    }

    /* report how much was actually packed */
    *size = packed;

    frag->base.des_flags = flags;
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    return &frag->base;
}
/** * Pack data and return a descriptor that can be * used for send/put. * * @param btl (IN) BTL module * @param peer (IN) BTL peer addressing */ mca_btl_base_descriptor_t* mca_btl_sctp_prepare_src( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, struct mca_mpool_base_registration_t* registration, struct opal_convertor_t* convertor, uint8_t order, size_t reserve, size_t* size, uint32_t flags) { mca_btl_sctp_frag_t* frag; struct iovec iov; uint32_t iov_count = 1; size_t max_data = *size; int rc; /* * if we aren't pinning the data and the requested size is less * than the eager limit pack into a fragment from the eager pool */ if (max_data+reserve <= btl->btl_eager_limit) { MCA_BTL_SCTP_FRAG_ALLOC_EAGER(frag, rc); } /* * otherwise pack as much data as we can into a fragment * that is the max send size. */ else { MCA_BTL_SCTP_FRAG_ALLOC_MAX(frag, rc); } if(NULL == frag) { return NULL; } if(max_data == 0) { frag->segments[0].seg_addr.pval = (frag + 1); frag->segments[0].seg_len = reserve; frag->base.des_src_cnt = 1; } else if(opal_convertor_need_buffers(convertor)) { if (max_data + reserve > frag->size) { max_data = frag->size - reserve; } iov.iov_len = max_data; iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)(frag+1)) + reserve); rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if( rc < 0 ) { mca_btl_sctp_free(btl, &frag->base); return NULL; } frag->segments[0].seg_addr.pval = (frag + 1); frag->segments[0].seg_len = max_data + reserve; frag->base.des_src_cnt = 1; } else { iov.iov_len = max_data; iov.iov_base = NULL; rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); if( rc < 0 ) { mca_btl_sctp_free(btl, &frag->base); return NULL; } frag->segments[0].seg_addr.pval = frag+1; frag->segments[0].seg_len = reserve; frag->segments[1].seg_addr.pval = iov.iov_base; frag->segments[1].seg_len = max_data; frag->base.des_src_cnt = 2; } frag->base.des_src = frag->segments; frag->base.des_dst = NULL; 
frag->base.des_dst_cnt = 0; frag->base.des_flags = flags; *size = max_data; return &frag->base; }