/* the contig versions does not use the stack. They can easily retrieve * the status with just the informations from pConvertor->bConverted. */ int32_t opal_pack_homogeneous_contig_function( opal_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ) { dt_stack_t* pStack = pConv->pStack; unsigned char *source_base = NULL; uint32_t iov_count; size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted; OPAL_PTRDIFF_TYPE initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; source_base = (pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp); /* There are some optimizations that can be done if the upper level * does not provide a buffer. */ for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { if( 0 == length ) break; if( (size_t)iov[iov_count].iov_len > length ) iov[iov_count].iov_len = length; if( iov[iov_count].iov_base == NULL ) { iov[iov_count].iov_base = (IOVBASE_TYPE *) source_base; COMPUTE_CSUM( iov[iov_count].iov_base, iov[iov_count].iov_len, pConv ); } else { /* contiguous data just memcpy the smallest data in the user buffer */ OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, iov[iov_count].iov_len, pConv->pBaseBuf, pConv->pDesc, pConv->count ); MEMCPY_CSUM( iov[iov_count].iov_base, source_base, iov[iov_count].iov_len, pConv ); } length -= iov[iov_count].iov_len; pConv->bConverted += iov[iov_count].iov_len; pStack[0].disp += iov[iov_count].iov_len; source_base += iov[iov_count].iov_len; } /* update the return value */ *max_data = pConv->bConverted - initial_amount; *out_size = iov_count; if( pConv->bConverted == pConv->local_size ) { pConv->flags |= CONVERTOR_COMPLETED; return 1; } return 0; }
static inline void position_contiguous_loop( opal_convertor_t* CONVERTOR, dt_elem_desc_t* ELEM, uint32_t* COUNT, unsigned char** POINTER, size_t* SPACE ) { ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items); uint32_t _copy_loops = *(COUNT); if( (_copy_loops * _end_loop->size) > *(SPACE) ) _copy_loops = (uint32_t)(*(SPACE) / _end_loop->size); OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _end_loop->first_elem_disp, (_copy_loops - 1) * _loop->extent + _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); *(POINTER) += _copy_loops * _loop->extent; *(SPACE) -= _copy_loops * _end_loop->size; *(COUNT) -= _copy_loops; }
static inline void position_predefined_data( opal_convertor_t* CONVERTOR, dt_elem_desc_t* ELEM, uint32_t* COUNT, unsigned char** POINTER, size_t* SPACE ) { uint32_t _copy_count = *(COUNT); size_t _copy_blength; ddt_elem_desc_t* _elem = &((ELEM)->elem); _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; if( (_copy_count * _copy_blength) > *(SPACE) ) { _copy_count = (uint32_t)(*(SPACE) / _copy_blength); if( 0 == _copy_count ) return; /* nothing to do */ } _copy_blength *= _copy_count; OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _elem->disp, _copy_blength, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); *(POINTER) += (_copy_count * _elem->extent); *(SPACE) -= _copy_blength; *(COUNT) -= _copy_count; }
int32_t opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ) { const opal_datatype_t* pData = pConv->pDesc; dt_stack_t* pStack = pConv->pStack; unsigned char *user_memory, *packed_buffer; uint32_t i, index, iov_count; size_t max_allowed, total_bytes_converted = 0; OPAL_PTRDIFF_TYPE extent; OPAL_PTRDIFF_TYPE initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; extent = pData->ub - pData->lb; assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((OPAL_PTRDIFF_TYPE)pData->size != extent) ); /* Limit the amount of packed data to the data left over on this convertor */ max_allowed = pConv->local_size - pConv->bConverted; if( max_allowed > (*max_data) ) max_allowed = (*max_data); i = (uint32_t)(pConv->bConverted / pData->size); /* how many we already pack */ /* There are some optimizations that can be done if the upper level * does not provide a buffer. */ user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp; for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { if( 0 == max_allowed ) break; /* we're done this time */ if( iov[iov_count].iov_base == NULL ) { /* special case for small data. We avoid allocating memory if we * can fill the iovec directly with the address of the remaining * data. */ if( (uint32_t)pStack->count < ((*out_size) - iov_count) ) { pStack[1].count = pData->size - (pConv->bConverted % pData->size); for( index = iov_count; i < pConv->count; i++, index++ ) { iov[index].iov_base = (IOVBASE_TYPE *) user_memory; iov[index].iov_len = pStack[1].count; pStack[0].disp += extent; total_bytes_converted += pStack[1].count; pStack[1].disp = 0; /* reset it for the next round */ pStack[1].count = pData->size; user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp; COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); } *out_size = iov_count + index; pConv->bConverted += total_bytes_converted; *max_data = total_bytes_converted; pConv->flags |= CONVERTOR_COMPLETED; return 1; /* we're done */ } /* now special case for big contiguous data with gaps around */ if( pData->size >= IOVEC_MEM_LIMIT ) { /* as we dont have to copy any data, we can simply fill the iovecs * with data from the user data description. */ for( index = iov_count; (i < pConv->count) && (index < (*out_size)); i++, index++ ) { if( max_allowed < pData->size ) { iov[index].iov_base = (IOVBASE_TYPE *) user_memory; iov[index].iov_len = max_allowed; max_allowed = 0; COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); break; } else { iov[index].iov_base = (IOVBASE_TYPE *) user_memory; iov[index].iov_len = pData->size; user_memory += extent; COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv ); } max_allowed -= iov[index].iov_len; total_bytes_converted += iov[index].iov_len; } *out_size = index; *max_data = total_bytes_converted; pConv->bConverted += total_bytes_converted; if( pConv->bConverted == pConv->local_size ) { pConv->flags |= CONVERTOR_COMPLETED; return 1; } return 0; } } { uint32_t counter; size_t done; packed_buffer = (unsigned char *) iov[iov_count].iov_base; done = pConv->bConverted - i * pData->size; /* partial data from last pack */ if( done != 0 ) { /* still some data to copy from the last time */ done = pData->size - done; OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, done, pConv->pBaseBuf, pData, pConv->count ); MEMCPY_CSUM( packed_buffer, user_memory, done, pConv ); packed_buffer += done; max_allowed -= done; total_bytes_converted += done; user_memory += (extent - pData->size + done); } counter = (uint32_t)(max_allowed / pData->size); if( counter > pConv->count ) counter = pConv->count; for( i = 0; i < counter; i++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, pData, pConv->count ); MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv ); packed_buffer+= pData->size; user_memory += extent; } done = (counter * pData->size); max_allowed -= done; total_bytes_converted += done; /* If there is anything pending ... */ if( 0 != max_allowed ) { done = max_allowed; OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, done, pConv->pBaseBuf, pData, pConv->count ); MEMCPY_CSUM( packed_buffer, user_memory, done, pConv ); packed_buffer += done; max_allowed = 0; total_bytes_converted += done; user_memory += done; } } } pStack[0].disp = (intptr_t)user_memory - (intptr_t)pConv->pBaseBuf - initial_displ; pStack[1].disp = max_allowed; *max_data = total_bytes_converted; pConv->bConverted += total_bytes_converted; *out_size = iov_count; if( pConv->bConverted == pConv->local_size ) { pConv->flags |= CONVERTOR_COMPLETED; return 1; } return 0; }