int32_t ompi_ddt_set_element_count( const ompi_datatype_t* datatype, uint32_t count, size_t* length ) { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ int32_t stack_pos = 0; uint32_t local_length = 0; dt_elem_desc_t* pElems; /** * Handle all complete multiple of the datatype. */ for( pos_desc = 4; pos_desc < DT_MAX_PREDEFINED; pos_desc++ ) { local_length += datatype->btypes[pos_desc]; } pos_desc = count / local_length; count = count % local_length; *length = datatype->size * pos_desc; if( 0 == count ) { return 0; } DUMP( "dt_set_element_count( %p, %d )\n", (void*)datatype, count ); pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[DT_LOOP] + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; pElems = datatype->desc.desc; pos_desc = 0; while( 1 ) { /* loop forever the exit condition is on the last DT_END_LOOP */ if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */ if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return 0; } pos_desc = pStack->index + 1; continue; } if( DT_LOOP == pElems[pos_desc].elem.common.type ) { ddt_loop_desc_t* loop = &(pElems[pos_desc].loop); do { PUSH_STACK( pStack, stack_pos, pos_desc, DT_LOOP, loop->loops, 0 ); pos_desc++; } while( DT_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */ DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" ); } while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) { /* now here we have a basic datatype */ const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]); local_length = pElems[pos_desc].elem.count; if( local_length >= count ) { *length += count * basic_type->size; return 0; } *length += local_length * basic_type->size; count -= local_length; pos_desc++; /* advance to the next data */ } } }
/* Get the number of elements from the data-type that can be * retrieved from a received buffer with the size iSize. * To speed-up this function you should use it with a iSize == to the modulo * of the original size and the size of the data. * Return value: * positive = number of basic elements inside * negative = some error occurs */ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, size_t iSize ) { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ int32_t nbElems = 0, stack_pos = 0; size_t local_size; dt_elem_desc_t* pElems; /* Normally the size should be less or equal to the size of the datatype. * This function does not support a iSize bigger than the size of the datatype. */ assert( (uint32_t)iSize <= datatype->size ); DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize ); pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[DT_LOOP] + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; pElems = datatype->desc.desc; pos_desc = 0; while( 1 ) { /* loop forever the exit condition is on the last DT_END_LOOP */ if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */ if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return nbElems; /* completed */ } pos_desc = pStack->index + 1; continue; } if( DT_LOOP == pElems[pos_desc].elem.common.type ) { ddt_loop_desc_t* loop = &(pElems[pos_desc].loop); do { PUSH_STACK( pStack, stack_pos, pos_desc, DT_LOOP, loop->loops, 0 ); pos_desc++; } while( DT_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */ DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" ); } while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) { /* now here we have a basic datatype */ const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]); local_size = pElems[pos_desc].elem.count * 
basic_type->size; if( local_size >= iSize ) { local_size = iSize / basic_type->size; nbElems += (int32_t)local_size; iSize -= local_size * basic_type->size; return (iSize == 0 ? nbElems : -1); } nbElems += pElems[pos_desc].elem.count; iSize -= local_size; pos_desc++; /* advance to the next data */ } } }
/**
 * Convert data from multiple input buffers (as received from the network
 * layer) into the user buffer described by the convertor.
 *
 * The position reached in the datatype description is kept on the convertor
 * stack, so the function can be called again with further input buffers.
 *
 * @param pConvertor  convertor holding the description, the stack and the
 *                    destination base pointer (pBaseBuf)
 * @param iov         input buffers; on return the iov_len of every entry is
 *                    overwritten with the number of bytes consumed from it
 *                    (0 for entries that were not needed)
 * @param out_size    number of entries in iov (only read)
 * @param max_data    [out] total number of bytes consumed by this call
 *
 * @return 1 when pConvertor->bConverted reached pConvertor->remote_size (the
 *         convertor is then flagged CONVERTOR_COMPLETED), 0 when more input
 *         is expected in a following call.
 */
int32_t opal_unpack_general_function( opal_convertor_t* pConvertor,
                                      struct iovec* iov,
                                      uint32_t* out_size,
                                      size_t* max_data )
{
    dt_stack_t* pStack;                /* pointer to the position on the stack */
    uint32_t pos_desc;                 /* actual position in the description of the derived datatype */
    int32_t count_desc;                /* the number of items already done in the actual pos_desc */
    int type = OPAL_DATATYPE_INT8;     /* type at current position */
    OPAL_PTRDIFF_TYPE advance;         /* number of bytes that we should advance the buffer */
    OPAL_PTRDIFF_TYPE disp_desc = 0;   /* compute displacement for truncated data */
    size_t bConverted = 0;             /* number of bytes converted this time */
    const opal_convertor_master_t* master = pConvertor->master;
    dt_elem_desc_t* description;
    OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
    size_t oCount = extent * pConvertor->count;
    size_t iCount, total_bytes_converted = 0;
    char* pInput;
    int32_t rc;
    int32_t completed = 0;             /* set once the whole description has been consumed */
    uint32_t iov_count;

    /* For the general case always use the user data description */
    description = pConvertor->use_desc->desc;

    /* Restore the state saved on top of the stack, then pop that entry */
    pStack = pConvertor->pStack + pConvertor->stack_pos;
    pos_desc   = pStack->index;
    count_desc = (int32_t)pStack->count;
    disp_desc  = pStack->disp;
    pStack--;
    pConvertor->stack_pos--;

    DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, description, "starting" );

    for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
        bConverted = 0;
        pInput = iov[iov_count].iov_base;
        iCount = iov[iov_count].iov_len;
        while( 1 ) {
            if( OPAL_DATATYPE_END_LOOP == description[pos_desc].elem.common.type ) { /* end of the current loop */
                if( --(pStack->count) == 0 ) { /* end of loop */
                    if( pConvertor->stack_pos == 0 ) {
                        /* The bottom of the stack is exhausted: nothing is left
                         * to unpack, whatever the number of iovecs still pending.
                         */
                        completed = 1;
                        goto save_and_return;  /* completed */
                    }
                    pConvertor->stack_pos--;
                    pStack--;
                    pos_desc++;
                } else {
                    /* Next iteration: rewind to the first item of the loop and
                     * shift the base displacement by one loop extent.
                     */
                    pos_desc = pStack->index + 1;
                    if( pStack->index == -1 ) {
                        pStack->disp += extent;
                    } else {
                        assert( OPAL_DATATYPE_LOOP == description[pStack->index].elem.common.type );
                        pStack->disp += description[pStack->index].loop.extent;
                    }
                }
                count_desc = description[pos_desc].elem.count;
                disp_desc  = description[pos_desc].elem.disp;
            }
            if( OPAL_DATATYPE_LOOP == description[pos_desc].elem.common.type ) {
                do {
                    PUSH_STACK( pStack, pConvertor->stack_pos,
                                pos_desc, OPAL_DATATYPE_LOOP, description[pos_desc].loop.loops, pStack->disp );
                    pos_desc++;
                } while( OPAL_DATATYPE_LOOP == description[pos_desc].loop.common.type ); /* let's start another loop */
                DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, description, "advance loops" );
                /* update the current state */
                count_desc = description[pos_desc].elem.count;
                disp_desc  = description[pos_desc].elem.disp;
            }
            while( description[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
                /* now here we have a basic datatype */
                type = description[pos_desc].elem.common.type;
                rc = master->pFunctions[type]( pConvertor, count_desc,
                                               pInput, iCount, opal_datatype_basicDatatypes[type]->size,
                                               pConvertor->pBaseBuf + pStack->disp + disp_desc,
                                               oCount, description[pos_desc].elem.extent, &advance );
                iCount     -= advance;  /* decrease the available space in the buffer */
                pInput     += advance;  /* increase the pointer to the buffer */
                bConverted += advance;
                if( rc != count_desc ) {
                    /* not all data has been converted. Keep the state */
                    count_desc -= rc;
                    disp_desc  += rc * description[pos_desc].elem.extent;
                    goto save_and_return;
                }
                pos_desc++;  /* advance to the next data */
                count_desc = description[pos_desc].elem.count;
                disp_desc  = description[pos_desc].elem.disp;
                if( iCount == 0 )
                    goto save_and_return;  /* break if there is no more data in the buffer */
            }
        }
    save_and_return:
        pConvertor->bConverted += bConverted;  /* update the # of bytes already converted */
        iov[iov_count].iov_len  = bConverted;  /* update the iovec length */
        total_bytes_converted  += bConverted;
        if( completed ) {
            /* Going on with the next iovec would decrement an already exhausted
             * stack entry and walk the description again. Report the remaining
             * iovecs as untouched and stop here.
             */
            for( iov_count++; iov_count < (*out_size); iov_count++ ) {
                iov[iov_count].iov_len = 0;
            }
            break;
        }
    }
    *max_data = total_bytes_converted;
    /* out of the loop: we have complete the data conversion or no more space
     * in the buffer.
     */
    if( pConvertor->remote_size == pConvertor->bConverted ) {
        pConvertor->flags |= CONVERTOR_COMPLETED;
        return 1;  /* I'm done */
    }

    /* I complete an element, next step I should go to the next one */
    PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, type, count_desc, disp_desc );

    return 0;
}
/**
 * Build in pTypeDesc a condensed copy of the element description of pData.
 *
 * The description pData->desc.desc is walked from its first entry. Data
 * elements whose displacements follow each other without a gap are
 * accumulated in the last_type / last_length / last_disp / last_extent
 * variables and emitted later as a single element. Loops flagged
 * OPAL_DATATYPE_FLAG_CONTIGUOUS are either folded into that pending run or
 * rewritten with OPAL_DATATYPE_UINT1 (byte) elements. Other loops are copied,
 * except for some two-item loops which are replaced by plain elements.
 *
 * @param pData      datatype whose description is read
 * @param count      repetition count saved in the bottom stack entry
 * @param pTypeDesc  receives a newly malloc'ed description array of
 *                   2 * pData->desc.used + 1 entries; its "used" field is set
 *                   to the number of generated entries minus one
 *
 * @return OPAL_SUCCESS (the only value returned by this function).
 *
 * NOTE(review): the result of malloc() is used without being checked.
 * NOTE(review): opt_elem and last_flags are written but never read in this
 * function.
 */
static int32_t opal_datatype_optimize_short( opal_datatype_t* pData, int32_t count, dt_type_desc_t* pTypeDesc ) {
    dt_elem_desc_t* pElemDesc;
    ddt_elem_desc_t opt_elem;
    dt_stack_t* pStack; /* pointer to the position on the stack */
    int32_t pos_desc = 0; /* actual position in the description of the derived datatype */
    int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0;
    int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity;
    OPAL_PTRDIFF_TYPE total_disp = 0, last_extent = 1, last_disp = 0;
    uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
    uint32_t i;

    /* One stack slot per loop of the description, plus two extra slots */
    pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) );
    SAVE_STACK( pStack, -1, 0, count, 0 );

    /* The output array is sized at twice the input description, plus one */
    pTypeDesc->length = 2 * pData->desc.used + 1 /* for the fake OPAL_DATATYPE_END_LOOP at the end */;
    pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length );
    pTypeDesc->used = 0;

    SET_EMPTY_ELEMENT( &opt_elem );
    /* The entry at index desc.used is expected to be the closing END_LOOP */
    assert( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type );
    opt_elem.common.type = OPAL_DATATYPE_LOOP;
    opt_elem.common.flags = 0xFFFF; /* keep all for the first datatype */
    opt_elem.count = 0;
    opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp;
    opt_elem.extent = 0;

    /* stack_pos drops below zero once the outermost END_LOOP has been handled */
    while( stack_pos >= 0 ) {
        if( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
            ddt_endloop_desc_t* end_loop = &(pData->desc.desc[pos_desc].end_loop);
            /* Flush the pending run before closing the loop */
            if( last_length != 0 ) {
                CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                pElemDesc++; nbElems++;
                last_disp += last_length;
                last_length = 0;
            }
            CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */ end_loop->first_elem_disp, end_loop->size, end_loop->common.flags );
            pElemDesc++; nbElems++;
            if( --stack_pos >= 0 ) { /* still something to do ? */
                /* pStack->index holds the value of nbElems saved right after the
                 * loop start was emitted, so index - 1 is that loop start entry.
                 */
                ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
                pStartLoop->items = (pElemDesc - 1)->elem.count;
                total_disp = pStack->disp; /* update the displacement position */
            }
            pStack--; /* go down one position on the stack */
            pos_desc++;
            continue;
        }
        if( OPAL_DATATYPE_LOOP == pData->desc.desc[pos_desc].elem.common.type ) {
            ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
            /* The END_LOOP matching this loop sits loop->items entries further */
            ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
            int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
            OPAL_PTRDIFF_TYPE loop_disp = pData->desc.desc[pos_desc + index].elem.disp;

            /* True when the pending run ends exactly where the loop data starts */
            continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) == (total_disp + loop_disp));
            if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
                /* the loop is contiguous or composed by contiguous elements with a gap */
                if( loop->extent == (OPAL_PTRDIFF_TYPE)end_loop->size ) {
                    /* the whole loop is contiguous */
                    if( !continuity ) {
                        /* Not adjacent to the pending run: flush it and start a
                         * new run at the beginning of the loop data.
                         */
                        if( 0 != last_length ) {
                            CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                            pElemDesc++; nbElems++;
                            last_length = 0;
                        }
                        last_disp = total_disp + loop_disp;
                    }
                    /* The run is now expressed in bytes (OPAL_DATATYPE_UINT1) */
                    last_length = (last_length * opal_datatype_basicDatatypes[last_type]->size + loop->loops * end_loop->size);
                    last_type = OPAL_DATATYPE_UINT1;
                    last_extent = 1;
                } else {
                    int counter = loop->loops;
                    OPAL_PTRDIFF_TYPE merged_disp = 0;
                    /* if the previous data is contiguous with this piece and it has a length not ZERO */
                    if( last_length != 0 ) {
                        if( continuity ) {
                            /* Absorb the first iteration of the loop in the pending run */
                            last_length *= opal_datatype_basicDatatypes[last_type]->size;
                            last_length += end_loop->size;
                            last_type = OPAL_DATATYPE_UINT1;
                            last_extent = 1;
                            counter--;
                            merged_disp = loop->extent; /* merged loop, update the disp of the remaining elems */
                        }
                        CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                        pElemDesc++; nbElems++;
                        last_disp += last_length;
                        last_length = 0;
                        last_type = OPAL_DATATYPE_LOOP;
                    }
                    /**
                     * The content of the loop is contiguous (maybe with a gap before or after).
                     *
                     * If any of the loops have been merged with the previous element, then the
                     * displacement of the first element (or the displacement of all elements if the
                     * loop will be removed) must be updated accordingly.
                     */
                    if( counter <= 2 ) {
                        /* Few iterations left: emit one byte element per iteration */
                        merged_disp += end_loop->first_elem_disp;
                        while( counter > 0 ) {
                            CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC, end_loop->size, merged_disp, 1);
                            pElemDesc++; nbElems++; counter--;
                            merged_disp += loop->extent;
                        }
                    } else {
                        /* Emit a loop of two items: one byte element and the loop end */
                        CREATE_LOOP_START( pElemDesc, counter, 2, loop->extent, loop->common.flags );
                        pElemDesc++; nbElems++;
                        CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC, end_loop->size, loop_disp, 1);
                        pElemDesc++; nbElems++;
                        CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp + merged_disp, end_loop->size, end_loop->common.flags );
                        pElemDesc++; nbElems++;
                    }
                }
                /* Skip the whole loop, its END_LOOP included */
                pos_desc += loop->items + 1;
            } else {
                ddt_elem_desc_t* elem = (ddt_elem_desc_t*)&(pData->desc.desc[pos_desc+1]);
                /* Flush the pending run before dealing with the loop */
                if( last_length != 0 ) {
                    CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                    pElemDesc++; nbElems++;
                    last_disp += last_length;
                    last_length = 0;
                    last_type = OPAL_DATATYPE_LOOP;
                }
                if( 2 == loop->items ) { /* small loop */
                    if( (1 == elem->count) && (elem->extent == (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[elem->common.type]->size) ) {
                        /* A loop over a single item becomes one element repeated
                         * loop->loops times with the loop extent as its extent.
                         */
                        CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~OPAL_DATATYPE_FLAG_CONTIGUOUS, loop->loops, elem->disp, loop->extent );
                        pElemDesc++; nbElems++;
                        pos_desc += loop->items + 1;
                        goto complete_loop;
                    } else if( loop->loops < 3 ) {
                        /* Fewer than three iterations: emit one copy of the
                         * element per iteration, shifted by the loop extent.
                         */
                        OPAL_PTRDIFF_TYPE elem_displ = elem->disp;
                        for( i = 0; i < loop->loops; i++ ) {
                            CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags, elem->count, elem_displ, elem->extent );
                            elem_displ += loop->extent;
                            pElemDesc++; nbElems++;
                        }
                        pos_desc += loop->items + 1;
                        goto complete_loop;
                    }
                }
                /* Keep the loop: emit its start and step into its body. The value
                 * of nbElems saved on the stack is used when the END_LOOP is met.
                 */
                CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags );
                pElemDesc++; nbElems++;
                PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp );
                pos_desc++;
                DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
            }
        complete_loop:
            total_disp = pStack->disp; /* update the displacement */
            continue;
        }
        while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
            /* now here we have a basic datatype */
            type = pData->desc.desc[pos_desc].elem.common.type;
            /* True when the pending run ends exactly where this element starts */
            continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) == (total_disp + pData->desc.desc[pos_desc].elem.disp));

            if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity && (pData->desc.desc[pos_desc].elem.extent == (int32_t)opal_datatype_basicDatatypes[type]->size) ) {
                /* The element extends the pending run */
                if( type == last_type ) {
                    last_length += pData->desc.desc[pos_desc].elem.count;
                    last_extent = pData->desc.desc[pos_desc].elem.extent;
                } else {
                    if( last_length == 0 ) {
                        /* Empty run: it simply takes the type of this element */
                        last_type = type;
                        last_length = pData->desc.desc[pos_desc].elem.count;
                        last_extent = pData->desc.desc[pos_desc].elem.extent;
                    } else {
                        /* Mixed types: the run is converted to a byte count */
                        last_length = last_length * opal_datatype_basicDatatypes[last_type]->size + pData->desc.desc[pos_desc].elem.count * opal_datatype_basicDatatypes[type]->size;
                        last_type = OPAL_DATATYPE_UINT1;
                        last_extent = 1;
                    }
                }
                last_flags &= pData->desc.desc[pos_desc].elem.common.flags;
            } else {
                /* Not mergeable: flush the pending run and start a new one */
                if( last_length != 0 ) {
                    CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                    pElemDesc++; nbElems++;
                }
                last_disp = total_disp + pData->desc.desc[pos_desc].elem.disp;
                last_length = pData->desc.desc[pos_desc].elem.count;
                last_extent = pData->desc.desc[pos_desc].elem.extent;
                last_type = type;
            }
            pos_desc++; /* advance to the next data */
        }
    }

    /* Flush whatever run is still pending once the walk is over */
    if( last_length != 0 ) {
        CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
        pElemDesc++; nbElems++;
    }
    /* cleanup the stack */
    pTypeDesc->used = nbElems - 1; /* except the last fake END_LOOP */
    return OPAL_SUCCESS;
}