/**
 * Build a compacted copy of the element description of a datatype.
 *
 * The description stored in pData->desc is walked once. Consecutive data
 * elements that are contiguous in memory are merged into a single element
 * (expressed in OPAL_DATATYPE_UINT1 units when the merged pieces do not share
 * the same basic type), contiguous loops are folded into the pending element,
 * and short loops are unrolled into plain elements. The result is written
 * into a freshly malloc'ed array stored in pTypeDesc->desc.
 *
 * @param pData      datatype whose description is read. The entry at index
 *                   pData->desc.used is expected to be an END_LOOP marker
 *                   (checked by an assert).
 * @param count      repetition count recorded in the bottom stack entry.
 * @param pTypeDesc  output descriptor; its length, desc and used fields are
 *                   overwritten. Any previous value of pTypeDesc->desc is not
 *                   released here.
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_OUT_OF_RESOURCE when the output
 *         array cannot be allocated (pTypeDesc->desc is then NULL and both
 *         pTypeDesc->length and pTypeDesc->used are 0).
 *
 * NOTE(review): the array allocated here is never freed by this function;
 * presumably the owner of pTypeDesc releases it -- confirm against callers.
 */
static int32_t
opal_datatype_optimize_short( opal_datatype_t* pData,
                              int32_t count,
                              dt_type_desc_t* pTypeDesc )
{
    dt_elem_desc_t* pElemDesc;
    ddt_elem_desc_t opt_elem;
    dt_stack_t* pStack;            /* pointer to the position on the stack */
    int32_t pos_desc = 0;          /* actual position in the description of the derived datatype */
    int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0;
    int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity;
    OPAL_PTRDIFF_TYPE total_disp = 0, last_extent = 1, last_disp = 0;
    uint16_t last_flags = 0xFFFF;  /* keep all for the first datatype */
    uint32_t i;

    /* One stack entry per loop of the original description, plus two spare entries. */
    pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) );
    SAVE_STACK( pStack, -1, 0, count, 0 );

    pTypeDesc->length = 2 * pData->desc.used + 1 /* for the fake OPAL_DATATYPE_END_LOOP at the end */;
    pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length );
    pTypeDesc->used = 0;
    if( NULL == pElemDesc ) {
        /* Every CREATE_* below writes through pElemDesc: bail out instead of
         * dereferencing NULL, and leave the descriptor in an empty state. */
        pTypeDesc->length = 0;
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* NOTE(review): opt_elem (and last_flags further down) are written but never
     * read in this function; kept as-is to leave the logic untouched. */
    SET_EMPTY_ELEMENT( &opt_elem );
    assert( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type );
    opt_elem.common.type = OPAL_DATATYPE_LOOP;
    opt_elem.common.flags = 0xFFFF;  /* keep all for the first datatype */
    opt_elem.count = 0;
    opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp;
    opt_elem.extent = 0;

    while( stack_pos >= 0 ) {
        if( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) {  /* end of the current loop */
            ddt_endloop_desc_t* end_loop = &(pData->desc.desc[pos_desc].end_loop);

            /* flush the pending merged element before closing the loop */
            if( last_length != 0 ) {
                CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                pElemDesc++; nbElems++;
                last_disp += last_length;
                last_length = 0;
            }
            CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1,  /* # of elems in this loop */
                             end_loop->first_elem_disp, end_loop->size, end_loop->common.flags );
            pElemDesc++; nbElems++;
            if( --stack_pos >= 0 ) {  /* still something to do ? */
                /* patch the matching LOOP start with the item count just stored in the END_LOOP */
                ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
                pStartLoop->items = (pElemDesc - 1)->elem.count;
                total_disp = pStack->disp;  /* update the displacement position */
            }
            pStack--;  /* go down one position on the stack */
            pos_desc++;
            continue;
        }

        if( OPAL_DATATYPE_LOOP == pData->desc.desc[pos_desc].elem.common.type ) {
            ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
            ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
            int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
            OPAL_PTRDIFF_TYPE loop_disp = pData->desc.desc[pos_desc + index].elem.disp;

            /* does the first data of the loop start exactly where the pending element ends ? */
            continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size)
                          == (total_disp + loop_disp));

            if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
                /* the loop is contiguous or composed by contiguous elements with a gap */
                if( loop->extent == (OPAL_PTRDIFF_TYPE)end_loop->size ) {
                    /* the whole loop is contiguous */
                    if( !continuity ) {
                        if( 0 != last_length ) {
                            CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                            pElemDesc++; nbElems++;
                            last_length = 0;
                        }
                        last_disp = total_disp + loop_disp;
                    }
                    /* fold the whole loop into the pending element, counted in bytes */
                    last_length = (last_length * opal_datatype_basicDatatypes[last_type]->size
                                   + loop->loops * end_loop->size);
                    last_type   = OPAL_DATATYPE_UINT1;
                    last_extent = 1;
                } else {
                    int counter = loop->loops;
                    OPAL_PTRDIFF_TYPE merged_disp = 0;

                    /* if the previous data is contiguous with this piece and it has a length not ZERO */
                    if( last_length != 0 ) {
                        if( continuity ) {
                            last_length *= opal_datatype_basicDatatypes[last_type]->size;
                            last_length += end_loop->size;
                            last_type    = OPAL_DATATYPE_UINT1;
                            last_extent  = 1;
                            counter--;
                            merged_disp = loop->extent;  /* merged loop, update the disp of the remaining elems */
                        }
                        CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                        pElemDesc++; nbElems++;
                        last_disp += last_length;
                        last_length = 0;
                        last_type = OPAL_DATATYPE_LOOP;
                    }
                    /**
                     * The content of the loop is contiguous (maybe with a gap before or after).
                     *
                     * If any of the loops have been merged with the previous element, then the
                     * displacement of the first element (or the displacement of all elements if the
                     * loop will be removed) must be updated accordingly.
                     */
                    if( counter <= 2 ) {
                        /* at most two iterations left: unroll them as byte elements */
                        merged_disp += end_loop->first_elem_disp;
                        while( counter > 0 ) {
                            CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC,
                                         end_loop->size, merged_disp, 1);
                            pElemDesc++; nbElems++; counter--;
                            merged_disp += loop->extent;
                        }
                    } else {
                        /* keep a loop, but with a single byte element as its body */
                        CREATE_LOOP_START( pElemDesc, counter, 2, loop->extent, loop->common.flags );
                        pElemDesc++; nbElems++;
                        CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC,
                                     end_loop->size, loop_disp, 1);
                        pElemDesc++; nbElems++;
                        CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp + merged_disp,
                                         end_loop->size, end_loop->common.flags );
                        pElemDesc++; nbElems++;
                    }
                }
                pos_desc += loop->items + 1;
            } else {
                ddt_elem_desc_t* elem = (ddt_elem_desc_t*)&(pData->desc.desc[pos_desc+1]);

                /* a non contiguous loop cannot be merged: flush the pending element first */
                if( last_length != 0 ) {
                    CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                    pElemDesc++; nbElems++;
                    last_disp += last_length;
                    last_length = 0;
                    last_type = OPAL_DATATYPE_LOOP;
                }
                if( 2 == loop->items ) {  /* small loop */
                    if( (1 == elem->count)
                        && (elem->extent == (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[elem->common.type]->size) ) {
                        /* one basic item per iteration: express the loop as a single strided element */
                        CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~OPAL_DATATYPE_FLAG_CONTIGUOUS,
                                     loop->loops, elem->disp, loop->extent );
                        pElemDesc++; nbElems++;
                        pos_desc += loop->items + 1;
                        goto complete_loop;
                    } else if( loop->loops < 3 ) {
                        /* fewer than three iterations: unroll the loop */
                        OPAL_PTRDIFF_TYPE elem_displ = elem->disp;
                        for( i = 0; i < loop->loops; i++ ) {
                            CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags,
                                         elem->count, elem_displ, elem->extent );
                            elem_displ += loop->extent;
                            pElemDesc++; nbElems++;
                        }
                        pos_desc += loop->items + 1;
                        goto complete_loop;
                    }
                }
                /* generic case: copy the loop start and descend into the loop body */
                CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags );
                pElemDesc++; nbElems++;
                PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp );
                pos_desc++;
                DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
            }
        complete_loop:
            total_disp = pStack->disp;  /* update the displacement */
            continue;
        }

        while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {  /* keep doing it until we reach a non datatype element */
            /* now here we have a basic datatype */
            type = pData->desc.desc[pos_desc].elem.common.type;
            continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size)
                          == (total_disp + pData->desc.desc[pos_desc].elem.disp));

            if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity &&
                (pData->desc.desc[pos_desc].elem.extent == (int32_t)opal_datatype_basicDatatypes[type]->size) ) {
                /* the element extends the pending one */
                if( type == last_type ) {
                    last_length += pData->desc.desc[pos_desc].elem.count;
                    last_extent = pData->desc.desc[pos_desc].elem.extent;
                } else {
                    if( last_length == 0 ) {
                        last_type = type;
                        last_length = pData->desc.desc[pos_desc].elem.count;
                        last_extent = pData->desc.desc[pos_desc].elem.extent;
                    } else {
                        /* different basic types: merge them as raw bytes */
                        last_length = last_length * opal_datatype_basicDatatypes[last_type]->size +
                            pData->desc.desc[pos_desc].elem.count * opal_datatype_basicDatatypes[type]->size;
                        last_type = OPAL_DATATYPE_UINT1;
                        last_extent = 1;
                    }
                }
                last_flags &= pData->desc.desc[pos_desc].elem.common.flags;
            } else {
                /* not mergeable: emit the pending element and start a new one */
                if( last_length != 0 ) {
                    CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
                    pElemDesc++; nbElems++;
                }
                last_disp = total_disp + pData->desc.desc[pos_desc].elem.disp;
                last_length = pData->desc.desc[pos_desc].elem.count;
                last_extent = pData->desc.desc[pos_desc].elem.extent;
                last_type = type;
            }
            pos_desc++;  /* advance to the next data */
        }
    }

    if( last_length != 0 ) {
        CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
        pElemDesc++; nbElems++;
    }
    /* cleanup the stack */
    pTypeDesc->used = nbElems - 1;  /* except the last fake END_LOOP */
    return OPAL_SUCCESS;
}
/**
 * Append count repetitions of pdtAdd, placed at displacement disp and spaced
 * by extent, to the datatype pdtBase.
 *
 * The following parts of pdtBase are updated: the bounds (lb, ub, true_lb,
 * true_ub), the alignment, the size, the flags, the per-type counters
 * (btypes, bdt_used, nbElems) and the element description (desc).
 *
 * Adding OPAL_DATATYPE_LB or OPAL_DATATYPE_UB only moves the corresponding
 * user bound. If count is ZERO, or pdtAdd has a size of ZERO, the bounds, the
 * alignment and the flags are still updated but nothing is appended to the
 * description.
 *
 * @param pdtBase  datatype being extended (modified in place).
 * @param pdtAdd   datatype to append (only read).
 * @param count    number of repetitions of pdtAdd.
 * @param disp     displacement applied to the appended elements.
 * @param extent   distance between two repetitions; -1 selects the default
 *                 extent of pdtAdd (ub - lb).
 *
 * @return OPAL_SUCCESS on success, or OPAL_ERR_OUT_OF_RESOURCE if the
 *         description array cannot be enlarged; in that case pdtBase is left
 *         untouched.
 */
int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd,
                           uint32_t count, OPAL_PTRDIFF_TYPE disp, OPAL_PTRDIFF_TYPE extent )
{
    uint32_t newLength, place_needed = 0, i;
    short localFlags = 0;  /* no specific options yet */
    dt_elem_desc_t *pLast, *pLoop = NULL;
    OPAL_PTRDIFF_TYPE lb, ub, true_lb, true_ub, epsilon, old_true_ub;

    /* the extent should always be positive. So a negative
     * value here have a special meaning ie. default extent as
     * computed by ub - lb
     */
    if( extent == -1 ) {
        extent = (pdtAdd->ub - pdtAdd->lb);
    }

    /* Deal with the special markers (OPAL_DATATYPE_LB and OPAL_DATATYPE_UB) */
    if( OPAL_DATATYPE_LB == pdtAdd->id ) {
        pdtBase->bdt_used |= (((uint32_t)1) << OPAL_DATATYPE_LB);
        if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_LB ) {
            pdtBase->lb = LMIN( pdtBase->lb, disp );
        } else {
            pdtBase->lb = disp;
            pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_LB;
        }
        if( (pdtBase->ub - pdtBase->lb) != (OPAL_PTRDIFF_TYPE)pdtBase->size ) {
            pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS;
        }
        return OPAL_SUCCESS;  /* Just ignore the OPAL_DATATYPE_LOOP and OPAL_DATATYPE_END_LOOP */
    } else if( OPAL_DATATYPE_UB == pdtAdd->id ) {
        pdtBase->bdt_used |= (((uint32_t)1) << OPAL_DATATYPE_UB);
        if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB ) {
            pdtBase->ub = LMAX( pdtBase->ub, disp );
        } else {
            pdtBase->ub = disp;
            pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_UB;
        }
        if( (pdtBase->ub - pdtBase->lb) != (OPAL_PTRDIFF_TYPE)pdtBase->size ) {
            pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS;
        }
        return OPAL_SUCCESS;  /* Just ignore the OPAL_DATATYPE_LOOP and OPAL_DATATYPE_END_LOOP */
    }

    /* Compute the number of entries we need in the datatype description */
    OPAL_DATATYPE_COMPUTE_REQUIRED_ENTRIES( pdtAdd, count, extent, place_needed );

    /* Reserve the description entries before touching any other field of
     * pdtBase, so that an allocation failure leaves the datatype unchanged.
     * The result of realloc goes through a temporary: assigning it directly
     * would lose (leak) the current array on failure. Nothing is reserved when
     * nothing will be appended (count or size equal to ZERO).
     */
    if( (0 != count) && (0 != pdtAdd->size) ) {
        newLength = pdtBase->desc.used + place_needed;
        if( newLength > pdtBase->desc.length ) {
            dt_elem_desc_t* grown;

            newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
            grown = (dt_elem_desc_t*)realloc( pdtBase->desc.desc,
                                              sizeof(dt_elem_desc_t) * newLength );
            if( NULL == grown ) {
                return OPAL_ERR_OUT_OF_RESOURCE;
            }
            pdtBase->desc.desc   = grown;
            pdtBase->desc.length = newLength;
        }
    }

    /*
     * Compute the lower and upper bound of the datatype. We do it in 2 steps.
     * First compute the lb and ub of the new datatype taking in account the
     * count. Then update the lb value depending on the user markers and
     * update the global lb and ub.
     */
    OPAL_DATATYPE_LB_UB_CONT( count, disp, pdtAdd->lb, pdtAdd->ub, extent, lb, ub );

    /* Compute the true_lb and true_ub for the datatype to be added, taking
     * in account the number of repetitions. These values do not include the
     * potential gaps at the beginning and at the end of the datatype.
     */
    true_lb = lb - (pdtAdd->lb - pdtAdd->true_lb);
    true_ub = ub - (pdtAdd->ub - pdtAdd->true_ub);
    if( true_lb > true_ub ) {
        /* keep them ordered; old_true_ub is only used as a scratch variable here */
        old_true_ub = true_lb;
        true_lb = true_ub;
        true_ub = old_true_ub;
    }

    /* Overlap between the added data and the existing content is not checked:
     * this function never sets OPAL_DATATYPE_FLAG_OVERLAP.
     */

    /* The lower bound should be inherited from the parent if and only
     * if the USER has explicitly set it. The result lb is the MIN between
     * the all lb + disp if and only if all or nobody flags's contain the LB.
     */
    if( (pdtAdd->flags ^ pdtBase->flags) & OPAL_DATATYPE_FLAG_USER_LB ) {
        if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_LB ) {
            lb = pdtBase->lb;  /* base type has a user provided lb */
        }
        pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_LB;
    } else {
        /* both of them have the LB flag or both of them dont have it */
        lb = LMIN( pdtBase->lb, lb );
    }

    /* the same apply for the upper bound except for the case where
     * either of them has the flag UB, in which case we should
     * compute the UB including the natural alignment of the data.
     */
    if( (pdtBase->flags ^ pdtAdd->flags) & OPAL_DATATYPE_FLAG_USER_UB ) {
        if( pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB ) {
            ub = pdtBase->ub;
        }
        pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_UB;
    } else {
        /* both of them have the UB flag or both of them dont have it */
        /* we should compute the extent depending on the alignment */
        ub = LMAX( pdtBase->ub, ub );
    }

    /* While the true_lb and true_ub have to be ordered to have the true_lb lower
     * than the true_ub, the ub and lb do not have to be ordered. They should be
     * as the user define them.
     */
    pdtBase->lb = lb;
    pdtBase->ub = ub;

    /* compute the new memory alignment */
    pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );

    /* Now that we have the new ub and the alignment we should update the ub to match
     * the new alignment. We have to add an epsilon that is the least nonnegative
     * increment needed to round the extent to the next multiple of the alignment.
     * This rule applies only when there is NO user specified upper bound (see the
     * MPI standard, MPI 1.2 page 71).
     */
    if( !(pdtBase->flags & OPAL_DATATYPE_FLAG_USER_UB) ) {
        epsilon = (pdtBase->ub - pdtBase->lb) % pdtBase->align;
        if( 0 != epsilon ) {
            pdtBase->ub += (pdtBase->align - epsilon);
        }
    }
    /* now we know it contain some data */
    pdtBase->flags |= OPAL_DATATYPE_FLAG_DATA;

    /*
     * the count == 0 is LEGAL only for MPI_UB and MPI_LB. Therefore we support it
     * here in the upper part of this function. As an extension, the count set to
     * zero can be used to reset the alignment of the data, but not for changing
     * the true_lb and true_ub.
     */
    if( (0 == count) || (0 == pdtAdd->size) ) {
        return OPAL_SUCCESS;
    }

    /* Now, once we know everything is fine and there are some bytes in
     * the data-type we can update the size, true_lb and true_ub.
     */
    pdtBase->size += count * pdtAdd->size;
    if( 0 == pdtBase->nbElems ) {
        old_true_ub = disp;
    } else {
        old_true_ub = pdtBase->true_ub;
    }
    if( 0 != pdtBase->size ) {
        pdtBase->true_lb = LMIN( true_lb, pdtBase->true_lb );
        pdtBase->true_ub = LMAX( true_ub, pdtBase->true_ub );
    } else {
        pdtBase->true_lb = true_lb;
        pdtBase->true_ub = true_ub;
    }

    pdtBase->bdt_used |= pdtAdd->bdt_used;

    /* first free entry of the description; room for place_needed entries was
     * reserved at the top of the function */
    pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);

    /* The condition to be able to use the optimized path here is to be in presence
     * of an predefined contiguous datatype. This part is unable to handle any
     * predefined non contiguous datatypes (like MPI_SHORT_INT).
     */
    if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) ==
        (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
        pdtBase->btypes[pdtAdd->id] += count;
        if( (extent != (OPAL_PTRDIFF_TYPE)pdtAdd->size) && (count > 1) ) {  /* gaps around the datatype */
            /* described as LOOP / one element / END_LOOP: three entries */
            localFlags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITED | OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS);
            CREATE_LOOP_START( pLast, count, 2, extent, localFlags );
            pLast++;
            pLast->elem.common.type  = pdtAdd->id;
            pLast->elem.count        = 1;
            pLast->elem.disp         = disp;
            pLast->elem.extent       = pdtAdd->size;
            pLast->elem.common.flags = localFlags | OPAL_DATATYPE_FLAG_CONTIGUOUS;
            pLast++;
            CREATE_LOOP_END( pLast, 2, disp, pdtAdd->size, localFlags );
            pdtBase->desc.used += 3;
            pdtBase->btypes[OPAL_DATATYPE_LOOP]     = 1;
            pdtBase->btypes[OPAL_DATATYPE_END_LOOP] = 1;
        } else {
            /* a single element entry is enough */
            pLast->elem.common.type = pdtAdd->id;
            pLast->elem.count       = count;
            pLast->elem.disp        = disp;
            pLast->elem.extent      = extent;
            pdtBase->desc.used++;
            pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITED);
        }
    } else {
        /* keep trace of the total number of basic datatypes in the datatype definition */
        pdtBase->btypes[OPAL_DATATYPE_LOOP]     += pdtAdd->btypes[OPAL_DATATYPE_LOOP];
        pdtBase->btypes[OPAL_DATATYPE_END_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_END_LOOP];
        pdtBase->btypes[OPAL_DATATYPE_LB]       |= pdtAdd->btypes[OPAL_DATATYPE_LB];
        pdtBase->btypes[OPAL_DATATYPE_UB]       |= pdtAdd->btypes[OPAL_DATATYPE_UB];
        /* NOTE(review): the start index 4 presumably skips the LOOP, END_LOOP,
         * LB and UB slots handled just above -- confirm against the type ids. */
        for( i = 4; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
            if( pdtAdd->btypes[i] != 0 ) {
                pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
            }
        }

        if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
            (extent == pdtAdd->desc.desc[0].elem.extent) ) {
            /* single element description with matching extents: scale its count */
            pLast->elem        = pdtAdd->desc.desc[0].elem;
            pLast->elem.count *= count;
            pLast->elem.disp  += disp;
            pdtBase->desc.used++;
        } else {
            /* if the extent of the datatype is the same as the extent of the loop
             * description of the datatype then we simply have to update the main loop.
             */
            if( count != 1 ) {
                pLoop = pLast;
                CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent,
                                   (pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITED)) );
                pdtBase->btypes[OPAL_DATATYPE_LOOP] += 2;
                pdtBase->desc.used += 2;  /* accounts for both the LOOP and the END_LOOP entries */
                pLast++;
            }

            /* copy the description of pdtAdd, shifting every displacement by disp */
            for( i = 0; i < pdtAdd->desc.used; i++ ) {
                pLast->elem = pdtAdd->desc.desc[i].elem;
                if( OPAL_DATATYPE_FLAG_DATA & pLast->elem.common.flags ) {
                    pLast->elem.disp += disp;
                } else if( OPAL_DATATYPE_END_LOOP == pLast->elem.common.type ) {
                    pLast->end_loop.first_elem_disp += disp;
                }
                pLast++;
            }
            pdtBase->desc.used += pdtAdd->desc.used;
            if( pLoop != NULL ) {
                /* close the loop opened above */
                int index = GET_FIRST_NON_LOOP( pLoop );
                assert( pLoop[index].elem.common.flags & OPAL_DATATYPE_FLAG_DATA );
                CREATE_LOOP_END( pLast, pdtAdd->desc.used + 1, pLoop[index].elem.disp,
                                 pdtAdd->size, pLoop->loop.common.flags );
            }
        }
        /* should I add some space until the extent of this datatype ? */
    }

    /* Is the data still contiguous ?
     * The only way for the data to be contiguous is to have the true extent
     * equal to his size. In other words to avoid having internal gaps between
     * elements. If any of the data are overlapping then this method will not work.
     */
    localFlags = pdtBase->flags & pdtAdd->flags;
    UNSET_CONTIGUOUS_FLAG(pdtBase->flags);
    if( (localFlags & OPAL_DATATYPE_FLAG_CONTIGUOUS)            /* both type were contiguous */
        && ((disp + pdtAdd->true_lb) == old_true_ub)            /* and there is no gap between them */
        && ( ((OPAL_PTRDIFF_TYPE)pdtAdd->size == extent)        /* size and extent of the added type match */
             || (count < 2)) ) {                                /* ... or there is a single repetition */
        SET_CONTIGUOUS_FLAG(pdtBase->flags);
        if( (OPAL_PTRDIFF_TYPE)pdtBase->size == (pdtBase->ub - pdtBase->lb) )
            SET_NO_GAP_FLAG(pdtBase->flags);
    }

    /* If the NO_GAP flag is set the contiguous have to be set too */
    if( pdtBase->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) {
        assert( pdtBase->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS );
    }
    pdtBase->nbElems += (count * pdtAdd->nbElems);

    return OPAL_SUCCESS;
}
/**
 * Append count repetitions of pdtAdd, placed at displacement disp and spaced
 * by extent, to the datatype pdtBase.
 *
 * The following parts of pdtBase are updated: the bounds (lb, ub, true_lb,
 * true_ub), the alignment, the size, the flags, the per-type counters
 * (btypes, bdt_used, nbElems) and the element description (desc).
 *
 * Adding the predefined DT_LB or DT_UB marker only moves the corresponding
 * user bound. If count is ZERO, or pdtAdd has a size of ZERO, the bounds, the
 * alignment and the size are still updated but nothing is appended to the
 * description.
 *
 * @param pdtBase  datatype being extended (modified in place).
 * @param pdtAdd   datatype to append (only read).
 * @param count    number of repetitions of pdtAdd.
 * @param disp     displacement applied to the appended elements.
 * @param extent   distance between two repetitions; -1 selects the default
 *                 extent of pdtAdd (ub - lb).
 *
 * @return OMPI_SUCCESS on success, or OMPI_ERR_OUT_OF_RESOURCE if the
 *         description array cannot be enlarged; in that case pdtBase is left
 *         untouched.
 */
int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
                      uint32_t count, ptrdiff_t disp, ptrdiff_t extent )
{
    uint32_t newLength, place_needed = 0, i;
    short localFlags = 0;  /* no specific options yet */
    dt_elem_desc_t *pLast, *pLoop = NULL;
    ptrdiff_t lb, ub, true_lb, true_ub, epsilon, old_true_ub;

    /* the extent should always be positive. So a negative
     * value here have a special meaning ie. default extent as
     * computed by ub - lb
     */
    if( extent == -1 ) {
        extent = (pdtAdd->ub - pdtAdd->lb);
    }

    if( pdtAdd->flags & DT_FLAG_PREDEFINED ) {  /* add a basic datatype */
        /* handle special cases for DT_LB and DT_UB */
        if( pdtAdd == ompi_ddt_basicDatatypes[DT_LB] ) {
            pdtBase->bdt_used |= (((uint64_t)1) << DT_LB);
            if( pdtBase->flags & DT_FLAG_USER_LB ) {
                pdtBase->lb = LMIN( pdtBase->lb, disp );
            } else {
                pdtBase->lb = disp;
                pdtBase->flags |= DT_FLAG_USER_LB;
            }
            if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) {
                pdtBase->flags &= ~DT_FLAG_NO_GAPS;
            }
            return OMPI_SUCCESS;
        } else if( pdtAdd == ompi_ddt_basicDatatypes[DT_UB] ) {
            pdtBase->bdt_used |= (((uint64_t)1) << DT_UB);
            if( pdtBase->flags & DT_FLAG_USER_UB ) {
                pdtBase->ub = LMAX( pdtBase->ub, disp );
            } else {
                pdtBase->ub = disp;
                pdtBase->flags |= DT_FLAG_USER_UB;
            }
            if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) {
                pdtBase->flags &= ~DT_FLAG_NO_GAPS;
            }
            return OMPI_SUCCESS;
        }
        /* one element entry, or LOOP + element + END_LOOP when there are gaps */
        place_needed = (extent == (ptrdiff_t)pdtAdd->size ? 1 : 3);
    } else {
        place_needed = pdtAdd->desc.used;
        if( count != 1 ) {
            place_needed += 2;  /* for the loop markers */
        }
    }

    /* Reserve the description entries before touching any other field of
     * pdtBase, so that an allocation failure leaves the datatype unchanged.
     * The result of realloc goes through a temporary: assigning it directly
     * would lose (leak) the current array on failure. Nothing is reserved when
     * nothing will be appended (count or size equal to ZERO).
     */
    if( (0 != count) && (0 != pdtAdd->size) ) {
        newLength = pdtBase->desc.used + place_needed;
        if( newLength > pdtBase->desc.length ) {
            dt_elem_desc_t* grown;

            newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
            grown = (dt_elem_desc_t*)realloc( pdtBase->desc.desc,
                                              sizeof(dt_elem_desc_t) * newLength );
            if( NULL == grown ) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            pdtBase->desc.desc   = grown;
            pdtBase->desc.length = newLength;
        }
    }

    /*
     * Compute the lower and upper bound of the datatype. We do it in 2 steps.
     * First compute the lb and ub of the new datatype taking in account the
     * count. Then update the lb value depending on the user markers and
     * update the global lb and ub.
     */
    OMPI_DDT_LB_UB_CONT( count, disp, pdtAdd->lb, pdtAdd->ub, extent, lb, ub );

    /* The true_lb and true_ub take in account the gaps at the beginning and the
     * end of the datatype independently of the number of repetitions of the datatype.
     */
    true_lb = lb - (pdtAdd->lb - pdtAdd->true_lb);
    true_ub = ub - (pdtAdd->ub - pdtAdd->true_ub);
    if( true_lb > true_ub ) {
        /* keep them ordered; old_true_ub is only used as a scratch variable here */
        old_true_ub = true_lb;
        true_lb = true_ub;
        true_ub = old_true_ub;
    }

    /* the lower bound should be inherited from the parent if and only
     * if the USER has explicitly set it. The result lb is the MIN between
     * the all lb + disp if and only if all or nobody flags's contain the LB.
     */
    if( (pdtAdd->flags ^ pdtBase->flags) & DT_FLAG_USER_LB ) {
        if( pdtBase->flags & DT_FLAG_USER_LB ) {
            lb = pdtBase->lb;  /* base type has a user provided lb */
        }
        pdtBase->flags |= DT_FLAG_USER_LB;
    } else {
        /* both of them have the LB flag or both of them dont have it */
        lb = LMIN( pdtBase->lb, lb );
    }

    /* the same apply for the upper bound except for the case where
     * either of them has the flag UB, in which case we should
     * compute the UB including the natural alignment of the data.
     */
    if( (pdtBase->flags ^ pdtAdd->flags) & DT_FLAG_USER_UB ) {
        if( pdtBase->flags & DT_FLAG_USER_UB ) {
            ub = pdtBase->ub;
        }
        pdtBase->flags |= DT_FLAG_USER_UB;
    } else {
        /* both of them have the UB flag or both of them dont have it */
        /* we should compute the extent depending on the alignment */
        ub = LMAX( pdtBase->ub, ub );
    }

    /* While the true_lb and true_ub have to be ordered to have the true_lb lower
     * than the true_ub, the ub and lb does not have to be ordered. They should be
     * as the user define them.
     */
    pdtBase->lb = lb;
    pdtBase->ub = ub;

    /* remember where the existing data ended; used by the contiguity test below */
    if( 0 == pdtBase->nbElems ) {
        old_true_ub = disp;
    } else {
        old_true_ub = pdtBase->true_ub;
    }

    pdtBase->true_lb = LMIN( true_lb, pdtBase->true_lb );
    pdtBase->true_ub = LMAX( true_ub, pdtBase->true_ub );

    /* compute the new memory alignment */
    pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );

    pdtBase->size += count * pdtAdd->size;

    /* Now that we have the new ub and the alignment we should update the ub to match
     * the new alignment. We have to add an epsilon that is the least nonnegative increment
     * needed to round the extent to the next multiple of the alignment. This rule
     * applies only when there is NO user specified upper bound (see the MPI
     * standard, MPI 1.2 page 71).
     */
    if( !(pdtBase->flags & DT_FLAG_USER_UB) ) {
        epsilon = (pdtBase->ub - pdtBase->lb) % pdtBase->align;
        if( 0 != epsilon ) {
            pdtBase->ub += (pdtBase->align - epsilon);
        }
    }

    /*
     * the count == 0 is LEGAL only for MPI_UB and MPI_LB. I accept it just as a nice way to set
     * the soft UB for a data (without using a real UB marker). This approach can be used to
     * create the subarray and darray datatype. However from the MPI level this function
     * should never be called directly with a count set to 0.
     * Adding a data-type with a size zero is legal but does not have to go through all the
     * stuff below.
     */
    if( (0 == count) || (0 == pdtAdd->size) ) {
        return OMPI_SUCCESS;
    }

    pdtBase->bdt_used |= pdtAdd->bdt_used;

    /* first free entry of the description; room for place_needed entries was
     * reserved at the top of the function */
    pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);

    /* The condition to be able to use the optimized path here is to be in presence
     * of an predefined contiguous datatype. This part is unable to handle any
     * predefined non contiguous datatypes (like MPI_SHORT_INT).
     */
    if( (pdtAdd->flags & (DT_FLAG_PREDEFINED | DT_FLAG_DATA)) == (DT_FLAG_PREDEFINED | DT_FLAG_DATA) ) {
        pdtBase->btypes[pdtAdd->id] += count;
        if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) {  /* gaps around the datatype */
            /* described as LOOP / one element / END_LOOP: three entries */
            localFlags = pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS | DT_FLAG_NO_GAPS);
            CREATE_LOOP_START( pLast, count, 2, extent, localFlags );
            pLast++;
            pLast->elem.common.type  = pdtAdd->id;
            pLast->elem.count        = 1;
            pLast->elem.disp         = disp;
            pLast->elem.extent       = pdtAdd->size;
            pLast->elem.common.flags = localFlags | DT_FLAG_CONTIGUOUS;
            pLast++;
            CREATE_LOOP_END( pLast, 2, disp, pdtAdd->size, localFlags );
            pdtBase->desc.used += 3;
            pdtBase->btypes[DT_LOOP]     = 1;
            pdtBase->btypes[DT_END_LOOP] = 1;
        } else {
            /* a single element entry is enough */
            pLast->elem.common.type = pdtAdd->id;
            pLast->elem.count       = count;
            pLast->elem.disp        = disp;
            pLast->elem.extent      = extent;
            pdtBase->desc.used++;
            pLast->elem.common.flags = pdtAdd->flags & ~(DT_FLAG_COMMITED);
        }
    } else {
        /* keep trace of the total number of basic datatypes in the datatype definition */
        pdtBase->btypes[DT_LOOP]     += pdtAdd->btypes[DT_LOOP];
        pdtBase->btypes[DT_END_LOOP] += pdtAdd->btypes[DT_END_LOOP];
        pdtBase->btypes[DT_LB]       |= pdtAdd->btypes[DT_LB];
        pdtBase->btypes[DT_UB]       |= pdtAdd->btypes[DT_UB];
        /* NOTE(review): the start index 4 presumably skips the LOOP, END_LOOP,
         * LB and UB slots handled just above -- confirm against the type ids. */
        for( i = 4; i < DT_MAX_PREDEFINED; i++ ) {
            if( pdtAdd->btypes[i] != 0 ) {
                pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
            }
        }

        if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
            (extent == pdtAdd->desc.desc[0].elem.extent) ) {
            /* single element description with matching extents: scale its count */
            pLast->elem        = pdtAdd->desc.desc[0].elem;
            pLast->elem.count *= count;
            pLast->elem.disp  += disp;
            pdtBase->desc.used++;
        } else {
            /* if the extent of the datatype is the same as the extent of the loop
             * description of the datatype then we simply have to update the main loop.
             */
            if( count != 1 ) {
                pLoop = pLast;
                CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent,
                                   (pdtAdd->flags & ~(DT_FLAG_COMMITED)) );
                pdtBase->btypes[DT_LOOP] += 2;
                pdtBase->desc.used += 2;  /* accounts for both the LOOP and the END_LOOP entries */
                pLast++;
            }

            /* copy the description of pdtAdd, shifting every displacement by disp */
            for( i = 0; i < pdtAdd->desc.used; i++ ) {
                pLast->elem = pdtAdd->desc.desc[i].elem;
                if( DT_FLAG_DATA & pLast->elem.common.flags ) {
                    pLast->elem.disp += disp;
                } else if( DT_END_LOOP == pLast->elem.common.type ) {
                    pLast->end_loop.first_elem_disp += disp;
                }
                pLast++;
            }
            pdtBase->desc.used += pdtAdd->desc.used;
            if( pLoop != NULL ) {
                /* close the loop opened above */
                int index = GET_FIRST_NON_LOOP( pLoop );
                assert( pLoop[index].elem.common.flags & DT_FLAG_DATA );
                CREATE_LOOP_END( pLast, pdtAdd->desc.used + 1, pLoop[index].elem.disp,
                                 pdtAdd->size, pLoop->loop.common.flags );
            }
        }
        /* should I add some space until the extent of this datatype ? */
    }

    /* Is the data still contiguous ?
     * The only way for the data to be contiguous is to have the true extent
     * equal to his size. In other words to avoid having internal gaps between
     * elements. If any of the data are overlapping then this method will not work.
     */
    localFlags = pdtBase->flags & pdtAdd->flags;
    UNSET_CONTIGUOUS_FLAG(pdtBase->flags);
    if( disp != old_true_ub ) {  /* is there a gap between the 2 datatypes ? */
        /* starting before the end of the existing data is recorded as an overlap */
        if( disp < old_true_ub ) {
            pdtBase->flags |= DT_FLAG_OVERLAP;
        }
    } else {
        if( (localFlags & DT_FLAG_CONTIGUOUS)              /* both have to be contiguous */
            && ( ((ptrdiff_t)pdtAdd->size == extent)       /* size and extent of the added type match */
                 || (count < 2)) ) {                       /* ... or there is a single repetition */
            SET_CONTIGUOUS_FLAG(pdtBase->flags);
            if( (ptrdiff_t)pdtBase->size == (pdtBase->ub - pdtBase->lb) )
                SET_NO_GAP_FLAG(pdtBase->flags);
        }
    }

    /* If the NO_GAP flag is set the contiguous have to be set too */
    if( pdtBase->flags & DT_FLAG_NO_GAPS ) {
        assert( pdtBase->flags & DT_FLAG_CONTIGUOUS );
    }
    pdtBase->nbElems += (count * pdtAdd->nbElems);

    return OMPI_SUCCESS;
}