void copyData(const MxInfo info, const double *from, double *to){ /* copy the real part */ const char *xp=(char*)from; char *zp=(char *)to; int dsz=dsz_bytes(info); const char *zendp=(char *)zp+(info.numel)*dsz; if ( isContiguous(info) ){ /* simple linear copy */ while ( zp < zendp ) *zp++ = *xp++; } else { int i; int *subs =(int*)CALLOC(info.nd,sizeof(int)); while ( zp < zendp ) { for( i=0; i < dsz; i++ ) zp[i] = xp[i]; zp += dsz; for( i=0; i < info.nd; i++ ){ /* if reached the last element of this dim */ xp += info.stride[i]*dsz; subs[i]++; /* move on to the next element */ if( subs[i] < info.sz[i] ){/*move this dim on by one and stop!*/ break; } else { subs[i] = 0; /* reset to the start again! */ xp -= info.stride[i]*info.sz[i]*dsz; } } } FREE(subs); } }
/*
 * Decide whether a mach message can be built in place around the x-kernel
 * message m, i.e. without copying it.
 *
 *   m       - message to inspect.
 *   machMsg - out: on success, the address `offset` bytes in front of the
 *             current stack head, where the mach message would start.
 *             It is written as soon as the front-space check passes, so it
 *             may have been modified even when FALSE is returned by one of
 *             the later checks.
 *   offset  - number of bytes required in front of the stack head.
 *
 * Returns TRUE only when every check below passes.  When
 * XK_PROXY_MSG_HACK is not defined the function always returns FALSE and
 * leaves *machMsg untouched.
 */
boolean_t
msgIsOkToMangle( Msg m, char **machMsg, int offset )
{
#ifdef XK_PROXY_MSG_HACK
    /* The message data must be one contiguous piece. */
    if ( ! isContiguous(m) ) {
	xTrace0(proxy, TR_DETAILED, "message is non-contiguous, must be copied");
	return FALSE;
    }

    /* This message must hold the only reference to its stack. */
    if ( m->stack->refCnt != 1 ) {
	xTrace0(proxy, TR_DETAILED, "msg stack is shared, must be copied");
	return FALSE;
    }

    /*
     * There must be at least `offset` bytes between the start of the stack
     * buffer and the head pointer to hold a mach message header.
     */
    if ( m->stackHeadPtr > m->stackTailPtr ||
	 m->stackHeadPtr - m->stack->b.leaf.data < offset ) {
	xTrace2(proxy, TR_DETAILED, "front of stack is too short (%d < %d)",
		m->stackHeadPtr - m->stack->b.leaf.data, offset);
	xTrace0(proxy, TR_DETAILED, "Msg must be copied");
	return FALSE;
    }
    *machMsg = m->stackHeadPtr - offset;

    /*
     * From the proposed start to the end of the stack buffer there must be
     * room for a maximum-size MIG message.
     */
    if ( m->stack->b.leaf.data + m->stack->b.leaf.size - *machMsg
	 < XK_MAX_MIG_MSG_LEN ) {
	xTrace1(proxy, TR_DETAILED,
		"Usable stack size (%d) is too small, msg must be copied",
		m->stack->b.leaf.data + m->stack->b.leaf.size - *machMsg);
	return FALSE;
    }

    /* The proposed start address must satisfy LONG_ALIGNED. */
    if ( ! LONG_ALIGNED(*machMsg) ) {
	xTrace0(proxy, TR_DETAILED, "Msg has bad alignment, must be copied");
	return FALSE;
    }

    xTrace0(proxy, TR_DETAILED, "Can build mach msg around xkernel msg");
    xTrace2(proxy, TR_DETAILED, "mach msg start: %x, xk msg start: %x",
	    (int)*machMsg, (int)m->stackHeadPtr);
    return TRUE;
#else  /* ! XK_PROXY_MSG_HACK */
    return FALSE;
#endif /* XK_PROXY_MSG_HACK */
}
//! Get the global index corresponding to the given local index. KOKKOS_INLINE_FUNCTION GlobalOrdinal getGlobalElement (const LocalOrdinal localIndex) const { if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) { return ::Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid (); } if (isContiguous ()) { return getMinGlobalIndex () + localIndex; } else { return lgMap_(localIndex); } }
/*
 * Descriptor-based send entry point: forwards to ompi_send_f(), first
 * packing the data into a temporary buffer if the array described by
 * `desc` is not contiguous.
 *
 *   desc     - C descriptor of the Fortran buffer argument.
 *   count, datatype, dest, tag, comm
 *            - passed through unchanged to ompi_send_f().
 *   ierr     - out: status from ompi_send_f(), or MPI_ERR_NO_MEM if the
 *              temporary packing buffer could not be allocated (in which
 *              case nothing is sent).
 *
 * The temporary buffer holds elem_len * numElements(desc) bytes and is
 * released before returning.
 */
void ompi_send_f08_desc_f(CFI_cdesc_t *desc, MPI_Fint *count, MPI_Fint *datatype,
                          MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm,
                          MPI_Fint *ierr)
{
    if (isContiguous(desc)) {
        /* The data can be sent straight from the caller's storage. */
        ompi_send_f(desc->base_addr, count, datatype, dest, tag, comm, ierr);
    } else {
        size_t cont_size = desc->elem_len * numElements(desc);
        void *cont_buf = malloc(cont_size);

        /*
         * malloc(0) may legitimately return NULL, so only treat NULL as an
         * allocation failure when bytes were actually requested.  Without
         * this check copyToContiguous() would write through a NULL pointer.
         */
        if (NULL == cont_buf && 0 != cont_size) {
            if (NULL != ierr) {
                *ierr = (MPI_Fint) MPI_ERR_NO_MEM;
            }
            return;
        }

        /* Pack the described elements into the temporary buffer; the
           returned end pointer is not needed here. */
        (void) copyToContiguous(desc, cont_buf, 0, desc->rank);

        ompi_send_f(cont_buf, count, datatype, dest, tag, comm, ierr);
        free(cont_buf);
    }
}
/*
 * Optimise the order of dimensions in the query so the tprod code works
 * most efficiently.
 *
 *   zrest        - output layout; rebuilt here when squeezing changed the
 *                  number of dimensions of xrest.
 *   xrest, yrest - outer-product ("rest") layouts of X and Y; squeezed as a
 *                  pair and possibly swapped with each other.
 *   xmacc, ymacc - accumulated-dimension layouts of X and Y; squeezed as a
 *                  pair and swapped whenever xrest/yrest are swapped.
 *
 * Returns OK on success, or NONCONTIGUOUSZ (after reporting through ERROR)
 * when zrest has to be rebuilt but is not contiguous.
 */
TprodErrorCode optimisetprodQuery(MxInfo *zrest, MxInfo *xrest, MxInfo *yrest,
                                  MxInfo *xmacc, MxInfo *ymacc){
  int d;

  /* squeeze out redundant dimensions from each X/Y pair */
  squeezemxInfoPair(xrest,yrest);
  squeezemxInfoPair(xmacc,ymacc);

  /* Equal dimension counts mean xrest was not squashed, so zrest can be
     used as it is.  Otherwise the squashed zrest has to be recomputed. */
  if ( zrest->nd != xrest->nd ){
    if ( !isContiguous(*zrest) ) {
      ERROR("Z must be contiguous for tprod to work!");
      return NONCONTIGUOUSZ;
    }
    /* Rebuild sizes and strides so we know how to stride over z.
       stride[0] is kept as it is (should be 1?).
       N.B. this assumes z is contiguous, and it writes stride[d+1], i.e.
       up to stride[xrest->nd] -- presumably the stride array has room for
       that extra entry; TODO confirm. */
    zrest->numel = 1;
    for ( d = 0; d < xrest->nd; d++ ){
      zrest->sz[d]       = MAX(xrest->sz[d],yrest->sz[d]);
      zrest->stride[d+1] = zrest->sz[d]*zrest->stride[d];
      zrest->numel      *= zrest->sz[d];
    }
    zrest->nd = xrest->nd;
  }

  /* A pass that moved aligned dimensions out of the first 2 dims (so the
     2x2 step code could be used) used to live here; it is disabled. */

  /* N.B. the inner loops ASSUME that X comes first in the output, so swap
     X and Y when yrest is not empty and x is either empty or comes after
     it. */
  if ( yrest->stride[0] > 0 ){
    if ( xrest->stride[0] == 0 || xrest->stride[0] > yrest->stride[0] ){
      MxInfo tmp;
      tmp = *yrest;  *yrest = *xrest;  *xrest = tmp;
      tmp = *ymacc;  *ymacc = *xmacc;  *xmacc = tmp;
    }
  }
  return OK;
}
/*
 * Function form of the isContiguous() test for message m: returns
 * whatever isContiguous(m) evaluates to.
 */
boolean_t
msgIsContiguous( Msg m )
{
    return isContiguous(m);
}
// Construct a multivector from the columns of `source` selected by `index`.
//
//   source - multivector whose columns are selected.
//   index  - column indices; each one is validated with source.checkIndex().
//   view   - if true the multivector rows are obtained with subView() and
//            the scalars are wrapped with Teuchos::View; if false subCopy()
//            and a copied scalar matrix are used instead.
//
// A view over non-contiguous indices is not supported and is reported
// through locaErrorCheck->throwError().
LOCA::Extended::MultiVector::MultiVector(
                  const LOCA::Extended::MultiVector& source,
                  const std::vector<int>& index, bool view) :
  globalData(source.globalData),
  numColumns(index.size()),
  numMultiVecRows(source.numMultiVecRows),
  numScalarRows(source.numScalarRows),
  multiVectorPtrs(numMultiVecRows),
  scalarsPtr(),
  extendedVectorPtrs(numColumns),
  isView(view)
{
  const char* const callerName = "LOCA::Extended::MultiVector()";

  // Validate every requested column index against the source
  for (std::vector<int>::const_iterator it = index.begin();
       it != index.end(); ++it)
    source.checkIndex(callerName, *it);

  // Start with no extended vectors for any column
  for (int col = 0; col < numColumns; ++col)
    extendedVectorPtrs[col] = Teuchos::null;

  // Contiguous index sets allow the scalar block to be addressed directly
  const bool isCont = isContiguous(index);

  // Multivector rows: view or copy of the selected columns
  for (int row = 0; row < numMultiVecRows; ++row) {
    if (view)
      multiVectorPtrs[row] = source.multiVectorPtrs[row]->subView(index);
    else
      multiVectorPtrs[row] = source.multiVectorPtrs[row]->subCopy(index);
  }

  // Scalars
  if (isCont) {
    // Pointer to the first selected column of the source scalar matrix.
    // NOTE(review): index[0] is read here, so this presumably relies on
    // index being non-empty whenever isContiguous() is true -- confirm.
    double *firstCol =
      source.scalarsPtr->values() + source.scalarsPtr->numRows()*index[0];
    if (view) {
      scalarsPtr =
        Teuchos::rcp(new NOX::Abstract::MultiVector::DenseMatrix(
                       Teuchos::View, firstCol, numScalarRows,
                       numScalarRows, numColumns));
    }
    else {
      scalarsPtr =
        Teuchos::rcp(new NOX::Abstract::MultiVector::DenseMatrix(
                       Teuchos::Copy, firstCol, numScalarRows,
                       numScalarRows, numColumns));
    }
  }
  else if (view) {
    globalData->locaErrorCheck->throwError(
      callerName,
      "Sub-view with non-contiguous indices is not supported");
  }
  else {
    // Non-contiguous copy: gather the selected columns entry by entry
    scalarsPtr =
      Teuchos::rcp(new NOX::Abstract::MultiVector::DenseMatrix(
                     numScalarRows, numColumns));
    for (int col = 0; col < numColumns; ++col)
      for (int row = 0; row < numScalarRows; ++row)
        (*scalarsPtr)(row,col) = (*source.scalarsPtr)(row,index[col]);
  }
}