void Decompress(char* source, char* dest) { BinaryReader reader(source); ReadHeader(reader); BinaryWriter writer(dest); SlidingWindow window(nullptr, bufferSize_, MaxOffset(), MaxLength() - 1); window.Initialize(); while(reader.IsEOF() == false) { int length = reader.ReadBits(lengthBits_); int value; if(length == MaxLength()) { break; } if(length == 0) { value = reader.ReadByte(); } else { int offset = reader.ReadBits(offsetBits_); value = reader.ReadByte(); for(int i = 0; i < length; i++) { int temp = window.CurrentByte(-offset - 1); writer.WriteByte(temp); window.WriteByte(temp); window.AdvanceWindow(1); } } writer.WriteByte(value); window.WriteByte(value); window.AdvanceWindow(1); } }
void AllGather ( const DistMatrix<T, U, V >& A, DistMatrix<T,Collect<U>(),Collect<V>()>& B ) { EL_DEBUG_CSE AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.SetGrid( A.Grid() ); B.Resize( height, width ); if( A.Participating() ) { if( A.DistSize() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else { const Int colStride = A.ColStride(); const Int rowStride = A.RowStride(); const Int distStride = colStride*rowStride; const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,rowStride); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); vector<T> buf; FastResize( buf, (distStride+1)*portionSize ); T* sendBuf = &buf[0]; T* recvBuf = &buf[portionSize]; // Pack util::InterleaveMatrix ( A.LocalHeight(), A.LocalWidth(), A.LockedBuffer(), 1, A.LDim(), sendBuf, 1, A.LocalHeight() ); // Communicate mpi::AllGather ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() ); // Unpack util::StridedUnpack ( height, width, A.ColAlign(), colStride, A.RowAlign(), rowStride, recvBuf, portionSize, B.Buffer(), B.LDim() ); } } if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF ) El::Broadcast( B, A.CrossComm(), A.Root() ); }
// Packs A, reduce-scatters the packed data over B.DistComm(), and passes the
// received portion together with alpha to axpy::util::InterleaveMatrixUpdate
// to update B's local buffer.
//
// alpha: scale factor forwarded to InterleaveMatrixUpdate.
// A:     source matrix; must live on B's grid and have B's dimensions.
// B:     matrix being updated; processes with !B.Participating() return
//        before any communication.
//
// A LogicError is raised when the dimensions of A and B differ.
void Scatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::Scatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Sizes of A and B must match");
    if( !B.Participating() )
        return;

    const Int colStride = B.ColStride();
    const Int rowStride = B.RowStride();
    const Int colAlign = B.ColAlign();
    const Int rowAlign = B.RowAlign();

    const Int height = B.Height();
    const Int width = B.Width();
    const Int localHeight = B.LocalHeight();
    const Int localWidth = B.LocalWidth();
    const Int maxLocalHeight = MaxLength(height,colStride);
    const Int maxLocalWidth = MaxLength(width,rowStride);

    const Int recvSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
    const Int sendSize = colStride*rowStride*recvSize;

    // The buffer has to be sized, not merely reserved: reserve() leaves
    // size() at zero, so writing through data() would fall outside the
    // vector's valid element range.
    vector<T> buffer( sendSize );

    // Pack
    copy::util::StridedPack
    ( height, width,
      colAlign, colStride,
      rowAlign, rowStride,
      A.LockedBuffer(), A.LDim(),
      buffer.data(),    recvSize );

    // Communicate
    mpi::ReduceScatter( buffer.data(), recvSize, B.DistComm() );

    // Unpack our received data
    axpy::util::InterleaveMatrixUpdate
    ( alpha, localHeight, localWidth,
      buffer.data(), 1, localHeight,
      B.Buffer(),    1, B.LDim() );
}
// Sets the length of this vector to n and then sets the low bit of _maxlen
// (presumably the flag that fixed() reports -- confirm against the header).
//
// A LogicError is raised when MaxLength() is positive or the vector is
// already fixed.
void vec_GF2::FixLength(long n)
{
   const bool fixable = (MaxLength() <= 0) && !fixed();
   if (!fixable)
      LogicError("can't fix this vector");

   SetLength(n);
   _maxlen |= 1;
}
void PartialColScatter ( T alpha, const ElementalMatrix<T>& A, ElementalMatrix<T>& B ) { DEBUG_ONLY(CSE cse("axpy_contract::PartialColScatter")) AssertSameGrids( A, B ); if( A.Height() != B.Height() || A.Width() != B.Width() ) LogicError("A and B must be the same size"); #ifdef EL_CACHE_WARNINGS if( A.Width() != 1 && A.Grid().Rank() == 0 ) { cerr << "axpy_contract::PartialColScatterUpdate potentially causes a large " "amount of cache-thrashing. If possible, avoid it by forming the " "(conjugate-)transpose of the [UGath,* ] matrix instead." << endl; } #endif if( B.ColAlign() % A.ColStride() == A.ColAlign() ) { const Int colStride = B.ColStride(); const Int colStridePart = B.PartialColStride(); const Int colStrideUnion = B.PartialUnionColStride(); const Int colRankPart = B.PartialColRank(); const Int colAlign = B.ColAlign(); const Int height = B.Height(); const Int width = B.Width(); const Int localHeight = B.LocalHeight(); const Int maxLocalHeight = MaxLength( height, colStride ); const Int recvSize = mpi::Pad( maxLocalHeight*width ); const Int sendSize = colStrideUnion*recvSize; //vector<T> buffer( sendSize ); vector<T> buffer; buffer.reserve( sendSize ); // Pack copy::util::PartialColStridedPack ( height, width, colAlign, colStride, colStrideUnion, colStridePart, colRankPart, A.ColShift(), A.LockedBuffer(), A.LDim(), buffer.data(), recvSize ); // Communicate mpi::ReduceScatter( buffer.data(), recvSize, B.PartialUnionColComm() ); // Unpack our received data axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, width, buffer.data(), 1, localHeight, B.Buffer(), 1, B.LDim() ); } else LogicError("Unaligned PartialColScatter not implemented"); }
// Assigns the [VR,* ] matrix A to this [* ,* ] matrix.
//
// This matrix is resized to A's dimensions. Participating processes pack
// their local columns contiguously, all-gather the packed portions over the
// grid's VR communicator, and then scatter each portion's rows into this
// matrix's buffer with a row stride equal to the grid size. Scratch space
// comes from auxMemory_ and is released before returning.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,VR,STAR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [VR,* ]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& grid = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( !this->Participating() )
        return *this;

    const Int numProcs = grid.Size();
    const Int m = this->Height();
    const Int n = this->Width();
    const Int mLocalA = A.LocalHeight();
    const Int maxLocalHeight = MaxLength(m,numProcs);
    const Int portionSize = mpi::Pad( maxLocalHeight*n );

    // One portion for sending, followed by one per process for receiving.
    T* scratch = this->auxMemory_.Require( (numProcs+1)*portionSize );
    T* sendBuf = scratch;
    T* recvBuf = scratch + portionSize;

    // Pack
    const Int ldimA = A.LDim();
    const T* bufA = A.LockedBuffer();
    PARALLEL_FOR
    for( Int j=0; j<n; ++j )
        MemCopy( sendBuf + j*mLocalA, bufA + j*ldimA, mLocalA );

    // Communicate
    mpi::AllGather
    ( sendBuf, portionSize, recvBuf, portionSize, grid.VRComm() );

    // Unpack
    T* bufThis = this->Buffer();
    const Int ldimThis = this->LDim();
    const Int colAlignA = A.ColAlignment();
    OUTER_PARALLEL_FOR
    for( Int k=0; k<numProcs; ++k )
    {
        const T* portion = recvBuf + k*portionSize;
        const Int shift = Shift_( k, colAlignA, numProcs );
        const Int mLocal = Length_( m, shift, numProcs );
        INNER_PARALLEL_FOR
        for( Int j=0; j<n; ++j )
        {
            T* dst = bufThis + shift + j*ldimThis;
            const T* src = portion + j*mLocal;
            for( Int iLoc=0; iLoc<mLocal; ++iLoc )
                dst[iLoc*numProcs] = src[iLoc];
        }
    }
    this->auxMemory_.Release();
    return *this;
}
// Sets the low bit of _maxlen without changing the vector's length.
//
// Does nothing when the vector is already fixed. Raises a LogicError when
// length() differs from MaxLength().
void vec_GF2::FixAtCurrentLength()
{
   if (!fixed()) {
      if (length() != MaxLength())
         LogicError("FixAtCurrentLength: can't fix this vector");

      _maxlen |= 1;
   }
}
// Copies the contents of aRhs into the memory at iPtr and adopts its length.
//
// Self-assignment is a no-op. FatalError() is invoked when aRhs holds more
// elements than MaxLength() allows.
TPtr16 &TPtr16::operator=(const TPtr16 &aRhs)
{
    if(this != &aRhs)
    {
        if(aRhs.Length() > MaxLength())
            FatalError();

        // Length() counts 16-bit units, so scale to bytes for memcpy.
        memcpy(iPtr, aRhs.Ptr(), aRhs.Length()*sizeof(TUint16));
        SetLength(aRhs.Length());
    }
    return *this;
}
// Assigns the [* ,VR] matrix A to this [* ,* ] matrix.
//
// This matrix is resized to A's dimensions. Participating processes pack
// their local columns contiguously, all-gather the packed portions over the
// grid's VR communicator, and copy each portion's columns into this matrix's
// buffer with a column stride equal to the grid size. Scratch space comes
// from auxMemory_ and is released before returning.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,STAR,VR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [* ,VR]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& grid = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( !this->Participating() )
        return *this;

    const Int numProcs = grid.Size();
    const Int m = this->Height();
    const Int n = this->Width();
    const Int nLocalA = A.LocalWidth();
    const Int maxLocalWidth = MaxLength(n,numProcs);
    const Int portionSize = mpi::Pad( m*maxLocalWidth );

    // One portion for sending, followed by one per process for receiving.
    T* scratch = this->auxMemory_.Require( (numProcs+1)*portionSize );
    T* sendBuf = scratch;
    T* recvBuf = scratch + portionSize;

    // Pack
    const Int ldimA = A.LDim();
    const T* bufA = A.LockedBuffer();
    PARALLEL_FOR
    for( Int jLoc=0; jLoc<nLocalA; ++jLoc )
        MemCopy( sendBuf + jLoc*m, bufA + jLoc*ldimA, m );

    // Communicate
    mpi::AllGather
    ( sendBuf, portionSize, recvBuf, portionSize, grid.VRComm() );

    // Unpack
    T* bufThis = this->Buffer();
    const Int ldimThis = this->LDim();
    const Int rowAlignA = A.RowAlignment();
    OUTER_PARALLEL_FOR
    for( Int k=0; k<numProcs; ++k )
    {
        const T* portion = recvBuf + k*portionSize;
        const Int shift = Shift_( k, rowAlignA, numProcs );
        const Int nLocal = Length_( n, shift, numProcs );
        INNER_PARALLEL_FOR
        for( Int jLoc=0; jLoc<nLocal; ++jLoc )
            MemCopy
            ( bufThis + (shift+jLoc*numProcs)*ldimThis,
              portion + jLoc*m, m );
    }
    this->auxMemory_.Release();
    return *this;
}
void TPtr16::Copy(const TDesC8 &aDes) { // This is not quite 100% compatible because it does a correct // UTF-8 to UCS-2 conversion, instead of just stuffing in zeros. TInt outLength = 0; TText *outBuf = utf16FromUtf8(aDes.Ptr(), aDes.Length(), outLength); if(outLength > MaxLength()) FatalError(); memcpy(iPtr, outBuf, outLength*2); SetLength(outLength); delete [] outBuf; }
// Produces the localized validation message for the given validity state.
//
// TOO_LONG and TOO_SHORT format a message from the relevant length limit and
// the current text length; VALUE_MISSING fetches a fixed localized string;
// every other state is delegated to nsIConstraintValidation. The message is
// assigned to aValidationMessage regardless of the lookup result, and that
// result is returned.
nsresult HTMLTextAreaElement::GetValidationMessage(
    nsAString& aValidationMessage, ValidityStateType aType) {
  switch (aType) {
    case VALIDITY_STATE_TOO_LONG: {
      const int32_t limit = MaxLength();
      const int32_t current = GetTextLength();

      nsAutoString limitText;
      nsAutoString currentText;
      limitText.AppendInt(limit);
      currentText.AppendInt(current);

      const char16_t* params[] = {limitText.get(), currentText.get()};
      nsAutoString formatted;
      const nsresult rv = nsContentUtils::FormatLocalizedString(
          nsContentUtils::eDOM_PROPERTIES, "FormValidationTextTooLong", params,
          formatted);
      aValidationMessage = formatted;
      return rv;
    }
    case VALIDITY_STATE_TOO_SHORT: {
      const int32_t limit = MinLength();
      const int32_t current = GetTextLength();

      nsAutoString limitText;
      nsAutoString currentText;
      limitText.AppendInt(limit);
      currentText.AppendInt(current);

      const char16_t* params[] = {limitText.get(), currentText.get()};
      nsAutoString formatted;
      const nsresult rv = nsContentUtils::FormatLocalizedString(
          nsContentUtils::eDOM_PROPERTIES, "FormValidationTextTooShort", params,
          formatted);
      aValidationMessage = formatted;
      return rv;
    }
    case VALIDITY_STATE_VALUE_MISSING: {
      nsAutoString fetched;
      const nsresult rv = nsContentUtils::GetLocalizedString(
          nsContentUtils::eDOM_PROPERTIES, "FormValidationValueMissing",
          fetched);
      aValidationMessage = fetched;
      return rv;
    }
    default:
      return nsIConstraintValidation::GetValidationMessage(aValidationMessage,
                                                           aType);
  }
}
// Packs A, reduce-scatters the packed data over B.PartialUnionRowComm(), and
// passes the received portion together with alpha to
// axpy::util::InterleaveMatrixUpdate to update B's local buffer.
//
// alpha: scale factor forwarded to InterleaveMatrixUpdate.
// A:     source matrix; must live on B's grid and have B's dimensions.
// B:     matrix being updated; processes with !B.Participating() return
//        before any communication.
//
// Only the aligned case (B.RowAlign() % A.RowStride() == A.RowAlign()) is
// implemented; any other alignment raises a LogicError, as does a size
// mismatch between A and B.
void PartialRowScatter
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B )
{
    DEBUG_ONLY(CSE cse("axpy_contract::PartialRowScatter"))
    AssertSameGrids( A, B );
    if( A.Height() != B.Height() || A.Width() != B.Width() )
        LogicError("Matrix sizes did not match");
    if( !B.Participating() )
        return;

    if( B.RowAlign() % A.RowStride() == A.RowAlign() )
    {
        const Int rowStride = B.RowStride();
        const Int rowStridePart = B.PartialRowStride();
        const Int rowStrideUnion = B.PartialUnionRowStride();
        const Int rowRankPart = B.PartialRowRank();

        const Int height = B.Height();
        const Int width = B.Width();
        const Int maxLocalWidth = MaxLength( width, rowStride );
        const Int recvSize = mpi::Pad( height*maxLocalWidth );
        const Int sendSize = rowStrideUnion*recvSize;

        // The buffer has to be sized, not merely reserved: reserve() leaves
        // size() at zero, so writing through data() would fall outside the
        // vector's valid element range.
        vector<T> buffer( sendSize );

        // Pack
        copy::util::PartialRowStridedPack
        ( height, width,
          B.RowAlign(), rowStride,
          rowStrideUnion, rowStridePart, rowRankPart,
          A.RowShift(),
          A.LockedBuffer(), A.LDim(),
          buffer.data(),    recvSize );

        // Communicate
        mpi::ReduceScatter
        ( buffer.data(), recvSize, B.PartialUnionRowComm() );

        // Unpack our received data
        axpy::util::InterleaveMatrixUpdate
        ( alpha, height, B.LocalWidth(),
          buffer.data(), 1, height,
          B.Buffer(),    1, B.LDim() );
    }
    else
        LogicError("Unaligned PartialRowScatter not implemented");
}
// Returns the largest final key over all keyframe sequences.
//
// Yields 0 when there are no sequences or when checkSequencesValidity()
// fails.
Real32 BlendedKeyframeAnimator::getLength(void) const
{
    if(getMFKeyframeSequences()->size() == 0 || !checkSequencesValidity())
    {
        return 0.0f;
    }

    Real32 LongestSequence(0.0f);
    for(UInt32 SeqIndex(0) ; SeqIndex < getMFKeyframeSequences()->size() ; ++SeqIndex)
    {
        // The last key of a sequence is taken as that sequence's length.
        LongestSequence = osgMax(LongestSequence,
                                 getKeyframeSequences(SeqIndex)->getKeys().back());
    }
    return LongestSequence;
}
bool HTMLTextAreaElement::IsTooLong() { if (!mValueChanged || !mLastValueChangeWasInteractive || !HasAttr(kNameSpaceID_None, nsGkAtoms::maxlength)) { return false; } int32_t maxLength = MaxLength(); // Maxlength of -1 means parsing error. if (maxLength == -1) { return false; } int32_t textLength = GetTextLength(); return textLength > maxLength; }
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - void Compress(char* source, char* dest, unsigned offsetBits, unsigned lengthBits) { assert(source != nullptr); assert(dest != nullptr); offsetBits_ = offsetBits; lengthBits_ = lengthBits; BinaryReader reader(source); SlidingWindow window(&reader, bufferSize_, MaxOffset(), MaxLength() - 1); window.Initialize(); BinaryWriter writer(dest); WriteHeader(writer); while(window.IsEOF() == false) { unsigned length; unsigned offset; if(window.FindLongestMatch(offset, length)) { unsigned char current; window.AdvanceWindow(length + 1); if(window.IsOutside()) { current = window.CurrentByte(-2); length--; } else current = window.CurrentByte(-1); if(length > 0) { writer.WriteBits(length, lengthBits_); writer.WriteBits(offset, offsetBits_); writer.WriteByte(current); if(logTokens_) { char buffer[1024]; std::sprintf(buffer, "o: %d, l: %d, c: %d ", offset, length, current); tokenLog_ += buffer; } } else { writer.WriteBits(0, lengthBits_); writer.WriteByte(current); if(logTokens_) { char buffer[1024]; std::sprintf(buffer, "o: %d, l: %d, c: %d ", 0, 0, current); tokenLog_ += buffer; } } } else { writer.WriteBits(0, lengthBits_); writer.WriteByte(window.CurrentByte()); if(logTokens_) { char buffer[1024]; std::sprintf(buffer, "o: %d, l: %d, c: %d ", 0, 0, window.CurrentByte()); tokenLog_ += buffer; } window.AdvanceWindow(1); } } writer.WriteBits(MaxLength(), lengthBits_); }
// Assigns the [* ,MD] matrix A to this [* ,* ] matrix.
//
// This matrix is resized to A's dimensions. Processes that participate in A
// pack their local columns; every process then takes part in an all-gather
// over the grid's VC communicator, and only the portions coming from ranks
// on A's diagonal path are unpacked. Scratch space comes from auxMemory_ and
// is released before returning.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,STAR,MD>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [* ,MD]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& grid = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( !this->Participating() )
        return *this;

    const Int numProcs = grid.Size();
    const Int lcm = grid.LCM();
    const Int ownerPath = A.diagPath_;
    const Int ownerPathRank = A.rowAlignment_;

    const Int m = this->Height();
    const Int n = this->Width();
    const Int nLocalA = A.LocalWidth();
    const Int maxLocalWidth = MaxLength( n, lcm );
    const Int portionSize = mpi::Pad( m*maxLocalWidth );

    // Since a MD communicator has not been implemented, we will take
    // the suboptimal route of 'rounding up' everyone's contribution over
    // the VC communicator.
    T* scratch = this->auxMemory_.Require( (numProcs+1)*portionSize );
    T* sendBuf = scratch;
    T* recvBuf = scratch + portionSize;

    // Pack
    if( A.Participating() )
    {
        const Int ldimA = A.LDim();
        const T* bufA = A.LockedBuffer();
        PARALLEL_FOR
        for( Int jLoc=0; jLoc<nLocalA; ++jLoc )
            MemCopy( sendBuf + jLoc*m, bufA + jLoc*ldimA, m );
    }

    // Communicate
    mpi::AllGather
    ( sendBuf, portionSize, recvBuf, portionSize, grid.VCComm() );

    // Unpack
    T* bufThis = this->Buffer();
    const Int ldimThis = this->LDim();
    OUTER_PARALLEL_FOR
    for( Int k=0; k<numProcs; ++k )
    {
        // Only ranks on the owning diagonal path contributed real data.
        if( grid.DiagPath( k ) == ownerPath )
        {
            const T* portion = recvBuf + k*portionSize;
            const Int pathRank = grid.DiagPathRank( k );
            const Int shift = Shift_( pathRank, ownerPathRank, lcm );
            const Int nLocal = Length_( n, shift, lcm );
            INNER_PARALLEL_FOR
            for( Int jLoc=0; jLoc<nLocal; ++jLoc )
                MemCopy
                ( bufThis + (shift+jLoc*lcm)*ldimThis,
                  portion + jLoc*m, m );
        }
    }
    this->auxMemory_.Release();
    return *this;
}
void AllGather ( const DistMatrix<T, U, V >& A, DistMatrix<T,Collect<U>(),Collect<V>()>& B ) { DEBUG_ONLY(CSE cse("copy::AllGather")) AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.SetGrid( A.Grid() ); B.Resize( height, width ); if( A.Participating() ) { const Int colStride = A.ColStride(); const Int rowStride = A.RowStride(); const Int distStride = colStride*rowStride; const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,rowStride); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); vector<T> buf( (distStride+1)*portionSize ); T* sendBuf = &buf[0]; T* recvBuf = &buf[portionSize]; // Pack util::InterleaveMatrix ( A.LocalHeight(), A.LocalWidth(), A.LockedBuffer(), 1, A.LDim(), sendBuf, 1, A.LocalHeight() ); // Communicate mpi::AllGather ( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() ); // Unpack util::StridedUnpack ( height, width, A.ColAlign(), colStride, A.RowAlign(), rowStride, recvBuf, portionSize, B.Buffer(), B.LDim() ); } if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF ) { // Pack from the root const Int BLocalHeight = B.LocalHeight(); const Int BLocalWidth = B.LocalWidth(); vector<T> buf(BLocalHeight*BLocalWidth); if( A.CrossRank() == A.Root() ) util::InterleaveMatrix ( BLocalHeight, BLocalWidth, B.LockedBuffer(), 1, B.LDim(), buf.data(), 1, BLocalHeight ); // Broadcast from the root mpi::Broadcast ( buf.data(), BLocalHeight*BLocalWidth, A.Root(), A.CrossComm() ); // Unpack if not the root if( A.CrossRank() != A.Root() ) util::InterleaveMatrix ( BLocalHeight, BLocalWidth, buf.data(), 1, BLocalHeight, B.Buffer(), 1, B.LDim() ); } }
NTL_START_IMPL

// FIXME: why do vec_GF2 and GF2X use different strategies for
// keeping high order bits cleared?  I don't think it matters
// much, but it is strange.

// Changes the length of the vector to n bits.
//
// Shrinking clears bits n..len-1 of the word storage. Growing within the
// current MaxLength() only updates the lengths; growing beyond it records
// the new maximum in _maxlen (stored as n << 1) and, when the word storage
// itself has to grow, zeroes the newly available words.
//
// Errors: LogicError for a negative n or a fixed vector, ResourceError when
// NTL_OVERFLOW(n, 1, 0) reports overflow. A call with n equal to the current
// length returns before any of these checks.
void vec_GF2::SetLength(long n)
{
   const long oldLen = length();

   if (n == oldLen) return;

   if (n < 0) LogicError("negative length in vec_GF2::SetLength");

   if (NTL_OVERFLOW(n, 1, 0))
      ResourceError("vec_GF2::SetLength: excessive length");

   if (fixed()) LogicError("SetLength: can't change this vector's length");

   long numWords = (n+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG;

   if (n < oldLen) {
      // have to clear bits n..len-1

      const long firstWord = n/NTL_BITS_PER_LONG;
      const long bitInWord = n % NTL_BITS_PER_LONG;
      const long lastWord = (oldLen-1)/NTL_BITS_PER_LONG;

      _ntl_ulong *words = rep.elts();

      // keep only the low bitInWord bits of the boundary word
      words[firstWord] &= (1UL << bitInWord) - 1UL;

      for (long w = firstWord+1; w <= lastWord; w++)
         words[w] = 0;

      _len = n;
      rep.QuickSetLength(numWords);
      return;
   }

   if (n <= MaxLength()) {
      _len = n;
      rep.QuickSetLength(numWords);
      return;
   }

   const long oldAlloc = rep.MaxLength();

   if (numWords <= oldAlloc) {
      _len = n;
      _maxlen = (n << 1);
      rep.QuickSetLength(numWords);
      return;
   }

   // have to grow vector and initialize to zero

   rep.SetLength(numWords);

   numWords = rep.MaxLength(); // careful! rep.MaxLength() may exceed the
                               // old value of wdlen...this is due to
                               // the awkward semantics of WordVector.

   _ntl_ulong *words = rep.elts();

   for (long w = oldAlloc; w < numWords; w++)
      words[w] = 0;

   _len = n;
   _maxlen = (n << 1);
}
void ColScatter ( T alpha, const ElementalMatrix<T>& A, ElementalMatrix<T>& B ) { DEBUG_ONLY(CSE cse("axpy_contract::ColScatter")) AssertSameGrids( A, B ); if( A.Height() != B.Height() || A.Width() != B.Width() ) LogicError("A and B must be the same size"); #ifdef EL_VECTOR_WARNINGS if( A.Width() == 1 && B.Grid().Rank() == 0 ) { cerr << "The vector version of ColScatter does not" " yet have a vector version implemented, but it would only " "require a modification of the vector version of RowScatter" << endl; } #endif #ifdef EL_CACHE_WARNINGS if( A.Width() != 1 && B.Grid().Rank() == 0 ) { cerr << "axpy_contract::ColScatter potentially causes a large " "amount of cache-thrashing. If possible, avoid it by forming the " "(conjugate-)transpose of the [* ,V] matrix instead." << endl; } #endif if( !B.Participating() ) return; const Int height = B.Height(); const Int localHeight = B.LocalHeight(); const Int localWidth = B.LocalWidth(); const Int colAlign = B.ColAlign(); const Int colStride = B.ColStride(); const Int rowDiff = B.RowAlign()-A.RowAlign(); // TODO: Allow for modular equivalence if possible if( rowDiff == 0 ) { const Int maxLocalHeight = MaxLength(height,colStride); const Int recvSize = mpi::Pad( maxLocalHeight*localWidth ); const Int sendSize = colStride*recvSize; //vector<T> buffer( sendSize ); vector<T> buffer; buffer.reserve( sendSize ); // Pack copy::util::ColStridedPack ( height, localWidth, colAlign, colStride, A.LockedBuffer(), A.LDim(), buffer.data(), recvSize ); // Communicate mpi::ReduceScatter( buffer.data(), recvSize, B.ColComm() ); // Update with our received data axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, localWidth, buffer.data(), 1, localHeight, B.Buffer(), 1, B.LDim() ); } else { #ifdef EL_UNALIGNED_WARNINGS if( B.Grid().Rank() == 0 ) cerr << "Unaligned ColScatter" << endl; #endif const Int localWidthA = A.LocalWidth(); const Int maxLocalHeight = MaxLength(height,colStride); const Int recvSize_RS = mpi::Pad( 
maxLocalHeight*localWidthA ); const Int sendSize_RS = colStride*recvSize_RS; const Int recvSize_SR = localHeight*localWidth; //vector<T> buffer( recvSize_RS + Max(sendSize_RS,recvSize_SR) ); vector<T> buffer; buffer.reserve( recvSize_RS + Max(sendSize_RS,recvSize_SR) ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[recvSize_RS]; // Pack copy::util::ColStridedPack ( height, localWidth, colAlign, colStride, A.LockedBuffer(), A.LDim(), secondBuf, recvSize_RS ); // Reduce-scatter over each col mpi::ReduceScatter( secondBuf, firstBuf, recvSize_RS, B.ColComm() ); // Trade reduced data with the appropriate col const Int sendCol = Mod( B.RowRank()+rowDiff, B.RowStride() ); const Int recvCol = Mod( B.RowRank()-rowDiff, B.RowStride() ); mpi::SendRecv ( firstBuf, localHeight*localWidthA, sendCol, secondBuf, localHeight*localWidth, recvCol, B.RowComm() ); // Update with our received data axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, localWidth, secondBuf, 1, localHeight, B.Buffer(), 1, B.LDim() ); } }
void RowScatter ( T alpha, const ElementalMatrix<T>& A, ElementalMatrix<T>& B ) { DEBUG_ONLY(CSE cse("axpy_contract::RowScatter")) AssertSameGrids( A, B ); if( A.Height() != B.Height() || A.Width() != B.Width() ) LogicError("Matrix sizes did not match"); if( !B.Participating() ) return; const Int width = B.Width(); const Int colDiff = B.ColAlign()-A.ColAlign(); if( colDiff == 0 ) { if( width == 1 ) { const Int localHeight = B.LocalHeight(); const Int portionSize = mpi::Pad( localHeight ); //vector<T> buffer( portionSize ); vector<T> buffer; buffer.reserve( portionSize ); // Reduce to rowAlign const Int rowAlign = B.RowAlign(); mpi::Reduce ( A.LockedBuffer(), buffer.data(), portionSize, rowAlign, B.RowComm() ); if( B.RowRank() == rowAlign ) { axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, 1, buffer.data(), 1, localHeight, B.Buffer(), 1, B.LDim() ); } } else { const Int rowStride = B.RowStride(); const Int rowAlign = B.RowAlign(); const Int localHeight = B.LocalHeight(); const Int localWidth = B.LocalWidth(); const Int maxLocalWidth = MaxLength(width,rowStride); const Int portionSize = mpi::Pad( localHeight*maxLocalWidth ); const Int sendSize = rowStride*portionSize; // Pack //vector<T> buffer( sendSize ); vector<T> buffer; buffer.reserve( sendSize ); copy::util::RowStridedPack ( localHeight, width, rowAlign, rowStride, A.LockedBuffer(), A.LDim(), buffer.data(), portionSize ); // Communicate mpi::ReduceScatter( buffer.data(), portionSize, B.RowComm() ); // Update with our received data axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, localWidth, buffer.data(), 1, localHeight, B.Buffer(), 1, B.LDim() ); } } else { #ifdef EL_UNALIGNED_WARNINGS if( B.Grid().Rank() == 0 ) cerr << "Unaligned RowScatter" << endl; #endif const Int colRank = B.ColRank(); const Int colStride = B.ColStride(); const Int sendRow = Mod( colRank+colDiff, colStride ); const Int recvRow = Mod( colRank-colDiff, colStride ); const Int localHeight = B.LocalHeight(); const Int 
localHeightA = A.LocalHeight(); if( width == 1 ) { //vector<T> buffer( localHeight+localHeightA ); vector<T> buffer; buffer.reserve( localHeight+localHeightA ); T* sendBuf = &buffer[0]; T* recvBuf = &buffer[localHeightA]; // Reduce to rowAlign const Int rowAlign = B.RowAlign(); mpi::Reduce ( A.LockedBuffer(), sendBuf, localHeightA, rowAlign, B.RowComm() ); if( B.RowRank() == rowAlign ) { // Perform the realignment mpi::SendRecv ( sendBuf, localHeightA, sendRow, recvBuf, localHeight, recvRow, B.ColComm() ); axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, 1, recvBuf, 1, localHeight, B.Buffer(), 1, B.LDim() ); } } else { const Int rowStride = B.RowStride(); const Int rowAlign = B.RowAlign(); const Int localWidth = B.LocalWidth(); const Int maxLocalWidth = MaxLength(width,rowStride); const Int recvSize_RS = mpi::Pad( localHeightA*maxLocalWidth ); const Int sendSize_RS = rowStride * recvSize_RS; const Int recvSize_SR = localHeight * localWidth; //vector<T> buffer( recvSize_RS + Max(sendSize_RS,recvSize_SR) ); vector<T> buffer; buffer.reserve( recvSize_RS + Max(sendSize_RS,recvSize_SR) ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[recvSize_RS]; // Pack copy::util::RowStridedPack ( localHeightA, width, rowAlign, rowStride, A.LockedBuffer(), A.LDim(), secondBuf, recvSize_RS ); // Reduce-scatter over each process row mpi::ReduceScatter( secondBuf, firstBuf, recvSize_RS, B.RowComm() ); // Trade reduced data with the appropriate process row mpi::SendRecv ( firstBuf, localHeightA*localWidth, sendRow, secondBuf, localHeight*localWidth, recvRow, B.ColComm() ); // Update with our received data axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, localWidth, secondBuf, 1, localHeight, B.Buffer(), 1, B.LDim() ); } } }
#endif B.AlignColsAndResize ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false ); if( !A.Participating() ) return; EL_DEBUG_ONLY( if( A.LocalWidth() != A.Width() ) LogicError("This routine assumes rows are not distributed"); ) const Int colStrideUnion = A.PartialUnionColStride(); const Int colStridePart = A.PartialColStride(); const Int colDiff = B.ColAlign() - Mod(A.ColAlign(),colStridePart); const Int maxLocalHeight = MaxLength(height,A.ColStride()); const Int portionSize = mpi::Pad( maxLocalHeight*width ); if( colDiff == 0 ) { if( A.PartialUnionColStride() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else { vector<T> buffer; FastResize( buffer, (colStrideUnion+1)*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[portionSize];
void InPlaceRedist ( DistMatrix<F>& paddedZ, Int rowAlign, const Base<F>* readBuffer ) { typedef Base<F> Real; const Grid& g = paddedZ.Grid(); const Int height = paddedZ.Height(); const Int width = paddedZ.Width(); const Int r = g.Height(); const Int c = g.Width(); const Int p = r * c; const Int row = g.Row(); const Int col = g.Col(); const Int rowShift = paddedZ.RowShift(); const Int colAlign = paddedZ.ColAlign(); const Int localWidth = Length(width,g.VRRank(),rowAlign,p); const Int maxHeight = MaxLength(height,r); const Int maxWidth = MaxLength(width,p); const Int portionSize = mpi::Pad( maxHeight*maxWidth ); // Allocate our send/recv buffers std::vector<Real> buffer(2*r*portionSize); Real* sendBuffer = &buffer[0]; Real* recvBuffer = &buffer[r*portionSize]; // Pack OUTER_PARALLEL_FOR for( Int k=0; k<r; ++k ) { Real* data = &sendBuffer[k*portionSize]; const Int thisColShift = Shift(k,colAlign,r); const Int thisLocalHeight = Length(height,thisColShift,r); INNER_PARALLEL_FOR_COLLAPSE2 for( Int j=0; j<localWidth; ++j ) for( Int i=0; i<thisLocalHeight; ++i ) data[i+j*thisLocalHeight] = readBuffer[thisColShift+i*r+j*height]; } // Communicate mpi::AllToAll ( sendBuffer, portionSize, recvBuffer, portionSize, g.ColComm() ); // Unpack const Int localHeight = Length(height,row,colAlign,r); OUTER_PARALLEL_FOR for( Int k=0; k<r; ++k ) { const Real* data = &recvBuffer[k*portionSize]; const Int thisRank = col+k*c; const Int thisRowShift = Shift(thisRank,rowAlign,p); const Int thisRowOffset = (thisRowShift-rowShift) / c; const Int thisLocalWidth = Length(width,thisRowShift,p); INNER_PARALLEL_FOR for( Int j=0; j<thisLocalWidth; ++j ) { const Real* dataCol = &(data[j*localHeight]); Real* thisCol = (Real*)paddedZ.Buffer(0,thisRowOffset+j*r); if( IsComplex<F>::val ) { for( Int i=0; i<localHeight; ++i ) { thisCol[2*i] = dataCol[i]; thisCol[2*i+1] = 0; } } else { MemCopy( thisCol, dataCol, localHeight ); } } } }
void Scatter ( const DistMatrix<T,CIRC,CIRC>& A, ElementalMatrix<T>& B ) { DEBUG_CSE AssertSameGrids( A, B ); const Int m = A.Height(); const Int n = A.Width(); const Int colStride = B.ColStride(); const Int rowStride = B.RowStride(); B.Resize( m, n ); if( B.CrossSize() != 1 || B.RedundantSize() != 1 ) { // TODO: // Broadcast over the redundant communicator and use mpi::Translate // rank to determine whether a process is the root of the broadcast. GeneralPurpose( A, B ); return; } const Int pkgSize = mpi::Pad(MaxLength(m,colStride)*MaxLength(n,rowStride)); const Int recvSize = pkgSize; const Int sendSize = B.DistSize()*pkgSize; // Translate the root of A into the DistComm of B (if possible) const Int root = A.Root(); const Int target = mpi::Translate( A.CrossComm(), root, B.DistComm() ); if( target == mpi::UNDEFINED ) return; if( B.DistSize() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); return; } vector<T> buffer; T* recvBuf=0; // some compilers (falsely) warn otherwise if( A.CrossRank() == root ) { FastResize( buffer, sendSize+recvSize ); T* sendBuf = &buffer[0]; recvBuf = &buffer[sendSize]; // Pack the send buffer copy::util::StridedPack ( m, n, B.ColAlign(), colStride, B.RowAlign(), rowStride, A.LockedBuffer(), A.LDim(), sendBuf, pkgSize ); // Scatter from the root mpi::Scatter ( sendBuf, pkgSize, recvBuf, pkgSize, target, B.DistComm() ); } else { FastResize( buffer, recvSize ); recvBuf = &buffer[0]; // Perform the receiving portion of the scatter from the non-root mpi::Scatter ( static_cast<T*>(0), pkgSize, recvBuf, pkgSize, target, B.DistComm() ); } // Unpack copy::util::InterleaveMatrix ( B.LocalHeight(), B.LocalWidth(), recvBuf, 1, B.LocalHeight(), B.Buffer(), 1, B.LDim() ); }
void Slider::updateLayout(void) { UInt16 MajorAxis, MinorAxis; if(getOrientation() == VERTICAL_ORIENTATION) { MajorAxis = 1; } else { MajorAxis = 0; } MinorAxis = (MajorAxis+1)%2; updateSliderTrack(); //Update the Track if(getDrawTrack() && getTrackDrawObject() != NULL) { Pnt2f BorderTopLeft, BorderBottomRight; getInsideInsetsBounds(BorderTopLeft, BorderBottomRight); Vec2f Size(getTrackDrawObject()->getPreferredSize()); Pnt2f AlignedPosition; Size[MajorAxis] = getTrackLength(); if(getOrientation() == VERTICAL_ORIENTATION) { AlignedPosition = calculateAlignment(BorderTopLeft, (BorderBottomRight-BorderTopLeft), Size, 0.5, getAlignment()); } else { AlignedPosition = calculateAlignment(BorderTopLeft, (BorderBottomRight-BorderTopLeft), Size, getAlignment(), 0.5); } getTrackDrawObject()->setPosition(AlignedPosition); getTrackDrawObject()->setSize(Size); } //Update the MinorTickMarks if(getDrawMinorTicks() && getRangeModel() != NULL) { Pnt2f MinorTickTopLeft, MinorTickBottomRight; getDrawObjectBounds(*editMFMinorTickDrawObjects(), MinorTickTopLeft, MinorTickBottomRight); Vec2f Alignment; Real32 MaxLength(0.0); for(UInt32 i(0) ; i<getMFMinorTickDrawObjects()->size() ; ++i) { Pnt2f DrawObjectTopLeft, DrawObjectBottomRight; getMinorTickDrawObjects(i)->getBounds(DrawObjectTopLeft, DrawObjectBottomRight); MaxLength = osgMax(MaxLength, DrawObjectBottomRight.x()-DrawObjectTopLeft.x()); } editMFMinorTickPositions()->clear(); for(UInt32 i(0) ; i< osgAbs<Int32>(getMaximum() - getMinimum())/getMinorTickSpacing() ; ++i) { if( (i * getMinorTickSpacing())%getMajorTickSpacing() != 0 ) { Alignment[MajorAxis] = static_cast<Real32>(i * getMinorTickSpacing())/static_cast<Real32>(getMaximum() - getMinimum()); editMFMinorTickPositions()->push_back( calculateSliderAlignment(getSliderTrackTopLeft(), getSliderTrackSize(), (MinorTickBottomRight - MinorTickTopLeft), Alignment.y(), Alignment.x())); if(getTicksOnRightBottom()) { editMFMinorTickPositions()->back()[MinorAxis] = 
getTrackDrawObject()->getPosition()[MinorAxis] + getTrackDrawObject()->getSize()[MinorAxis] + getTrackToTickOffset(); } else { editMFMinorTickPositions()->back()[MinorAxis] = getTrackDrawObject()->getPosition()[MinorAxis] - getTrackToTickOffset() - MaxLength; } } } } //Update the MajorTickMarks if(getDrawMajorTicks() && getRangeModel() != NULL) { Pnt2f MajorTickTopLeft, MajorTickBottomRight; getDrawObjectBounds(*editMFMajorTickDrawObjects(), MajorTickTopLeft, MajorTickBottomRight); Vec2f Alignment; Real32 MaxLength(0.0); for(UInt32 i(0) ; i<getMFMajorTickDrawObjects()->size() ; ++i) { Pnt2f DrawObjectTopLeft, DrawObjectBottomRight; getMajorTickDrawObjects(i)->getBounds(DrawObjectTopLeft, DrawObjectBottomRight); MaxLength = osgMax(MaxLength, DrawObjectBottomRight.x()-DrawObjectTopLeft.x()); } editMFMajorTickPositions()->clear(); for(UInt32 i(0) ; i<= osgAbs<Int32>(getMaximum() - getMinimum())/getMajorTickSpacing() ; ++i) { Alignment[MajorAxis] = static_cast<Real32>(i * getMajorTickSpacing())/static_cast<Real32>(getMaximum() - getMinimum()); editMFMajorTickPositions()->push_back( calculateSliderAlignment(getSliderTrackTopLeft(), getSliderTrackSize(), (MajorTickBottomRight - MajorTickTopLeft), Alignment.y(), Alignment.x())); if(getTicksOnRightBottom()) { editMFMajorTickPositions()->back()[MinorAxis] = getTrackDrawObject()->getPosition()[MinorAxis] + getTrackDrawObject()->getSize()[MinorAxis] + getTrackToTickOffset(); } else { editMFMajorTickPositions()->back()[MinorAxis] = getTrackDrawObject()->getPosition()[MinorAxis] - getTrackToTickOffset() - MaxLength; } } } //Update the Labels if(getDrawLabels() && getRangeModel() != NULL) { Vec2f Alignment; Pnt2f Pos; FieldContainerMap::const_iterator Itor; for(Itor = getLabelMap().begin() ; Itor != getLabelMap().end() ; ++Itor) { Alignment[MajorAxis] = static_cast<Real32>((*Itor).first - getMinimum())/static_cast<Real32>(getMaximum() - getMinimum()); Pos = calculateSliderAlignment(getSliderTrackTopLeft(), getSliderTrackSize(), 
dynamic_pointer_cast<Component>((*Itor).second)->getPreferredSize(), Alignment.y(), Alignment.x()); if(getTicksOnRightBottom()) { Pos[MinorAxis] = getTrackDrawObject()->getPosition()[MinorAxis] + getTrackDrawObject()->getSize()[MinorAxis] + getTrackToLabelOffset(); } else { Pos[MinorAxis] = getTrackDrawObject()->getPosition()[MinorAxis] - getTrackToLabelOffset() - dynamic_pointer_cast<Component>((*Itor).second)->getPreferredSize()[MinorAxis]; } dynamic_pointer_cast<Component>((*Itor).second)->setPosition(Pos); dynamic_pointer_cast<Component>((*Itor).second)->setSize(dynamic_pointer_cast<Component>((*Itor).second)->getPreferredSize()); } } }
void ColAllToAllDemote ( const DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& A, DistMatrix<T, U, V >& B ) { DEBUG_ONLY(CallStackEntry cse("copy::ColAllToAllDemote")) AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.AlignColsAndResize( A.ColAlign(), height, width, false, false ); if( !B.Participating() ) return; const Int colAlign = B.ColAlign(); const Int rowAlignA = A.RowAlign(); const Int colStride = B.ColStride(); const Int colStridePart = B.PartialColStride(); const Int colStrideUnion = B.PartialUnionColStride(); const Int colRankPart = B.PartialColRank(); const Int colDiff = (colAlign%colStridePart) - A.ColAlign(); const Int colShiftA = A.ColShift(); const Int localHeightB = B.LocalHeight(); const Int localWidthA = A.LocalWidth(); const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,colStrideUnion); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); std::vector<T> buffer( 2*colStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[colStrideUnion*portionSize]; if( colDiff == 0 ) { // Pack util::PartialColStridedPack ( height, localWidthA, colAlign, colStride, colStrideUnion, colStridePart, colRankPart, colShiftA, A.LockedBuffer(), A.LDim(), firstBuf, portionSize ); // Simultaneously Scatter in columns and Gather in rows mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, B.PartialUnionColComm() ); // Unpack util::RowStridedUnpack ( localHeightB, width, rowAlignA, colStrideUnion, secondBuf, portionSize, B.Buffer(), B.LDim() ); } else { #ifdef EL_UNALIGNED_WARNINGS if( B.Grid().Rank() == 0 ) std::cerr << "Unaligned ColAllToAllDemote" << std::endl; #endif const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart ); const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart ); // Pack util::PartialColStridedPack ( height, localWidthA, colAlign, colStride, colStrideUnion, colStridePart, sendColRankPart, colShiftA, 
A.LockedBuffer(), A.LDim(), secondBuf, portionSize ); // Simultaneously Scatter in columns and Gather in rows mpi::AllToAll ( secondBuf, portionSize, firstBuf, portionSize, B.PartialUnionColComm() ); // Realign the result mpi::SendRecv ( firstBuf, colStrideUnion*portionSize, sendColRankPart, secondBuf, colStrideUnion*portionSize, recvColRankPart, B.PartialColComm() ); // Unpack util::RowStridedUnpack ( localHeightB, width, rowAlignA, colStrideUnion, secondBuf, portionSize, B.Buffer(), B.LDim() ); } }
void ColAllToAllPromote ( const DistMatrix<T, U, V >& A, DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& B ) { DEBUG_CSE AssertSameGrids( A, B ); const Int height = A.Height(); const Int width = A.Width(); B.AlignColsAndResize ( Mod(A.ColAlign(),B.ColStride()), height, width, false, false ); if( !B.Participating() ) return; const Int colStride = A.ColStride(); const Int colStridePart = A.PartialColStride(); const Int colStrideUnion = A.PartialUnionColStride(); const Int colRankPart = A.PartialColRank(); const Int colDiff = B.ColAlign() - Mod(A.ColAlign(),colStridePart); const Int maxLocalHeight = MaxLength(height,colStride); const Int maxLocalWidth = MaxLength(width,colStrideUnion); const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth ); if( colDiff == 0 ) { if( A.PartialUnionColStride() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else { vector<T> buffer; FastResize( buffer, 2*colStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[colStrideUnion*portionSize]; // Pack util::RowStridedPack ( A.LocalHeight(), width, B.RowAlign(), colStrideUnion, A.LockedBuffer(), A.LDim(), firstBuf, portionSize ); // Simultaneously Gather in columns and Scatter in rows mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, A.PartialUnionColComm() ); // Unpack util::PartialColStridedUnpack ( height, B.LocalWidth(), A.ColAlign(), colStride, colStrideUnion, colStridePart, colRankPart, B.ColShift(), secondBuf, portionSize, B.Buffer(), B.LDim() ); } } else { #ifdef EL_UNALIGNED_WARNINGS if( A.Grid().Rank() == 0 ) cerr << "Unaligned PartialColAllToAllPromote" << endl; #endif const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart ); const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart ); vector<T> buffer; FastResize( buffer, 2*colStrideUnion*portionSize ); T* firstBuf = &buffer[0]; T* secondBuf = &buffer[colStrideUnion*portionSize]; // Pack util::RowStridedPack ( A.LocalHeight(), width, B.RowAlign(), 
colStrideUnion, A.LockedBuffer(), A.LDim(), secondBuf, portionSize ); // Realign the input mpi::SendRecv ( secondBuf, colStrideUnion*portionSize, sendColRankPart, firstBuf, colStrideUnion*portionSize, recvColRankPart, A.PartialColComm() ); // Simultaneously Scatter in columns and Gather in rows mpi::AllToAll ( firstBuf, portionSize, secondBuf, portionSize, A.PartialUnionColComm() ); // Unpack util::PartialColStridedUnpack ( height, B.LocalWidth(), A.ColAlign(), colStride, colStrideUnion, colStridePart, recvColRankPart, B.ColShift(), secondBuf, portionSize, B.Buffer(), B.LDim() ); } }
void TransposeDist( const DistMatrix<T,U,V>& A, DistMatrix<T,V,U>& B ) { DEBUG_ONLY(CSE cse("copy::TransposeDist")) AssertSameGrids( A, B ); const Grid& g = B.Grid(); B.Resize( A.Height(), A.Width() ); if( !B.Participating() ) return; const Int colStrideA = A.ColStride(); const Int rowStrideA = A.RowStride(); const Int distSize = A.DistSize(); if( A.DistSize() == 1 && B.DistSize() == 1 ) { Copy( A.LockedMatrix(), B.Matrix() ); } else if( A.Width() == 1 ) { const Int height = A.Height(); const Int maxLocalHeight = MaxLength(height,distSize); const Int portionSize = mpi::Pad( maxLocalHeight ); const Int colDiff = Shift(A.DistRank(),A.ColAlign(),distSize) - Shift(B.DistRank(),B.ColAlign(),distSize); const Int sendRankB = Mod( B.DistRank()+colDiff, distSize ); const Int recvRankA = Mod( A.DistRank()-colDiff, distSize ); const Int recvRankB = (recvRankA/colStrideA)+rowStrideA*(recvRankA%colStrideA); vector<T> buffer; FastResize( buffer, (colStrideA+rowStrideA)*portionSize ); T* sendBuf = &buffer[0]; T* recvBuf = &buffer[colStrideA*portionSize]; if( A.RowRank() == A.RowAlign() ) { // Pack // TODO: Use kernel from copy::util const Int AColShift = A.ColShift(); const T* ABuf = A.LockedBuffer(); EL_PARALLEL_FOR for( Int k=0; k<rowStrideA; ++k ) { T* data = &recvBuf[k*portionSize]; const Int shift = Shift_(A.ColRank()+colStrideA*k,A.ColAlign(),distSize); const Int offset = (shift-AColShift) / colStrideA; const Int thisLocalHeight = Length_(height,shift,distSize); for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc ) data[iLoc] = ABuf[offset+iLoc*rowStrideA]; } } // (e.g., A[VC,STAR] <- A[MC,MR]) mpi::Scatter ( recvBuf, portionSize, sendBuf, portionSize, A.RowAlign(), A.RowComm() ); // (e.g., A[VR,STAR] <- A[VC,STAR]) mpi::SendRecv ( sendBuf, portionSize, sendRankB, recvBuf, portionSize, recvRankB, B.DistComm() ); // (e.g., A[MR,MC] <- A[VR,STAR]) mpi::Gather ( recvBuf, portionSize, sendBuf, portionSize, B.RowAlign(), B.RowComm() ); if( B.RowRank() == B.RowAlign() ) { // Unpack // 
TODO: Use kernel from copy::util T* bufB = B.Buffer(); EL_PARALLEL_FOR for( Int k=0; k<colStrideA; ++k ) { const T* data = &sendBuf[k*portionSize]; const Int shift = Shift_(B.ColRank()+rowStrideA*k,B.ColAlign(),distSize); const Int offset = (shift-B.ColShift()) / rowStrideA; const Int thisLocalHeight = Length_(height,shift,distSize); for( Int iLoc=0; iLoc<thisLocalHeight; ++iLoc ) bufB[offset+iLoc*colStrideA] = data[iLoc]; } } }