void TestCorrectness ( UpperOrLower uplo, const DistMatrix<F>& A, const DistMatrix<F,STAR,STAR>& t, DistMatrix<F>& AOrig, bool print, bool display ) { typedef Base<F> Real; const Grid& g = A.Grid(); const Int n = AOrig.Height(); const Real infNormAOrig = InfinityNorm( AOrig ); const Real frobNormAOrig = FrobeniusNorm( AOrig ); if( g.Rank() == 0 ) Output("Testing error..."); // Set H to the appropriate Hessenberg portion of A DistMatrix<F> H( A ); if( uplo == LOWER ) MakeTrapezoidal( LOWER, H, 1 ); else MakeTrapezoidal( UPPER, H, -1 ); if( print ) Print( H, "Hessenberg" ); if( display ) Display( H, "Hessenberg" ); if( print || display ) { DistMatrix<F> Q(g); Identity( Q, n, n ); hessenberg::ApplyQ( LEFT, uplo, NORMAL, A, t, Q ); if( print ) Print( Q, "Q" ); if( display ) Display( Q, "Q" ); } // Reverse the accumulated Householder transforms hessenberg::ApplyQ( LEFT, uplo, ADJOINT, A, t, AOrig ); hessenberg::ApplyQ( RIGHT, uplo, NORMAL, A, t, AOrig ); if( print ) Print( AOrig, "Manual Hessenberg" ); if( display ) Display( AOrig, "Manual Hessenberg" ); // Compare the appropriate portion of AOrig and B if( uplo == LOWER ) MakeTrapezoidal( LOWER, AOrig, 1 ); else MakeTrapezoidal( UPPER, AOrig, -1 ); H -= AOrig; if( print ) Print( H, "Error in rotated Hessenberg" ); if( display ) Display( H, "Error in rotated Hessenberg" ); const Real infNormError = InfinityNorm( H ); const Real frobNormError = FrobeniusNorm( H ); if( g.Rank() == 0 ) Output (" ||A||_oo = ",infNormAOrig,"\n", " ||A||_F = ",frobNormAOrig,"\n", " ||H - Q^H A Q||_oo = ",infNormError,"\n", " ||H - Q^H A Q||_F = ",frobNormError); }
// Reduce across depth to get end result C void SumContributions ( mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& APartial, DistMatrix<double,MC,MR>& A ) { const int rank = mpi::CommRank( mpi::COMM_WORLD ); const Grid& meshGrid = APartial.Grid(); A.Empty(); A.AlignWith( APartial ); A.ResizeTo( APartial.Height(), APartial.Width() ); if( APartial.LocalHeight() != APartial.LocalLDim() ) throw std::logic_error ("APartial did not have matching local height/ldim"); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("A did not have matching local height/ldim"); const int dataSize = APartial.LocalHeight()*APartial.LocalWidth(); mpi::AllReduce ( APartial.LockedLocalBuffer(), A.LocalBuffer(), dataSize, mpi::SUM, depthComm ); }
inline void HermitianSVD ( UpperOrLower uplo, DistMatrix<F>& A, DistMatrix<typename Base<F>::type,VR,STAR>& s, DistMatrix<F>& U, DistMatrix<F>& V ) { #ifndef RELEASE PushCallStack("HermitianSVD"); #endif typedef typename Base<F>::type R; // Grab an eigenvalue decomposition of A HermitianEig( uplo, A, s, V ); // Redistribute the singular values into an [MR,* ] distribution const Grid& grid = A.Grid(); DistMatrix<R,MR,STAR> s_MR_STAR( grid ); s_MR_STAR.AlignWith( V ); s_MR_STAR = s; // Set the singular values to the absolute value of the eigenvalues const int numLocalVals = s.LocalHeight(); for( int iLocal=0; iLocal<numLocalVals; ++iLocal ) { const R sigma = s.GetLocal(iLocal,0); s.SetLocal(iLocal,0,Abs(sigma)); } // Copy V into U (flipping the sign as necessary) U.AlignWith( V ); U.ResizeTo( V.Height(), V.Width() ); const int localHeight = V.LocalHeight(); const int localWidth = V.LocalWidth(); for( int jLocal=0; jLocal<localWidth; ++jLocal ) { const R sigma = s_MR_STAR.GetLocal( jLocal, 0 ); F* UCol = U.LocalBuffer( 0, jLocal ); const F* VCol = V.LockedLocalBuffer( 0, jLocal ); if( sigma >= 0 ) for( int iLocal=0; iLocal<localHeight; ++iLocal ) UCol[iLocal] = VCol[iLocal]; else for( int iLocal=0; iLocal<localHeight; ++iLocal ) UCol[iLocal] = -VCol[iLocal]; } #ifndef RELEASE PopCallStack(); #endif }
/*
 * Distributes A in such a way that
 *   Layer 0   <- A(:, 0:(n/h - 1))
 *   Layer 1   <- A(:, (n/h):(2n/h - 1))
 *     .
 *     .
 *     .
 *   Layer h-1 <- A(:, ((h-1)n/h):n)
 *
 * depthComm - communicator the scatter runs over; its rank 0 is the root
 * A         - source matrix; only the root's local buffer is sent
 * B         - emptied, aligned with A, zero-filled at A's global size, then
 *             the received slab is written starting at local column
 *             (A.LocalWidth()/depthSize)*depthRank
 *
 * Throws std::logic_error if A's local buffer is not contiguous.
 *
 * NOTE(review): the counts use integer division by the depth size, so this
 * appears to assume A.LocalWidth() is a multiple of the depth size -- confirm
 * with callers.
 */
void DistributeCols
( const mpi::Comm& depthComm,
  const DistMatrix<double,MC,MR>& A,
        DistMatrix<double,MC,MR>& B )
{
    const int depthSize = mpi::CommSize( depthComm );
    const int depthRank = mpi::CommRank( depthComm );

    const int sendCount = A.LocalHeight()*A.LocalWidth();
    const int recvCount = sendCount / depthSize;

    // For now, we will make B as large as A...
    // TODO: NOT DO THIS
    if( A.LocalHeight() != A.LocalLDim() )
        throw std::logic_error("Local height did not match local ldim");
    B.Empty();
    B.AlignWith( A );
    Zeros( A.Height(), A.Width(), B );

    // Scatter: each layer receives its slab directly into the column range
    // of B that it owns; the rest of B stays zero
    const int localColOffset = (A.LocalWidth()/depthSize)*depthRank;
    mpi::Scatter
    ( A.LockedLocalBuffer(), recvCount,
      B.LocalBuffer(0,localColOffset), recvCount, 0, depthComm );
}
/*
 * Distributes A in such a way that
 *   Layer 0   <- A(0:(m/h - 1), :)
 *   Layer 1   <- A((m/h):(2m/h - 1), :)
 *     .
 *     .
 *     .
 *   Layer h-1 <- A(((h-1)m/h):m, :)
 *
 * depthComm - communicator the scatter runs over; its rank 0 packs and sends
 * A         - source matrix; only read for packing on depth rank 0
 * B         - receives a matrix of A's dimensions that is zero outside the
 *             row block scattered to this layer
 *
 * NOTE(review): block sizes and leading dimensions are computed with integer
 * division, so this appears to assume the matrix and grid dimensions divide
 * evenly -- confirm with callers.
 */
void DistributeRows
( const mpi::Comm& depthComm,
  const DistMatrix<double,MC,MR>& A,
        DistMatrix<double,MC,MR>& B )
{
    const int depthRank = mpi::CommRank( depthComm );
    const int depthSize = mpi::CommSize( depthComm );
    const Grid& meshGrid = A.Grid();

    const int sendCount = A.LocalHeight()*A.LocalWidth();
    const int recvCount = sendCount / depthSize;

    // Have the root mesh pack the data for scattering.
    // On every other layer sendBuf stays empty, so it must only ever be
    // accessed through data(), never through operator[].
    std::vector<double> sendBuf;
    const int blockSize = A.Height() / depthSize;
    if( depthRank == 0 )
    {
        // assign() zero-fills, so no separate clearing pass is needed
        sendBuf.assign( sendCount, 0.0 );

        DistMatrix<double,MC,MR>
            AT(meshGrid), A0(meshGrid),
            AB(meshGrid), A1(meshGrid),
                          A2(meshGrid);

        // Pack rows block by block for each layer
        LockedPartitionDown
        ( A, AT,
             AB, 0 );
        for( int i=0; i<depthSize; ++i )
        {
            LockedRepartitionDown
            ( AT,  A0,
             /**/ /**/
                   A1,
              AB,  A2, blockSize );

            const int dataSize = A1.LocalWidth()*A1.LocalHeight();
            const int offset = i*dataSize;

            // TODO: Avoid the extra copy...
            // The copy gives a buffer whose columns are stored back to back
            DistMatrix<double,MC,MR> A1Contig( A1 );
            MemCopy
            ( sendBuf.data()+offset, A1Contig.LockedLocalBuffer(), dataSize );

            SlideLockedPartitionDown
            ( AT,  A0,
                   A1,
             /**/ /**/
              AB,  A2 );
        }
    }

    // Scatter the packed data
    std::vector<double> recvBuf( recvCount );
    mpi::Scatter
    ( sendBuf.data(), recvCount, recvBuf.data(), recvCount, 0, depthComm );

    // Pad received data by zero
    DistMatrix<double,MC,MR>
        dataBlock( blockSize, A.Width(), 0, 0,
                   recvBuf.data(), blockSize/meshGrid.Height(), meshGrid );

    // TODO: We can probably heavily simplify this...
    //
    // dataBlock_T <- transpose(dataBlock)
    // tmp_T       <- padWithZeros(dataBlock_T)
    // B           <- transpose(tmp_T)
    // Layer x     <- M((x*Mm/h):((x+1)*Mm/h - 1), :)
    DistMatrix<double,MC,MR> dataBlockTrans( meshGrid );
    Transpose( dataBlock, dataBlockTrans );

    // Zero-filled buffer for the padded transpose; this layer's slab is
    // dropped in at its own offset
    std::vector<double> newData( sendCount, 0.0 );
    const int offset = depthRank*recvCount;
    MemCopy
    ( newData.data()+offset, dataBlockTrans.LockedLocalBuffer(), recvCount );

    DistMatrix<double,MC,MR>
        tmpTrans( A.Width(), A.Height(), 0, 0,
                  newData.data(), A.Width()/meshGrid.Width(), meshGrid );
    Transpose( tmpTrans, B );
}
void StackedGeometricColumnScaling ( const DistMatrix<Field, U,V >& A, const DistMatrix<Field, U,V >& B, DistMatrix<Base<Field>,V,STAR>& geomScaling ) { EL_DEBUG_CSE // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments // are equal typedef Base<Field> Real; DistMatrix<Real,V,STAR> maxScalingA(A.Grid()), maxScalingB(A.Grid()); ColumnMaxNorms( A, maxScalingA ); ColumnMaxNorms( B, maxScalingB ); const Int mLocalA = A.LocalHeight(); const Int mLocalB = B.LocalHeight(); const Int nLocal = A.LocalWidth(); geomScaling.AlignWith( maxScalingA ); geomScaling.Resize( A.Width(), 1 ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.LockedMatrix(); auto& geomScalingLoc = geomScaling.Matrix(); auto& maxScalingALoc = maxScalingA.Matrix(); auto& maxScalingBLoc = maxScalingB.Matrix(); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc)); for( Int iLoc=0; iLoc<mLocalA; ++iLoc ) { const Real absVal = Abs(ALoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } for( Int iLoc=0; iLoc<mLocalB; ++iLoc ) { const Real absVal = Abs(BLoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } geomScalingLoc(jLoc) = minAbs; } mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() ); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { const Real maxAbsA = maxScalingALoc(jLoc); const Real maxAbsB = maxScalingBLoc(jLoc); const Real maxAbs = Max(maxAbsA,maxAbsB); const Real minAbs = geomScalingLoc(jLoc); geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs); } }
// Broadcast a matrix from the root grid to the others void DepthBroadcast ( const mpi::Comm& depthComm, const DistMatrix<double,MC,MR>& A, DistMatrix<double,MC,MR>& B ) { const int rank = mpi::CommRank(mpi::COMM_WORLD); const Grid& meshGrid = A.Grid(); const int meshSize = meshGrid.Size(); const int depthRank = rank / meshSize; const int localSize = A.LocalHeight()*A.LocalWidth(); if( A.LocalHeight() != A.LocalLDim() ) throw std::logic_error("Leading dimension did not match local height"); B.Empty(); B.AlignWith( A ); B.ResizeTo( A.Height(), A.Width() ); // Have the root pack the broadcast data if( depthRank == 0 ) MemCopy( B.LocalBuffer(), A.LockedLocalBuffer(), localSize ); // Broadcast from the root mpi::Broadcast( B.LocalBuffer(), localSize, 0, depthComm ); }
inline void SymmRUC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE PushCallStack("internal::SymmRUC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error("{A,B,C} must be distributed on the same grid"); #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g), CLeft(g), CRight(g); // Temporary distributions DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> AColPan_VR_STAR(g); DistMatrix<T,STAR,MR > AColPanTrans_STAR_MR(g); DistMatrix<T,MR, STAR> ARowPanTrans_MR_STAR(g); B1_MC_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( CR.Width() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); LockedView1x2( ARowPan, A11, A12 ); LockedView2x1 ( AColPan, A01, A11 ); View1x2( CLeft, C0, C1 ); View1x2( CRight, C1, C2 ); AColPan_VR_STAR.AlignWith( CLeft ); AColPanTrans_STAR_MR.AlignWith( CLeft ); ARowPanTrans_MR_STAR.AlignWith( CRight ); //--------------------------------------------------------------------// B1_MC_STAR = B1; AColPan_VR_STAR = AColPan; AColPanTrans_STAR_MR.TransposeFrom( AColPan_VR_STAR, conjugate ); ARowPanTrans_MR_STAR.TransposeFrom( ARowPan, conjugate ); MakeTriangular( LOWER, ARowPanTrans_MR_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, AColPanTrans_STAR_MR ); LocalGemm ( NORMAL, 
orientation, alpha, B1_MC_STAR, ARowPanTrans_MR_STAR, T(1), CRight ); LocalGemm ( NORMAL, NORMAL, alpha, B1_MC_STAR, AColPanTrans_STAR_MR, T(1), CLeft ); //--------------------------------------------------------------------// AColPan_VR_STAR.FreeAlignments(); AColPanTrans_STAR_MR.FreeAlignments(); ARowPanTrans_MR_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LocalSymmetricAccumulateRU ( Orientation orientation, T alpha, const DistMatrix<T,MC, MR >& A, const DistMatrix<T,STAR,MC >& B_STAR_MC, const DistMatrix<T,MR, STAR>& BTrans_MR_STAR, DistMatrix<T,MC, STAR>& ZTrans_MC_STAR, DistMatrix<T,MR, STAR>& ZTrans_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateRU"); if( A.Grid() != B_STAR_MC.Grid() || B_STAR_MC.Grid() != BTrans_MR_STAR.Grid() || BTrans_MR_STAR.Grid() != ZTrans_MC_STAR.Grid() || ZTrans_MC_STAR.Grid() != ZTrans_MR_STAR.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_STAR_MC.Width() || A.Height() != BTrans_MR_STAR.Height() || A.Height() != ZTrans_MC_STAR.Height() || A.Height() != ZTrans_MR_STAR.Height() || B_STAR_MC.Height() != BTrans_MR_STAR.Width() || BTrans_MR_STAR.Width() != ZTrans_MC_STAR.Width() || ZTrans_MC_STAR.Width() != ZTrans_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateRU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[* ,MC] ~ " << B_STAR_MC.Height() << " x " << B_STAR_MC.Width() << "\n" << " B^H/T[MR,* ] ~ " << BTrans_MR_STAR.Height() << " x " << BTrans_MR_STAR.Width() << "\n" << " Z^H/T[MC,* ] ~ " << ZTrans_MC_STAR.Height() << " x " << ZTrans_MC_STAR.Width() << "\n" << " Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x " << ZTrans_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_STAR_MC.RowAlignment() != A.ColAlignment() || BTrans_MR_STAR.ColAlignment() != A.RowAlignment() || ZTrans_MC_STAR.ColAlignment() != A.ColAlignment() || ZTrans_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,STAR,MC> BL_STAR_MC(g), 
BR_STAR_MC(g), B0_STAR_MC(g), B1_STAR_MC(g), B2_STAR_MC(g); DistMatrix<T,MR,STAR> BTTrans_MR_STAR(g), B0Trans_MR_STAR(g), BBTrans_MR_STAR(g), B1Trans_MR_STAR(g), B2Trans_MR_STAR(g); DistMatrix<T,MC,STAR> ZTTrans_MC_STAR(g), Z0Trans_MC_STAR(g), ZBTrans_MC_STAR(g), Z1Trans_MC_STAR(g), Z2Trans_MC_STAR(g); DistMatrix<T,MR,STAR> ZBTrans_MR_STAR(g), Z0Trans_MR_STAR(g), ZTTrans_MR_STAR(g), Z1Trans_MR_STAR(g), Z2Trans_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionRight( B_STAR_MC, BL_STAR_MC, BR_STAR_MC, 0 ); LockedPartitionDown ( BTrans_MR_STAR, BTTrans_MR_STAR, BBTrans_MR_STAR, 0 ); PartitionDown ( ZTrans_MC_STAR, ZTTrans_MC_STAR, ZBTrans_MC_STAR, 0 ); PartitionDown ( ZTrans_MR_STAR, ZTTrans_MR_STAR, ZBTrans_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionRight ( BL_STAR_MC, /**/ BR_STAR_MC, B0_STAR_MC, /**/ B1_STAR_MC, B2_STAR_MC ); LockedRepartitionDown ( BTTrans_MR_STAR, B0Trans_MR_STAR, /***************/ /***************/ B1Trans_MR_STAR, BBTrans_MR_STAR, B2Trans_MR_STAR ); RepartitionDown ( ZTTrans_MC_STAR, Z0Trans_MC_STAR, /***************/ /***************/ Z1Trans_MC_STAR, ZBTrans_MC_STAR, Z2Trans_MC_STAR ); RepartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, /***************/ /***************/ Z1Trans_MR_STAR, ZBTrans_MR_STAR, Z2Trans_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTriangular( UPPER, D11 ); LocalGemm ( orientation, orientation, alpha, D11, B1_STAR_MC, T(1), Z1Trans_MR_STAR ); SetDiagonal( D11, T(0) ); LocalGemm ( NORMAL, NORMAL, alpha, D11, B1Trans_MR_STAR, T(1), Z1Trans_MC_STAR ); LocalGemm ( orientation, orientation, alpha, A12, B1_STAR_MC, T(1), 
Z2Trans_MR_STAR ); LocalGemm ( NORMAL, NORMAL, alpha, A12, B2Trans_MR_STAR, T(1), Z1Trans_MC_STAR ); //--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionRight ( BL_STAR_MC, /**/ BR_STAR_MC, B0_STAR_MC, B1_STAR_MC, /**/ B2_STAR_MC ); SlideLockedPartitionDown ( BTTrans_MR_STAR, B0Trans_MR_STAR, B1Trans_MR_STAR, /***************/ /***************/ BBTrans_MR_STAR, B2Trans_MR_STAR ); SlidePartitionDown ( ZTTrans_MC_STAR, Z0Trans_MC_STAR, Z1Trans_MC_STAR, /***************/ /***************/ ZBTrans_MC_STAR, Z2Trans_MC_STAR ); SlidePartitionDown ( ZTTrans_MR_STAR, Z0Trans_MR_STAR, Z1Trans_MR_STAR, /***************/ /***************/ ZBTrans_MR_STAR, Z2Trans_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void SymmRUA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE PushCallStack("internal::SymmRUA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,VC, STAR> B1Trans_VC_STAR(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MC, STAR> Z1Trans_MC_STAR(g); DistMatrix<T,MR, STAR> Z1Trans_MR_STAR(g); DistMatrix<T,MC, MR > Z1Trans(g); DistMatrix<T,MR, MC > Z1Trans_MR_MC(g); B1Trans_MR_STAR.AlignWith( A ); B1Trans_VC_STAR.AlignWith( A ); B1_STAR_MC.AlignWith( A ); Z1Trans_MC_STAR.AlignWith( A ); Z1Trans_MR_STAR.AlignWith( A ); Matrix<T> Z1Local; Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CT.Height() < C.Height() ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Z1Trans_MR_MC.AlignWith( C1 ); Zeros( C1.Width(), C1.Height(), Z1Trans_MC_STAR ); Zeros( C1.Width(), C1.Height(), Z1Trans_MR_STAR ); //--------------------------------------------------------------------// B1Trans_MR_STAR.TransposeFrom( B1, conjugate ); B1Trans_VC_STAR = B1Trans_MR_STAR; B1_STAR_MC.TransposeFrom( B1Trans_VC_STAR, conjugate ); LocalSymmetricAccumulateRU ( orientation, alpha, A, B1_STAR_MC, B1Trans_MR_STAR, Z1Trans_MC_STAR, Z1Trans_MR_STAR ); Z1Trans.SumScatterFrom( Z1Trans_MC_STAR ); Z1Trans_MR_MC = Z1Trans; Z1Trans_MR_MC.SumScatterUpdate( T(1), Z1Trans_MR_STAR ); Transpose( Z1Trans_MR_MC.LockedMatrix(), Z1Local, conjugate ); Axpy( T(1), Z1Local, C1.Matrix() ); //--------------------------------------------------------------------// Z1Trans_MR_MC.FreeAlignments(); 
SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void RUVB ( Conjugation conjugation, int offset, const DistMatrix<Complex<R> >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R> >& A ) { #ifndef RELEASE CallStackEntry entry("apply_packed_reflectors::RUVB"); if( H.Grid() != t.Grid() || t.Grid() != A.Grid() ) throw std::logic_error ("{H,t,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); if( t.Height() != H.DiagonalLength( offset ) ) throw std::logic_error("t must be the same length as H's offset diag"); if( !t.AlignedWithDiagonal( H, offset ) ) throw std::logic_error("t must be aligned with H's 'offset' diagonal"); #endif typedef Complex<R> C; const Grid& g = H.Grid(); DistMatrix<C> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<C> ALeft(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); DistMatrix<C,VC, STAR> HPan_VC_STAR(g); DistMatrix<C,MR, STAR> HPan_MR_STAR(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<C,STAR,MC > ZAdj_STAR_MC(g); DistMatrix<C,STAR,VC > ZAdj_STAR_VC(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); LockedPartitionUp ( t, tT, tB, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; LockedView( HPan, H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); LockedRepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2, HPanWidth ); View( ALeft, A, 0, 0, A.Height(), HPanHeight 
); HPan_MR_STAR.AlignWith( ALeft ); ZAdj_STAR_MC.AlignWith( ALeft ); ZAdj_STAR_VC.AlignWith( ALeft ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, C(1) ); HPan_VC_STAR = HPanCopy; Zeros( SInv_STAR_STAR, HPan.Width(), HPan.Width() ); Herk ( LOWER, ADJOINT, C(1), HPan_VC_STAR.LockedMatrix(), C(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); t1_STAR_STAR = t1; FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; LocalGemm( ADJOINT, ADJOINT, C(1), HPan_MR_STAR, ALeft, ZAdj_STAR_MC ); ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC ); LocalTrsm ( LEFT, LOWER, ADJOINT, NON_UNIT, C(1), SInv_STAR_STAR, ZAdj_STAR_VC ); ZAdj_STAR_MC = ZAdj_STAR_VC; LocalGemm ( ADJOINT, ADJOINT, C(-1), ZAdj_STAR_MC, HPan_MR_STAR, C(1), ALeft ); //--------------------------------------------------------------------// HPan_MR_STAR.FreeAlignments(); ZAdj_STAR_MC.FreeAlignments(); ZAdj_STAR_VC.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); SlideLockedPartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); } }
inline void RUVB ( int offset, const DistMatrix<R>& H, DistMatrix<R>& A ) { #ifndef RELEASE CallStackEntry entry("apply_packed_reflectors::RUVB"); if( H.Grid() != A.Grid() ) throw std::logic_error("{H,A} must be distributed over the same grid"); if( offset < 0 || offset > H.Height() ) throw std::logic_error("Transforms out of bounds"); if( H.Height() != A.Width() ) throw std::logic_error ("Height of transforms must equal width of target matrix"); #endif const Grid& g = H.Grid(); DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g); DistMatrix<R> ALeft(g); DistMatrix<R,VC, STAR> HPan_VC_STAR(g); DistMatrix<R,MR, STAR> HPan_MR_STAR(g); DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g); DistMatrix<R,STAR,MC > ZTrans_STAR_MC(g); DistMatrix<R,STAR,VC > ZTrans_STAR_VC(g); LockedPartitionUpDiagonal ( H, HTL, HTR, HBL, HBR, 0 ); while( HBR.Height() < H.Height() && HBR.Width() < H.Width() ) { LockedRepartitionUpDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 ); const int HPanHeight = H01.Height() + H11.Height(); const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) ); const int HPanWidth = H11.Width()-HPanOffset; LockedView( HPan, H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth ); View( ALeft, A, 0, 0, A.Height(), HPanHeight ); HPan_MR_STAR.AlignWith( ALeft ); ZTrans_STAR_MC.AlignWith( ALeft ); ZTrans_STAR_VC.AlignWith( ALeft ); //--------------------------------------------------------------------// HPanCopy = HPan; MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy ); SetDiagonal( RIGHT, offset, HPanCopy, R(1) ); HPan_VC_STAR = HPanCopy; Zeros( SInv_STAR_STAR, HPan.Width(), HPan.Width() ); Syrk ( LOWER, TRANSPOSE, R(1), HPan_VC_STAR.LockedMatrix(), R(0), SInv_STAR_STAR.Matrix() ); SInv_STAR_STAR.SumOverGrid(); HalveMainDiagonal( SInv_STAR_STAR ); HPan_MR_STAR = HPan_VC_STAR; 
LocalGemm ( TRANSPOSE, TRANSPOSE, R(1), HPan_MR_STAR, ALeft, ZTrans_STAR_MC ); ZTrans_STAR_VC.SumScatterFrom( ZTrans_STAR_MC ); LocalTrsm ( LEFT, LOWER, TRANSPOSE, NON_UNIT, R(1), SInv_STAR_STAR, ZTrans_STAR_VC ); ZTrans_STAR_MC = ZTrans_STAR_VC; LocalGemm ( TRANSPOSE, TRANSPOSE, R(-1), ZTrans_STAR_MC, HPan_MR_STAR, R(1), ALeft ); //--------------------------------------------------------------------// HPan_MR_STAR.FreeAlignments(); ZTrans_STAR_MC.FreeAlignments(); ZTrans_STAR_VC.FreeAlignments(); SlideLockedPartitionUpDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 ); } }