void TestCorrectness ( bool print, const ElementalMatrix<Complex<Real>>& A, const ElementalMatrix<Complex<Real>>& w, const ElementalMatrix<Complex<Real>>& V ) { const Int n = A.Height(); const Real eps = limits::Epsilon<Real>(); const Real oneNormA = OneNorm( A ); // Find the residual R = AV-VW DistMatrix<Complex<Real>> R( V.Height(), V.Width(), A.Grid() ); Gemm ( NORMAL, NORMAL, Complex<Real>(1), A, V, Complex<Real>(0), R); DistMatrix<Complex<Real>> VW( V ); DiagonalScale( RIGHT, NORMAL, w, VW ); R -= VW; const Real infError = InfinityNorm( R ); const Real relError = infError / (eps*n*oneNormA); OutputFromRoot (A.Grid().Comm(),"|| A V - V W ||_oo / (eps n || A ||_1) = ",relError); // TODO: A more refined failure condition if( relError > Real(100) ) LogicError("Relative error was unacceptably large"); }
// Overwrites A with the triangular factor of its RQ factorization: A is
// factored in place by Householder and then everything outside of the upper
// trapezoid with diagonal offset Width-Height is cleared.
void ExplicitTriang( ElementalMatrix<F>& A )
{
    DEBUG_ONLY(CSE cse("rq::ExplicitTriang"))
    const Grid& grid = A.Grid();

    // The reflector data is only needed to run the factorization
    DistMatrix<F,MD,STAR> reflectorScalars(grid);
    DistMatrix<Base<F>,MD,STAR> diagSigns(grid);
    Householder( A, reflectorScalars, diagSigns );

    const Int diagOffset = A.Width()-A.Height();
    MakeTrapezoidal( UPPER, A, diagOffset );
}
// Adds alpha times the (optionally conjugated) transpose of A onto B.
//
// When A already has the transposed distribution pair of B the update is
// delegated directly to TransposeAxpy. When one of A's distributions is the
// Partial/Collect counterpart of the required one, A is first passed through
// Contract into a temporary with B's transposed distribution, after which
// the update is purely local. Any other pairing raises a LogicError.
void TransposeAxpyContract
( T alpha,
  const ElementalMatrix<T>& A,
        ElementalMatrix<T>& B,
  bool conjugate )
{
    EL_DEBUG_CSE
    const Dist colDistB = B.ColDist();
    const Dist rowDistB = B.RowDist();

    if( A.ColDist() == rowDistB && A.RowDist() == colDistB )
    {
        TransposeAxpy( alpha, A, B, conjugate );
    }
    else if( (A.ColDist() == rowDistB &&
              (A.RowDist() == Partial(colDistB) ||
               A.RowDist() == Collect(colDistB))) ||
             (A.RowDist() == colDistB &&
              (A.ColDist() == Partial(rowDistB) ||
               A.ColDist() == Collect(rowDistB))) )
    {
        unique_ptr<ElementalMatrix<T>>
          contracted( B.ConstructTranspose(B.Grid(),B.Root()) );

        // Honor whichever alignments of B are already pinned down
        if( B.ColConstrained() )
            contracted->AlignRowsWith( B, true );
        if( B.RowConstrained() )
            contracted->AlignColsWith( B, true );

        Contract( A, *contracted );

        // Let the unconstrained alignments of B follow the temporary
        if( !B.ColConstrained() )
            B.AlignColsWith( *contracted, false );
        if( !B.RowConstrained() )
            B.AlignRowsWith( *contracted, false );

        // The alignments should now be compatible, so update locally
        TransposeAxpy
        ( alpha, contracted->LockedMatrix(), B.Matrix(), conjugate );
    }
    else
    {
        LogicError("Incompatible distributions");
    }
}
// Forms both factors of an LQ factorization explicitly: L receives the
// lower-trapezoidal factor (the leading min(m,n) columns of the factored
// matrix) and APre is overwritten with the result of applying Q to an
// identity of the same dimensions.
void Explicit( ElementalMatrix<F>& L, ElementalMatrix<F>& APre )
{
    DEBUG_CSE
    const Grid& grid = APre.Grid();

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    auto& A = AProx.Get();

    // Factor in place
    DistMatrix<F,MD,STAR> householderScalars(grid);
    DistMatrix<Base<F>,MD,STAR> signature(grid);
    LQ( A, householderScalars, signature );

    // Extract the triangular factor
    const Int height = A.Height();
    const Int width = A.Width();
    const Int numReflectors = Min(height,width);
    auto ALeft = A( IR(0,height), IR(0,numReflectors) );
    Copy( ALeft, L );
    MakeTrapezoidal( LOWER, L );

    // TODO: Replace this with an in-place expansion of Q
    DistMatrix<F> Q(grid);
    Identity( Q, A.Height(), A.Width() );
    lq::ApplyQ( RIGHT, NORMAL, A, householderScalars, signature, Q );
    Copy( Q, APre );
}
// Overwrites A with the upper-trapezoidal factor of its QR factorization.
// Column pivoting (BusingerGolub) is used when ctrl.colPiv is set, otherwise
// a plain Householder factorization. A is then truncated to as many rows as
// there are Householder scalars.
void ExplicitTriang( ElementalMatrix<F>& A, const QRCtrl<Base<F>>& ctrl )
{
    DEBUG_ONLY(CSE cse("qr::ExplicitTriang"))
    const Grid& grid = A.Grid();
    DistMatrix<F,MD,STAR> reflectorScalars(grid);
    DistMatrix<Base<F>,MD,STAR> diagSigns(grid);

    if( !ctrl.colPiv )
    {
        Householder( A, reflectorScalars, diagSigns );
    }
    else
    {
        // The permutation is computed but not returned to the caller
        DistPermutation Omega(grid);
        BusingerGolub( A, reflectorScalars, diagSigns, Omega, ctrl );
    }

    const Int numReflectors = reflectorScalars.Height();
    A.Resize( numReflectors, A.Width() );
    MakeTrapezoidal( UPPER, A );
}
void TransposeContract ( const ElementalMatrix<T>& A, ElementalMatrix<T>& B, bool conjugate ) { EL_DEBUG_CSE const Dist U = B.ColDist(); const Dist V = B.RowDist(); if( A.ColDist() == V && A.RowDist() == Partial(U) ) { Transpose( A, B, conjugate ); } else { unique_ptr<ElementalMatrix<T>> ASumFilt( B.ConstructTranspose(B.Grid(),B.Root()) ); if( B.ColConstrained() ) ASumFilt->AlignRowsWith( B, true ); if( B.RowConstrained() ) ASumFilt->AlignColsWith( B, true ); Contract( A, *ASumFilt ); if( !B.ColConstrained() ) B.AlignColsWith( *ASumFilt, false ); if( !B.RowConstrained() ) B.AlignRowsWith( *ASumFilt, false ); // We should have ensured that the alignments match B.Resize( A.Width(), A.Height() ); Transpose( ASumFilt->LockedMatrix(), B.Matrix(), conjugate ); } }
// Overwrites A with the upper-trapezoidal factor of its QR factorization.
// ctrl.colPiv selects between the column-pivoted (BusingerGolub) and the
// unpivoted (Householder) factorization; afterwards A keeps only as many
// rows as there are Householder scalars.
void ExplicitTriang( ElementalMatrix<F>& A, const QRCtrl<Base<F>>& ctrl )
{
    DEBUG_CSE
    const Grid& grid = A.Grid();
    DistMatrix<F,MD,STAR> scalars(grid);
    DistMatrix<Base<F>,MD,STAR> signs(grid);

    if( !ctrl.colPiv )
    {
        Householder( A, scalars, signs );
    }
    else
    {
        // The column permutation is discarded once the factorization is done
        DistPermutation Omega(grid);
        BusingerGolub( A, scalars, signs, Omega, ctrl );
    }

    const Int numRowsKept = scalars.Height();
    A.Resize( numRowsKept, A.Width() );
    MakeTrapezoidal( UPPER, A );
}
// Returns the two-norm of A, obtained as the infinity norm of the vector of
// values produced by SVD( A, s ).
Base<F> TwoNorm( const ElementalMatrix<F>& A )
{
    DEBUG_ONLY(CSE cse("TwoNorm"))
    typedef Base<F> Real;
    DistMatrix<Real,VR,STAR> singVals( A.Grid() );
    SVD( A, singVals );
    const Real largest = InfinityNorm( singVals );
    return largest;
}
// Returns the two-norm of a matrix stored in the 'uplo' triangle of A,
// obtained as the infinity norm of the vector of values produced by
// HermitianSVD( uplo, A, s ).
Base<F> HermitianTwoNorm( UpperOrLower uplo, const ElementalMatrix<F>& A )
{
    DEBUG_ONLY(CSE cse("HermitianTwoNorm"))
    typedef Base<F> Real;
    DistMatrix<Real,VR,STAR> singVals( A.Grid() );
    HermitianSVD( uplo, A, singVals );
    const Real largest = InfinityNorm( singVals );
    return largest;
}
void LUNMedium ( const ElementalMatrix<F>& UPre, ElementalMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = UPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> UProx( UPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& U = UProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); const Int kLast = LastOffset( m, bsize ); Int k=kLast, kOld=m; while( true ) { const bool in2x2 = ( k>0 && U.Get(k,k-1) != F(0) ); if( in2x2 ) --k; const Int nb = kOld-k; const Range<Int> ind0( 0, k ), ind1( k, k+nb ); auto U01 = U( ind0, ind1 ); auto U11 = U( ind1, ind1 ); auto X0 = X( ind0, ALL ); auto X1 = X( ind1, ALL ); U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1Trans_MR_STAR.AlignWith( X0 ); Transpose( X1, X1Trans_MR_STAR ); // X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ] // = (U11^-1[* ,* ] X1[* ,MR])^T LocalQuasiTrsm ( RIGHT, UPPER, TRANSPOSE, F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); Transpose( X1Trans_MR_STAR, X1 ); U01_MC_STAR.AlignWith( X0 ); U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 ); if( k == 0 ) break; kOld = k; k -= Min(bsize,k); } }
void PartialColScatter ( T alpha, const ElementalMatrix<T>& A, ElementalMatrix<T>& B ) { DEBUG_ONLY(CSE cse("axpy_contract::PartialColScatter")) AssertSameGrids( A, B ); if( A.Height() != B.Height() || A.Width() != B.Width() ) LogicError("A and B must be the same size"); #ifdef EL_CACHE_WARNINGS if( A.Width() != 1 && A.Grid().Rank() == 0 ) { cerr << "axpy_contract::PartialColScatterUpdate potentially causes a large " "amount of cache-thrashing. If possible, avoid it by forming the " "(conjugate-)transpose of the [UGath,* ] matrix instead." << endl; } #endif if( B.ColAlign() % A.ColStride() == A.ColAlign() ) { const Int colStride = B.ColStride(); const Int colStridePart = B.PartialColStride(); const Int colStrideUnion = B.PartialUnionColStride(); const Int colRankPart = B.PartialColRank(); const Int colAlign = B.ColAlign(); const Int height = B.Height(); const Int width = B.Width(); const Int localHeight = B.LocalHeight(); const Int maxLocalHeight = MaxLength( height, colStride ); const Int recvSize = mpi::Pad( maxLocalHeight*width ); const Int sendSize = colStrideUnion*recvSize; //vector<T> buffer( sendSize ); vector<T> buffer; buffer.reserve( sendSize ); // Pack copy::util::PartialColStridedPack ( height, width, colAlign, colStride, colStrideUnion, colStridePart, colRankPart, A.ColShift(), A.LockedBuffer(), A.LDim(), buffer.data(), recvSize ); // Communicate mpi::ReduceScatter( buffer.data(), recvSize, B.PartialUnionColComm() ); // Unpack our received data axpy::util::InterleaveMatrixUpdate ( alpha, localHeight, width, buffer.data(), 1, localHeight, B.Buffer(), 1, B.LDim() ); } else LogicError("Unaligned PartialColScatter not implemented"); }
// Fills P via Riffle( P, n ) and fills PInf via RiffleStationary( PInf, n ),
// after first placing PInf on P's grid with P's distribution alignment.
void Riffle( ElementalMatrix<F>& P, ElementalMatrix<F>& PInf, Int n )
{
    DEBUG_CSE
    Riffle( P, n );

    // Make PInf conform to P before it is filled
    const Grid& grid = P.Grid();
    PInf.SetGrid( grid );
    PInf.AlignWith( P.DistData() );

    RiffleStationary( PInf, n );
}
void IPM ( const ElementalMatrix<Real>& A, const ElementalMatrix<Real>& b, Real lambda, ElementalMatrix<Real>& x, const qp::affine::Ctrl<Real>& ctrl ) { DEBUG_CSE const Int m = A.Height(); const Int n = A.Width(); const Grid& g = A.Grid(); const Range<Int> uInd(0,n), vInd(n,2*n), rInd(2*n,2*n+m); DistMatrix<Real> Q(g), c(g), AHat(g), G(g), h(g); // Q := | 0 0 0 | // | 0 0 0 | // | 0 0 I | // ============== Zeros( Q, 2*n+m, 2*n+m ); auto Qrr = Q( rInd, rInd ); FillDiagonal( Qrr, Real(1) ); // c := lambda*[1;1;0] // =================== Zeros( c, 2*n+m, 1 ); auto cuv = c( IR(0,2*n), ALL ); Fill( cuv, lambda ); // \hat A := [A, -A, I] // ==================== Zeros( AHat, m, 2*n+m ); auto AHatu = AHat( IR(0,m), uInd ); auto AHatv = AHat( IR(0,m), vInd ); auto AHatr = AHat( IR(0,m), rInd ); AHatu = A; AHatv -= A; FillDiagonal( AHatr, Real(1) ); // G := | -I 0 0 | // | 0 -I 0 | // ================ Zeros( G, 2*n, 2*n+m ); FillDiagonal( G, Real(-1) ); // h := 0 // ====== Zeros( h, 2*n, 1 ); // Solve the affine QP // =================== DistMatrix<Real> xHat(g), y(g), z(g), s(g); QP( Q, AHat, G, b, c, h, xHat, y, z, s, ctrl ); // x := u - v // ========== x = xHat( uInd, ALL ); x -= xHat( vInd, ALL ); }
// Sets A to the difference between the matrix produced by Riffle( A, n )
// and the one produced by RiffleStationary on a temporary aligned with A.
void RiffleDecay( ElementalMatrix<F>& A, Int n )
{
    DEBUG_CSE
    Riffle( A, n );

    // Build the stationary matrix in a temporary of the same type as A
    unique_ptr<ElementalMatrix<F>> stationary( A.Construct(A.Grid(),A.Root()) );
    ElementalMatrix<F>& PInf = *stationary;
    PInf.AlignWith( A.DistData() );
    RiffleStationary( PInf, n );

    A -= PInf;
}
void UniformHelmholtzGreens ( ElementalMatrix<Complex<Real>>& A, Int n, Real lambda ) { EL_DEBUG_CSE typedef Complex<Real> C; const Real pi = 4*Atan( Real(1) ); const Real k0 = 2*pi/lambda; const Grid& g = A.Grid(); // Generate a list of n uniform samples from the 3D unit ball DistMatrix<Real,STAR,VR> X_STAR_VR(3,n,g); for( Int jLoc=0; jLoc<X_STAR_VR.LocalWidth(); ++jLoc ) { Real x0, x1, x2; // Sample uniformly from [-1,+1]^3 until a point is drawn from the ball while( true ) { x0 = SampleUniform( Real(-1), Real(1) ); x1 = SampleUniform( Real(-1), Real(1) ); x2 = SampleUniform( Real(-1), Real(1) ); const Real radiusSq = x0*x0 + x1*x1 + x2*x2; if( radiusSq > 0 && radiusSq <= 1 ) break; } X_STAR_VR.SetLocal( 0, jLoc, x0 ); X_STAR_VR.SetLocal( 1, jLoc, x1 ); X_STAR_VR.SetLocal( 2, jLoc, x2 ); } DistMatrix<Real,STAR,STAR> X_STAR_STAR( X_STAR_VR ); A.Resize( n, n ); for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc ) { const Int j = A.GlobalCol(jLoc); const Real xj0 = X_STAR_STAR.GetLocal(0,j); const Real xj1 = X_STAR_STAR.GetLocal(1,j); const Real xj2 = X_STAR_STAR.GetLocal(2,j); for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc ) { const Int i = A.GlobalRow(iLoc); if( i == j ) { A.SetLocal( iLoc, jLoc, 0 ); } else { const Real d0 = X_STAR_STAR.GetLocal(0,i)-xj0; const Real d1 = X_STAR_STAR.GetLocal(1,i)-xj1; const Real d2 = X_STAR_STAR.GetLocal(2,i)-xj2; const Real gamma = k0*Sqrt(d0*d0+d1*d1+d2*d2); const Real realPart = Cos(gamma)/gamma; const Real imagPart = Sin(gamma)/gamma; A.SetLocal( iLoc, jLoc, C(realPart,imagPart) ); } } } }
void SDC ( UpperOrLower uplo, ElementalMatrix<F>& APre, ElementalMatrix<Base<F>>& wPre, const HermitianSDCCtrl<Base<F>> ctrl ) { DEBUG_CSE typedef Base<F> Real; const Int n = APre.Height(); wPre.Resize( n, 1 ); if( APre.Grid().Size() == 1 ) { HermitianEig( uplo, APre.Matrix(), wPre.Matrix() ); return; } if( n <= ctrl.cutoff ) { HermitianEig( uplo, APre, wPre ); return; } DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre ); DistMatrixWriteProxy<Real,Real,VR,STAR> wProx( wPre ); auto& A = AProx.Get(); auto& w = wProx.Get(); // Perform this level's split const auto part = SpectralDivide( uplo, A, ctrl ); auto ind1 = IR(0,part.index); auto ind2 = IR(part.index,n); auto ATL = A( ind1, ind1 ); auto ATR = A( ind1, ind2 ); auto ABL = A( ind2, ind1 ); auto ABR = A( ind2, ind2 ); auto wT = w( ind1, ALL ); auto wB = w( ind2, ALL ); if( uplo == LOWER ) Zero( ABL ); else Zero( ATR ); // Recurse on the two subproblems DistMatrix<F> ATLSub, ABRSub; DistMatrix<Real,VR,STAR> wTSub, wBSub; PushSubproblems ( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub, ctrl.progress ); if( ATL.Participating() ) SDC( uplo, ATLSub, wTSub, ctrl ); if( ABR.Participating() ) SDC( uplo, ABRSub, wBSub, ctrl ); PullSubproblems( ATL, ABR, ATLSub, ABRSub, wT, wB, wTSub, wBSub ); }
// Unpacks the stacked vector x into the imagSize x realSize matrix X:
// column j of X receives rows [j*imagSize,(j+1)*imagSize) of x.
// X is placed on x's grid and resized before the copies.
void ReshapeIntoGrid
( Int realSize, Int imagSize,
  const ElementalMatrix<T>& x,
        ElementalMatrix<T>& X )
{
    X.SetGrid( x.Grid() );
    X.Resize( imagSize, realSize );

    // Reusable view objects of the same concrete types as x and X
    unique_ptr<ElementalMatrix<T>> srcView( x.Construct(x.Grid(),x.Root()) );
    unique_ptr<ElementalMatrix<T>> dstView( X.Construct(X.Grid(),X.Root()) );

    for( Int col=0; col<realSize; ++col )
    {
        const Int segBeg = col*imagSize;
        const Int segEnd = (col+1)*imagSize;
        View( *dstView, X, IR(0,imagSize), IR(col) );
        LockedView( *srcView, x, IR(segBeg,segEnd), ALL );
        Copy( *srcView, *dstView );
    }
}
// Returns the two-norm of the symmetric matrix stored in the 'uplo' triangle
// of A: a copy of A is made explicitly symmetric, its SVD values are
// computed (allowing the copy to be overwritten), and the max norm of those
// values is returned.
Base<F> SymmetricTwoNorm( UpperOrLower uplo, const ElementalMatrix<F>& A )
{
    DEBUG_ONLY(CSE cse("SymmetricTwoNorm"))
    typedef Base<F> Real;

    DistMatrix<F> ASymm( A );
    DistMatrix<Real,VR,STAR> singVals( A.Grid() );
    MakeSymmetric( uplo, ASymm );

    // The symmetrized copy is scratch space, so let the SVD clobber it
    SVDCtrl<Real> svdCtrl;
    svdCtrl.overwrite = true;
    SVD( ASymm, singVals, svdCtrl );

    const Real largest = MaxNorm( singVals );
    return largest;
}
void LLNMedium ( const ElementalMatrix<F>& LPre, ElementalMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); for( Int k=0; k<m; k+=bsize ) { const Int nbProp = Min(bsize,m-k); const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) ); const Int nb = ( in2x2 ? nbProp+1 : nbProp ); const Range<Int> ind1( k, k+nb ), ind2( k+nb, m ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, ALL ); auto X2 = X( ind2, ALL ); L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR] X1Trans_MR_STAR.AlignWith( X2 ); Transpose( X1, X1Trans_MR_STAR ); // X1^T[MR,* ] := X1^T[MR,* ] L11^-T[* ,* ] // = (L11^-1[* ,* ] X1[* ,MR])^T LocalQuasiTrsm ( RIGHT, LOWER, TRANSPOSE, F(1), L11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); Transpose( X1Trans_MR_STAR, X1 ); L21_MC_STAR.AlignWith( X2 ); L21_MC_STAR = L21; // L21[MC,* ] <- L21[MC,MR] // X2[MC,MR] -= L21[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), L21_MC_STAR, X1Trans_MR_STAR, F(1), X2 ); } }
void LLNLarge ( const ElementalMatrix<F>& LPre, ElementalMatrix<F>& XPre, bool checkIfSingular ) { DEBUG_CSE const Int m = XPre.Height(); const Int bsize = Blocksize(); const Grid& g = LPre.Grid(); DistMatrixReadProxy<F,F,MC,MR> LProx( LPre ); DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre ); auto& L = LProx.GetLocked(); auto& X = XProx.Get(); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,MC, STAR> L21_MC_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); for( Int k=0; k<m; k+=bsize ) { const Int nbProp = Min(bsize,m-k); const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) ); const Int nb = ( in2x2 ? nbProp+1 : nbProp ); const Range<Int> ind1( k, k+nb ), ind2( k+nb, m ); auto L11 = L( ind1, ind1 ); auto L21 = L( ind2, ind1 ); auto X1 = X( ind1, ALL ); auto X2 = X( ind2, ALL ); // X1[* ,VR] := L11^-1[* ,* ] X1[* ,VR] L11_STAR_STAR = L11; X1_STAR_VR = X1; LocalQuasiTrsm ( LEFT, LOWER, NORMAL, F(1), L11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR.AlignWith( X2 ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] L21_MC_STAR.AlignWith( X2 ); L21_MC_STAR = L21; // L21[MC,* ] <- L21[MC,MR] // X2[MC,MR] -= L21[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), L21_MC_STAR, X1_STAR_MR, F(1), X2 ); } }
void Apply ( const ElementalMatrix<Real>& x, ElementalMatrix<Real>& y, const ElementalMatrix<Int>& orders, const ElementalMatrix<Int>& firstInds, Int cutoff ) { DEBUG_ONLY(CSE cse("soc::Apply")) // TODO?: Optimize DistMatrix<Real,VC,STAR> z(x.Grid()); soc::Apply( x, y, z, orders, firstInds, cutoff ); y = z; }
// Overwrites A with the lower-trapezoidal factor of its LQ factorization:
// A is factored in place, truncated to its leading min(m,n) columns, and
// everything outside of the lower trapezoid is cleared.
void ExplicitTriang( ElementalMatrix<F>& A )
{
    DEBUG_CSE
    const Grid& grid = A.Grid();

    // The reflector data is only needed to run the factorization
    DistMatrix<F,MD,STAR> scalars(grid);
    DistMatrix<Base<F>,MD,STAR> signs(grid);
    LQ( A, scalars, signs );

    const Int height = A.Height();
    const Int width = A.Width();
    A.Resize( height, Min(height,width) );
    MakeTrapezoidal( LOWER, A );
}
// Computes the imaginary parts of the eigenvalues of the skew-Hermitian
// matrix G by copying G into a complex matrix, scaling the 'uplo' trapezoid
// of the copy by -i, and passing the result to HermitianEig.
//
//   uplo   - which triangle is scaled and passed on to HermitianEig
//   G      - the input matrix (left untouched)
//   wImag  - receives the output of HermitianEig
//   sort, subset, ctrl - forwarded unchanged to HermitianEig
void SkewHermitianEig
( UpperOrLower uplo,
  const ElementalMatrix<F>& G,
        ElementalMatrix<Base<F>>& wImag,
  SortType sort,
  const HermitianEigSubset<Base<F>>& subset,
  const HermitianEigCtrl<Complex<Base<F>>>& ctrl )
{
    DEBUG_ONLY(CSE cse("SkewHermitianEig"))
    typedef Complex<Base<F>> C;

    DistMatrix<C> scaledCopy(G.Grid());
    Copy( G, scaledCopy );

    const C minusImagUnit(0,-1);
    ScaleTrapezoid( minusImagUnit, uplo, scaledCopy );

    HermitianEig( uplo, scaledCopy, wImag, sort, subset, ctrl );
}
// Fills A with the n x n matrix that has ones on the diagonal and in the
// last column, and -1 on every subdiagonal. For n <= 1 the result is just
// the identity.
void GEPPGrowth( ElementalMatrix<T>& A, Int n )
{
    DEBUG_ONLY(CSE cse("GEPPGrowth"))
    Identity( A, n, n );
    if( n <= 1 )
        return;

    // Overwrite the final column with ones
    unique_ptr<ElementalMatrix<T>> lastCol( A.Construct(A.Grid(),A.Root()) );
    View( *lastCol, A, IR(0,n), IR(n-1,n) );
    Fill( *lastCol, T(1) );

    // Fill diagonals -1, -2, ..., -(n-1) with -1
    for( Int offset=-1; offset>-n; --offset )
        FillDiagonal( A, T(-1), offset );
}
void MakeExtendedKahan ( ElementalMatrix<F>& A, Base<F> phi, Base<F> mu ) { EL_DEBUG_CSE typedef Base<F> Real; if( A.Height() != A.Width() ) LogicError("Extended Kahan matrices must be square"); const Int n = A.Height(); if( n % 3 != 0 ) LogicError("Dimension must be an integer multiple of 3"); const Int l = n / 3; if( !l || (l & (l-1)) ) LogicError("n/3 is not a power of two"); Int k=0; while( Int(1u<<k) < l ) ++k; if( phi <= Real(0) || phi >= Real(1) ) LogicError("phi must be in (0,1)"); if( mu <= Real(0) || mu >= Real(1) ) LogicError("mu must be in (0,1)"); // Start by setting A to the identity, and then modify the necessary // l x l blocks of its 3 x 3 partitioning. MakeIdentity( A ); unique_ptr<ElementalMatrix<F>> ABlock( A.Construct(A.Grid(),A.Root()) ); View( *ABlock, A, IR(2*l,3*l), IR(2*l,3*l) ); *ABlock *= mu; View( *ABlock, A, IR(0,l), IR(l,2*l) ); Walsh( *ABlock, k ); *ABlock *= -phi; View( *ABlock, A, IR(l,2*l), IR(2*l,3*l) ); Walsh( *ABlock, k ); *ABlock *= phi; // Now scale A by S const Real zeta = Sqrt(Real(1)-phi*phi); auto& ALoc = A.Matrix(); for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc ) { const Int i = A.GlobalRow(iLoc); const Real gamma = Pow(zeta,Real(i)); for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc ) ALoc(iLoc,jLoc) *= gamma; } }
// Converting constructor: builds this DistMatrix on A's grid from an
// arbitrary ElementalMatrix A.
//
// The GUARD/PAYLOAD macros defined below are consumed by the included
// GuardAndPayload.h, which is not visible here; presumably it instantiates
// them once per (column,row) distribution pair -- confirm against that
// header. GUARD tests whether A has a given distribution pair, and PAYLOAD
// downcasts A to the matching concrete DistMatrix type and assigns it to
// *this. If the distribution pair equals this class's own and A is this very
// object, a LogicError is raised instead of self-assigning.
DM::DistMatrix( const ElementalMatrix<T>& A )
: EM(A.Grid())
{
    DEBUG_ONLY(CSE cse("DM(EM)"))
    // A [CIRC,CIRC] matrix is marked as owning its data
    if( COLDIST == CIRC && ROWDIST == CIRC )
        this->matrix_.viewType_ = OWNER;
    this->SetShifts();
    // Predicate: does A carry the distribution pair (CDIST,RDIST)?
    #define GUARD(CDIST,RDIST) \
      A.DistData().colDist == CDIST && A.DistData().rowDist == RDIST
    // Action: cast A to its concrete type and copy it, rejecting self-copies
    #define PAYLOAD(CDIST,RDIST) \
      auto& ACast = dynamic_cast<const DistMatrix<T,CDIST,RDIST>&>(A); \
      if( COLDIST != CDIST || ROWDIST != RDIST || \
          reinterpret_cast<const DM*>(&A) != this ) \
          *this = ACast; \
      else \
          LogicError("Tried to construct DistMatrix with itself");
    #include "El/macros/GuardAndPayload.h"
}
// Fills A with the 2k x 2k Druinsky-Toledo matrix.
//
// For k == 0 the result is empty and for k == 1 it is the 2 x 2 matrix of
// all ones. Otherwise, in terms of k x k quadrants: the top-right,
// bottom-left and bottom-right quadrants are identities, while the top-left
// quadrant has its last two rows and last two columns set to one and its
// leading (k-2) x (k-2) block set to diag(d), with d generated from the
// Bunch-Kaufman pivot constant by the recurrence below.
void DruinskyToledo( ElementalMatrix<F>& A, Int k )
{
    EL_DEBUG_CSE
    const Int n = 2*k;
    Zeros( A, n, n );
    if( k == 0 )
        return;
    if( k == 1 )
    {
        Ones( A, n, n );
        return;
    }

    typedef Base<F> Real;
    const Real phi = Real(1) + 4*limits::Epsilon<Real>();
    const Real alphaPhi = LDLPivotConstant<Real>(BUNCH_KAUFMAN_A)*phi;

    // d_i = -alphaPhi / sigma_i, with sigma_0 = 1 and
    // sigma_{i+1} = sigma_i - 1/d_i
    const Int numDiag = k-2;
    vector<Real> d( numDiag );
    Real sigma(1);
    for( Int i=0; i<numDiag; ++i )
    {
        d[i] = -alphaPhi/sigma;
        sigma -= 1/d[i];
    }

    const Range<Int> top(0,k), bottom(k,n), lastTwo(k-2,k), leading(0,k-2);
    unique_ptr<ElementalMatrix<F>> block( A.Construct(A.Grid(),A.Root()) );

    // Top-left quadrant
    View( *block, A, lastTwo, top );
    Ones( *block, 2, k );
    View( *block, A, top, lastTwo );
    Ones( *block, k, 2 );
    View( *block, A, leading, leading );
    Diagonal( *block, d );

    // Remaining quadrants are identities
    View( *block, A, bottom, top );
    Identity( *block, k, k );
    View( *block, A, bottom, bottom );
    Identity( *block, k, k );
    View( *block, A, top, bottom );
    Identity( *block, k, k );
}
// Overwrites APre with the explicit Q factor of its LQ factorization: the
// matrix is factored in place and Q is then applied from the right to an
// identity of the same dimensions.
void ExplicitUnitary( ElementalMatrix<F>& APre )
{
    DEBUG_CSE
    const Grid& grid = APre.Grid();

    DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre );
    auto& A = AProx.Get();

    // Factor in place
    DistMatrix<F,MD,STAR> scalars(grid);
    DistMatrix<Base<F>,MD,STAR> signs(grid);
    LQ( A, scalars, signs );

    // TODO: Replace this with an in-place expansion of Q
    DistMatrix<F> Q(grid);
    Q.AlignWith( A );
    const Int height = A.Height();
    const Int width = A.Width();
    Identity( Q, height, width );
    lq::ApplyQ( RIGHT, NORMAL, A, scalars, signs, Q );

    Copy( Q, APre );
}
void TestCorrectness ( bool print, UpperOrLower uplo, const ElementalMatrix<F>& AOrig, const ElementalMatrix<F>& A, const ElementalMatrix<Base<F>>& w, const ElementalMatrix<F>& Z ) { typedef Base<F> Real; const Grid& g = A.Grid(); const Int n = Z.Height(); const Int k = Z.Width(); const Real eps = limits::Epsilon<Real>(); DistMatrix<F> X(g); Identity( X, k, k ); Herk( uplo, ADJOINT, Real(-1), Z, Real(1), X ); const Real infOrthogError = HermitianInfinityNorm( uplo, X ); const Real relOrthogError = infOrthogError / (eps*n); OutputFromRoot(g.Comm(),"||Z^H Z - I||_oo / (eps n) = ",relOrthogError); // X := AZ X.AlignWith( Z ); Zeros( X, n, k ); Hemm( LEFT, uplo, F(1), AOrig, Z, F(0), X ); // Find the residual ||X-ZW||_oo = ||AZ-ZW||_oo DistMatrix<F> ZW( Z ); DiagonalScale( RIGHT, NORMAL, w, ZW ); X -= ZW; const Real oneNormA = HermitianOneNorm( uplo, AOrig ); if( oneNormA == Real(0) ) LogicError("Tried to test relative accuracy on zero matrix..."); const Real infError = InfinityNorm( X ); const Real relError = infError / (n*eps*oneNormA); OutputFromRoot(g.Comm(),"||A Z - Z W||_oo / (eps n ||A||_1) = ",relError); // TODO: More refined failure conditions if( relOrthogError > Real(200) ) // yes, really LogicError("Relative orthogonality error was unacceptably large"); if( relError > Real(10) ) LogicError("Relative error was unacceptably large"); }
void LocalTrr2kKernel ( UpperOrLower uplo, Orientation orientA, Orientation orientB, Orientation orientC, Orientation orientD, T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B, T beta, const ElementalMatrix<T>& C, const ElementalMatrix<T>& D, ElementalMatrix<T>& E ) { DEBUG_CSE const bool transA = orientA != NORMAL; const bool transB = orientB != NORMAL; const bool transC = orientC != NORMAL; const bool transD = orientD != NORMAL; // TODO: Stringent distribution and alignment checks typedef ElementalMatrix<T> ADM; auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto FTL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto FBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); const Int half = E.Height() / 2; if( transA ) LockedPartitionRight( A, *A0, *A1, half ); else LockedPartitionDown( A, *A0, *A1, half ); if( transB ) LockedPartitionDown( B, *B0, *B1, half ); else LockedPartitionRight( B, *B0, *B1, half ); if( transC ) LockedPartitionRight( C, *C0, *C1, half ); else LockedPartitionDown( C, *C0, *C1, half ); if( transD ) LockedPartitionDown( D, *D0, *D1, half ); else LockedPartitionRight( D, *D0, *D1, half ); PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half ); if( uplo == LOWER ) { Gemm ( orientA, orientB, alpha, A1->LockedMatrix(), B0->LockedMatrix(), T(1), 
EBL->Matrix() ); Gemm ( orientC, orientD, beta, C1->LockedMatrix(), D0->LockedMatrix(), T(1), EBL->Matrix() ); } else { Gemm ( orientA, orientB, alpha, A0->LockedMatrix(), B1->LockedMatrix(), T(1), ETR->Matrix() ); Gemm ( orientC, orientD, beta, C0->LockedMatrix(), D1->LockedMatrix(), T(1), ETR->Matrix() ); } FTL->AlignWith( *ETL ); FTL->Resize( ETL->Height(), ETL->Width() ); Gemm ( orientA, orientB, alpha, A0->LockedMatrix(), B0->LockedMatrix(), T(0), FTL->Matrix() ); Gemm ( orientC, orientD, beta, C0->LockedMatrix(), D0->LockedMatrix(), T(1), FTL->Matrix() ); AxpyTrapezoid( uplo, T(1), *FTL, *ETL ); FBR->AlignWith( *EBR ); FBR->Resize( EBR->Height(), EBR->Width() ); Gemm ( orientA, orientB, alpha, A1->LockedMatrix(), B1->LockedMatrix(), T(0), FBR->Matrix() ); Gemm ( orientC, orientD, beta, C1->LockedMatrix(), D1->LockedMatrix(), T(1), FBR->Matrix() ); AxpyTrapezoid( uplo, T(1), *FBR, *EBR ); }