// Overwrites A with the result of Riffle( A, n ) minus the matrix produced by
// RiffleStationary for the same n.
//
//   A: matrix that is filled by Riffle and then updated in place.
//   n: size argument forwarded unchanged to Riffle and RiffleStationary.
//
// NOTE(review): presumably this leaves the "decaying" part of the riffle
// transition matrix once its stationary part is removed -- confirm against
// the definitions of Riffle/RiffleStationary.
void RiffleDecay( ElementalMatrix<F>& A, Int n )
{
    DEBUG_CSE
    Riffle( A, n );

    // Temporary created through A.Construct on A's grid and root, and aligned
    // with A's distribution data before it is filled.
    auto stationary =
      unique_ptr<ElementalMatrix<F>>( A.Construct(A.Grid(),A.Root()) );
    stationary->AlignWith( A.DistData() );
    RiffleStationary( *stationary, n );

    A -= *stationary;
}
// Copies consecutive row-chunks of x into the columns of X.
//
// X is moved onto x's grid and resized to imagSize x realSize. For each
// column index j in [0,realSize), rows [j*imagSize,(j+1)*imagSize) of x (all
// of its columns) are copied into rows [0,imagSize) of column j of X.
//
//   realSize: number of columns of X that are filled.
//   imagSize: number of rows of X / length of each chunk taken from x.
//   x:        source matrix (read-only).
//   X:        destination matrix; its grid and dimensions are overwritten.
//
// NOTE(review): presumably x is a single column of height realSize*imagSize;
// this is not checked here -- confirm against callers.
void ReshapeIntoGrid
( Int realSize, Int imagSize,
  const ElementalMatrix<T>& x, ElementalMatrix<T>& X )
{
    X.SetGrid( x.Grid() );
    X.Resize( imagSize, realSize );

    // View objects created through each matrix's own Construct (X's only
    // after its grid has been reset above).
    unique_ptr<ElementalMatrix<T>> chunk( x.Construct(x.Grid(),x.Root()) );
    unique_ptr<ElementalMatrix<T>> column( X.Construct(X.Grid(),X.Root()) );

    const auto columnRows = IR(0,imagSize);
    for( Int col=0; col<realSize; ++col )
    {
        const Int chunkBeg = col*imagSize;
        const Int chunkEnd = (col+1)*imagSize;

        View( *column, X, columnRows, IR(col) );
        LockedView( *chunk, x, IR(chunkBeg,chunkEnd), ALL );
        Copy( *chunk, *column );
    }
}
// Fills A with an n x n matrix that is the identity, except that (for n > 1)
// its last column is set to all ones and every subdiagonal is set to -1.
//
//   A: matrix to overwrite (set to the n x n identity first).
//   n: matrix dimension; for n <= 1 only the identity is formed.
//
// NOTE(review): the name suggests this is the classical example of worst-case
// element growth for Gaussian elimination with partial pivoting -- confirm.
void GEPPGrowth( ElementalMatrix<T>& A, Int n )
{
    DEBUG_ONLY(CSE cse("GEPPGrowth"))
    Identity( A, n, n );

    if( n > 1 )
    {
        // Overwrite the final column with ones through a view of A
        auto lastColumn =
          unique_ptr<ElementalMatrix<T>>( A.Construct(A.Grid(),A.Root()) );
        View( *lastColumn, A, IR(0,n), IR(n-1,n) );
        Fill( *lastColumn, T(1) );

        // Walk the subdiagonals (offsets -1, -2, ..., -(n-1)) and set each
        // of them to -1
        for( Int offset=-1; offset>-n; --offset )
            FillDiagonal( A, T(-1), offset );
    }
}
void MakeExtendedKahan ( ElementalMatrix<F>& A, Base<F> phi, Base<F> mu ) { EL_DEBUG_CSE typedef Base<F> Real; if( A.Height() != A.Width() ) LogicError("Extended Kahan matrices must be square"); const Int n = A.Height(); if( n % 3 != 0 ) LogicError("Dimension must be an integer multiple of 3"); const Int l = n / 3; if( !l || (l & (l-1)) ) LogicError("n/3 is not a power of two"); Int k=0; while( Int(1u<<k) < l ) ++k; if( phi <= Real(0) || phi >= Real(1) ) LogicError("phi must be in (0,1)"); if( mu <= Real(0) || mu >= Real(1) ) LogicError("mu must be in (0,1)"); // Start by setting A to the identity, and then modify the necessary // l x l blocks of its 3 x 3 partitioning. MakeIdentity( A ); unique_ptr<ElementalMatrix<F>> ABlock( A.Construct(A.Grid(),A.Root()) ); View( *ABlock, A, IR(2*l,3*l), IR(2*l,3*l) ); *ABlock *= mu; View( *ABlock, A, IR(0,l), IR(l,2*l) ); Walsh( *ABlock, k ); *ABlock *= -phi; View( *ABlock, A, IR(l,2*l), IR(2*l,3*l) ); Walsh( *ABlock, k ); *ABlock *= phi; // Now scale A by S const Real zeta = Sqrt(Real(1)-phi*phi); auto& ALoc = A.Matrix(); for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc ) { const Int i = A.GlobalRow(iLoc); const Real gamma = Pow(zeta,Real(i)); for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc ) ALoc(iLoc,jLoc) *= gamma; } }
// Fills A with a 2k x 2k matrix assembled from k x k blocks.
//
//   k == 0: A is the (empty) 0 x 0 zero matrix.
//   k == 1: A is the 2 x 2 matrix of all ones.
//   k >= 2: starting from zeros,
//     - rows [k-2,k) of columns [0,k) and columns [k-2,k) of rows [0,k) are
//       set to ones,
//     - the leading (k-2) x (k-2) block becomes Diagonal( d ), where
//       d[i] = -alphaPhi/sigma_i, sigma_0 = 1, sigma_{i+1} = sigma_i - 1/d[i],
//       and alphaPhi = LDLPivotConstant(BUNCH_KAUFMAN_A)*(1 + 4*epsilon),
//     - the bottom-left, bottom-right and top-right k x k blocks are each
//       set to the identity.
//
// NOTE(review): presumably the Druinsky-Toledo example of element growth
// under Bunch-Kaufman pivoting -- confirm against the library documentation.
void DruinskyToledo( ElementalMatrix<F>& A, Int k )
{
    EL_DEBUG_CSE
    const Int n = 2*k;
    Zeros( A, n, n );
    if( k == 0 )
    {
        return;
    }
    else if( k == 1 )
    {
        Ones( A, n, n );
        return;
    }

    using Real = Base<F>;
    const Real phi = Real(1) + 4*limits::Epsilon<Real>();
    const Real alphaPhi = LDLPivotConstant<Real>(BUNCH_KAUFMAN_A)*phi;

    // Diagonal entries of the leading (k-2) x (k-2) block; each entry
    // depends on the running value sigma left by the previous ones.
    vector<Real> diagEntries( k-2 );
    Real sigma(1);
    for( auto& entry : diagEntries )
    {
        entry = -alphaPhi/sigma;
        sigma -= 1/entry;
    }

    const auto leading  = IR(0,k);
    const auto trailing = IR(k,n);
    const auto border   = IR(k-2,k);
    const auto interior = IR(0,k-2);

    auto block =
      unique_ptr<ElementalMatrix<F>>( A.Construct(A.Grid(),A.Root()) );

    // The regions below overlap, so they are written in this exact order.
    View( *block, A, border, leading );
    Ones( *block, 2, k );

    View( *block, A, leading, border );
    Ones( *block, k, 2 );

    View( *block, A, interior, interior );
    Diagonal( *block, diagEntries );

    View( *block, A, trailing, leading );
    Identity( *block, k, k );

    View( *block, A, trailing, trailing );
    Identity( *block, k, k );

    View( *block, A, leading, trailing );
    Identity( *block, k, k );
}
void LocalTrr2kKernel ( UpperOrLower uplo, Orientation orientA, Orientation orientB, Orientation orientC, Orientation orientD, T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B, T beta, const ElementalMatrix<T>& C, const ElementalMatrix<T>& D, ElementalMatrix<T>& E ) { DEBUG_CSE const bool transA = orientA != NORMAL; const bool transB = orientB != NORMAL; const bool transC = orientC != NORMAL; const bool transD = orientD != NORMAL; // TODO: Stringent distribution and alignment checks typedef ElementalMatrix<T> ADM; auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto FTL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto FBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); const Int half = E.Height() / 2; if( transA ) LockedPartitionRight( A, *A0, *A1, half ); else LockedPartitionDown( A, *A0, *A1, half ); if( transB ) LockedPartitionDown( B, *B0, *B1, half ); else LockedPartitionRight( B, *B0, *B1, half ); if( transC ) LockedPartitionRight( C, *C0, *C1, half ); else LockedPartitionDown( C, *C0, *C1, half ); if( transD ) LockedPartitionDown( D, *D0, *D1, half ); else LockedPartitionRight( D, *D0, *D1, half ); PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half ); if( uplo == LOWER ) { Gemm ( orientA, orientB, alpha, A1->LockedMatrix(), B0->LockedMatrix(), T(1), 
EBL->Matrix() ); Gemm ( orientC, orientD, beta, C1->LockedMatrix(), D0->LockedMatrix(), T(1), EBL->Matrix() ); } else { Gemm ( orientA, orientB, alpha, A0->LockedMatrix(), B1->LockedMatrix(), T(1), ETR->Matrix() ); Gemm ( orientC, orientD, beta, C0->LockedMatrix(), D1->LockedMatrix(), T(1), ETR->Matrix() ); } FTL->AlignWith( *ETL ); FTL->Resize( ETL->Height(), ETL->Width() ); Gemm ( orientA, orientB, alpha, A0->LockedMatrix(), B0->LockedMatrix(), T(0), FTL->Matrix() ); Gemm ( orientC, orientD, beta, C0->LockedMatrix(), D0->LockedMatrix(), T(1), FTL->Matrix() ); AxpyTrapezoid( uplo, T(1), *FTL, *ETL ); FBR->AlignWith( *EBR ); FBR->Resize( EBR->Height(), EBR->Width() ); Gemm ( orientA, orientB, alpha, A1->LockedMatrix(), B1->LockedMatrix(), T(0), FBR->Matrix() ); Gemm ( orientC, orientD, beta, C1->LockedMatrix(), D1->LockedMatrix(), T(1), FBR->Matrix() ); AxpyTrapezoid( uplo, T(1), *FBR, *EBR ); }
void LocalTrr2k ( UpperOrLower uplo, Orientation orientA, Orientation orientB, Orientation orientC, Orientation orientD, T alpha, const ElementalMatrix<T>& A, const ElementalMatrix<T>& B, T beta, const ElementalMatrix<T>& C, const ElementalMatrix<T>& D, T gamma, ElementalMatrix<T>& E ) { using namespace trr2k; DEBUG_CSE const bool transA = orientA != NORMAL; const bool transB = orientB != NORMAL; const bool transC = orientC != NORMAL; const bool transD = orientD != NORMAL; // TODO: Stringent distribution and alignment checks ScaleTrapezoid( gamma, uplo, E ); if( E.Height() < E.Grid().Width()*LocalTrr2kBlocksize<T>() ) { LocalTrr2kKernel ( uplo, orientA, orientB, orientC, orientD, alpha, A, B, beta, C, D, E ); } else { typedef ElementalMatrix<T> ADM; // Ugh. This is likely too much overhead. It should be measured. auto A0 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto A1 = unique_ptr<ADM>( A.Construct(A.Grid(),A.Root()) ); auto B0 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto B1 = unique_ptr<ADM>( B.Construct(B.Grid(),B.Root()) ); auto C0 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto C1 = unique_ptr<ADM>( C.Construct(C.Grid(),C.Root()) ); auto D0 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto D1 = unique_ptr<ADM>( D.Construct(D.Grid(),D.Root()) ); auto ETL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto ETR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBL = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); auto EBR = unique_ptr<ADM>( E.Construct(E.Grid(),E.Root()) ); const Int half = E.Height() / 2; if( transA ) LockedPartitionRight( A, *A0, *A1, half ); else LockedPartitionDown( A, *A0, *A1, half ); if( transB ) LockedPartitionDown( B, *B0, *B1, half ); else LockedPartitionRight( B, *B0, *B1, half ); if( transC ) LockedPartitionRight( C, *C0, *C1, half ); else LockedPartitionDown( C, *C0, *C1, half ); if( transD ) LockedPartitionDown( D, *D0, *D1, half ); else LockedPartitionRight( D, *D0, *D1, 
half ); PartitionDownDiagonal( E, *ETL, *ETR, *EBL, *EBR, half ); if( uplo == LOWER ) { Gemm ( orientA, orientB, alpha, A1->LockedMatrix(), B0->LockedMatrix(), T(1), EBL->Matrix() ); Gemm ( orientC, orientD, beta, C1->LockedMatrix(), D0->LockedMatrix(), T(1), EBL->Matrix() ); } else { Gemm ( orientA, orientB, alpha, A0->LockedMatrix(), B1->LockedMatrix(), T(1), ETR->Matrix() ); Gemm ( orientC, orientD, beta, C0->LockedMatrix(), D1->LockedMatrix(), T(1), ETR->Matrix() ); } // Recurse LocalTrr2k ( uplo, orientA, orientB, orientC, orientD, alpha, *A0, *B0, beta, *C0, *D0, T(1), *ETL ); LocalTrr2k ( uplo, orientA, orientB, orientC, orientD, alpha, *A1, *B1, beta, *C1, *D1, T(1), *EBR ); } }