std::unique_ptr<ElDistVector> ElRBFInterpolation::interpolate( const std::unique_ptr<ElDistVector> & values ) { assert( Phi->Height() > 0 ); assert( H->Height() > 0 ); assert( values->Height() == Phi->Width() ); std::unique_ptr<ElDistVector> result( new ElDistVector( Phi->Grid() ) ); result->AlignRowsWith( *Phi ); El::DistMatrix<double> B = *values; if ( HCopy.Width() == 0 ) { HCopy = El::DistMatrix<double> ( *H ); El::DistMatrixReadProxy<double, double, El::MC, El::MR> AProx( HCopy ); auto & A = AProx.Get(); p = El::DistPermutation( A.Grid() ); dSub = El::DistMatrix<double, El::MD, El::STAR> ( A.Grid() ); El::LDL( A, dSub, p, false, El::LDLPivotCtrl<El::Base<double> >() ); } El::DistMatrixReadProxy<double, double, El::MC, El::MR> AProx( HCopy ); El::DistMatrixReadWriteProxy<double, double, El::MC, El::MR> BProx( B ); auto & A = AProx.Get(); auto & B_LU = BProx.Get(); El::ldl::SolveAfter( A, dSub, p, B_LU, false ); El::Gemm( El::Orientation::NORMAL, El::Orientation::NORMAL, 1.0, *Phi, B, *result ); return result; }
void ApplyQ ( LeftOrRight side, Orientation orientation, const AbstractDistMatrix<F>& APre, const AbstractDistMatrix<F>& householderScalars, const AbstractDistMatrix<Base<F>>& signature, AbstractDistMatrix<F>& BPre ) { EL_DEBUG_CSE const bool normal = (orientation==NORMAL); const bool onLeft = (side==LEFT); const bool applyDFirst = normal==onLeft; const Int minDim = Min(APre.Height(),APre.Width()); const ForwardOrBackward direction = ( normal==onLeft ? BACKWARD : FORWARD ); const Conjugation conjugation = ( normal ? CONJUGATED : UNCONJUGATED ); DistMatrixReadProxy<F,F,MC,MR> AProx( APre ); DistMatrixReadWriteProxy<F,F,MC,MR> BProx( BPre ); auto& A = AProx.GetLocked(); auto& B = BProx.Get(); const Int m = B.Height(); const Int n = B.Width(); if( applyDFirst ) { if( onLeft ) { auto BTop = B( IR(0,minDim), IR(0,n) ); DiagonalScale( side, orientation, signature, BTop ); } else { auto BLeft = B( IR(0,m), IR(0,minDim) ); DiagonalScale( side, orientation, signature, BLeft ); } } ApplyPackedReflectors ( side, LOWER, VERTICAL, direction, conjugation, 0, A, householderScalars, B ); if( !applyDFirst ) { if( onLeft ) { auto BTop = B( IR(0,minDim), IR(0,n) ); DiagonalScale( side, orientation, signature, BTop ); } else { auto BLeft = B( IR(0,m), IR(0,minDim) ); DiagonalScale( side, orientation, signature, BLeft ); } } }
void SUMMA_NTDot ( Orientation orientB, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre, Int blockSize=2000 ) { EL_DEBUG_CSE const Int m = CPre.Height(); const Int n = CPre.Width(); const Grid& g = APre.Grid(); DistMatrixReadProxy<T,T,STAR,VC> AProx( APre ); auto& A = AProx.GetLocked(); ElementalProxyCtrl BCtrl; BCtrl.rowConstrain = true; BCtrl.rowAlign = A.RowAlign(); DistMatrixReadProxy<T,T,STAR,VC> BProx( BPre, BCtrl ); auto& B = BProx.GetLocked(); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& C = CProx.Get(); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); for( Int kOuter=0; kOuter<m; kOuter+=blockSize ) { const Int nbOuter = Min(blockSize,m-kOuter); const Range<Int> indOuter( kOuter, kOuter+nbOuter ); auto A1 = A( indOuter, ALL ); for( Int kInner=0; kInner<n; kInner+=blockSize ) { const Int nbInner = Min(blockSize,n-kInner); const Range<Int> indInner( kInner, kInner+nbInner ); auto B1 = B( indInner, ALL ); auto C11 = C( indOuter, indInner ); LocalGemm( NORMAL, orientB, alpha, A1, B1, C11_STAR_STAR ); AxpyContract( T(1), C11_STAR_STAR, C11 ); } } }
void SUMMA_NTC ( Orientation orientB, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre ) { EL_DEBUG_CSE const Int sumDim = APre.Width(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); const bool conjugate = ( orientB == ADJOINT ); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadProxy<T,T,MC,MR> BProx( BPre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); B1Trans_STAR_MR.AlignWith( C ); for( Int k=0; k<sumDim; k+=bsize ) { const Int nb = Min(bsize,sumDim-k); auto A1 = A( ALL, IR(k,k+nb) ); auto B1 = B( ALL, IR(k,k+nb) ); A1_MC_STAR = A1; B1_VR_STAR = B1; Transpose( B1_VR_STAR, B1Trans_STAR_MR, conjugate ); // C[MC,MR] += alpha A1[MC,*] (B1[MR,*])^T LocalGemm ( NORMAL, NORMAL, alpha, A1_MC_STAR, B1Trans_STAR_MR, T(1), C ); } }
void L1DistanceMatrix(direction_t dirA, direction_t dirB, T alpha, const El::ElementalMatrix<T> &APre, const El::ElementalMatrix<T> &BPre, T beta, El::ElementalMatrix<T> &CPre) { if (dirA == base::COLUMNS && dirB == base::COLUMNS) { // Use a SUMMA-like routine, with C as stationary // Basically an adaptation of Elementals TN case for stationary C. const El::Int m = CPre.Height(); const El::Int n = CPre.Width(); const El::Int sumDim = BPre.Height(); const El::Int bsize = El::Blocksize(); const El::Grid& g = APre.Grid(); El::DistMatrixReadProxy<T, T, El::MC, El::MR> AProx(APre); El::DistMatrixReadProxy<T, T, El::MC, El::MR> BProx(BPre); El::DistMatrixReadWriteProxy<T, T, El::MC, El::MR> CProx(CPre); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions El::DistMatrix<T, El::STAR, El::MC> A1_STAR_MC(g); El::DistMatrix<T, El::STAR, El::MR> B1_STAR_MR(g); A1_STAR_MC.AlignWith(C); B1_STAR_MR.AlignWith(C); El::Scale(beta, C); for(El::Int k = 0; k < sumDim; k += bsize) { const El::Int nb = std::min(bsize,sumDim-k); auto A1 = A(El::IR(k,k+nb), El::IR(0,m)); auto B1 = B(El::IR(k,k+nb), El::IR(0,n)); A1_STAR_MC = A1; B1_STAR_MR = B1; L1DistanceMatrix(base::COLUMNS, base::COLUMNS, alpha, A1_STAR_MC.LockedMatrix(), B1_STAR_MR.LockedMatrix(), T(1.0), C.Matrix()); } } // TODO the rest of the cases. }
void SUMMA_NTB ( Orientation orientB, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre ) { EL_DEBUG_CSE const Int m = CPre.Height(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadProxy<T,T,MC,MR> BProx( BPre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,MR,STAR> A1Trans_MR_STAR(g); DistMatrix<T,STAR,MC> D1_STAR_MC(g); DistMatrix<T,MR,MC> D1_MR_MC(g); A1Trans_MR_STAR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); for( Int k=0; k<m; k+=bsize ) { const Int nb = Min(bsize,m-k); auto A1 = A( IR(k,k+nb), ALL ); auto C1 = C( IR(k,k+nb), ALL ); // D1[*,MC] := alpha A1[*,MR] (B[MC,MR])^T // = alpha (A1^T)[MR,*] (B^T)[MR,MC] Transpose( A1, A1Trans_MR_STAR ); LocalGemm( TRANSPOSE, orientB, alpha, A1Trans_MR_STAR, B, D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows Contract( D1_STAR_MC, D1_MR_MC ); Axpy( T(1), D1_MR_MC, C1 ); } }
void SUMMA_TNA ( Orientation orientA, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre ) { DEBUG_CSE const Int n = CPre.Width(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadProxy<T,T,MC,MR> BProx( BPre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,MR,STAR> D1_MR_STAR(g); DistMatrix<T,MR,MC > D1_MR_MC(g); B1_MC_STAR.AlignWith( A ); D1_MR_STAR.AlignWith( A ); for( Int k=0; k<n; k+=bsize ) { const Int nb = Min(bsize,n-k); auto B1 = B( ALL, IR(k,k+nb) ); auto C1 = C( ALL, IR(k,k+nb) ); // D1[MR,*] := alpha (A1[MC,MR])^T B1[MC,*] // = alpha (A1^T)[MR,MC] B1[MC,*] B1_MC_STAR = B1; LocalGemm( orientA, NORMAL, alpha, A, B1_MC_STAR, D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols Contract( D1_MR_STAR, D1_MR_MC ); Axpy( T(1), D1_MR_MC, C1 ); } }
void SUMMA_NTA ( Orientation orientB, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre ) { EL_DEBUG_CSE const Int n = CPre.Width(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); const bool conjugate = ( orientB == ADJOINT ); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadProxy<T,T,MC,MR> BProx( BPre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); DistMatrix<T,MC,STAR> D1_MC_STAR(g); B1Trans_MR_STAR.AlignWith( A ); D1_MC_STAR.AlignWith( A ); for( Int k=0; k<n; k+=bsize ) { const Int nb = Min(bsize,n-k); auto B1 = B( IR(k,k+nb), ALL ); auto C1 = C( ALL, IR(k,k+nb) ); // C1[MC,*] := alpha A[MC,MR] (B1^[T/H])[MR,*] Transpose( B1, B1Trans_MR_STAR, conjugate ); LocalGemm( NORMAL, NORMAL, alpha, A, B1Trans_MR_STAR, D1_MC_STAR ); // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows AxpyContract( T(1), D1_MC_STAR, C1 ); } }
void SUMMA_TNB ( Orientation orientA, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre ) { DEBUG_CSE const Int m = CPre.Height(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); const bool conjugate = ( orientA == ADJOINT ); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadProxy<T,T,MC,MR> BProx( BPre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( B ); D1Trans_MR_STAR.AlignWith( B ); for( Int k=0; k<m; k+=bsize ) { const Int nb = Min(bsize,m-k); auto A1 = A( ALL, IR(k,k+nb) ); auto C1 = C( IR(k,k+nb), ALL ); // D1[*,MR] := alpha (A1[MC,*])^[T/H] B[MC,MR] // = alpha (A1^[T/H])[*,MC] B[MC,MR] A1_MC_STAR = A1; // A1[MC,*] <- A1[MC,MR] LocalGemm( orientA, NORMAL, T(1), B, A1_MC_STAR, D1Trans_MR_STAR ); TransposeAxpyContract( alpha, D1Trans_MR_STAR, C1, conjugate ); } }
void SUMMA_TNC ( Orientation orientA, T alpha, const AbstractDistMatrix<T>& APre, const AbstractDistMatrix<T>& BPre, AbstractDistMatrix<T>& CPre ) { DEBUG_CSE const Int sumDim = BPre.Height(); const Int bsize = Blocksize(); const Grid& g = APre.Grid(); DistMatrixReadProxy<T,T,MC,MR> AProx( APre ); DistMatrixReadProxy<T,T,MC,MR> BProx( BPre ); DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& C = CProx.Get(); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); A1_STAR_MC.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); for( Int k=0; k<sumDim; k+=bsize ) { const Int nb = Min(bsize,sumDim-k); auto A1 = A( IR(k,k+nb), ALL ); auto B1 = B( IR(k,k+nb), ALL ); // C[MC,MR] += alpha (A1[*,MC])^T B1[*,MR] // = alpha (A1^T)[MC,*] B1[*,MR] A1_STAR_MC = A1; Transpose( B1, B1Trans_MR_STAR ); LocalGemm ( orientA, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, T(1), C ); } }
void QP ( const ElementalMatrix<Real>& APre, const ElementalMatrix<Real>& BPre, ElementalMatrix<Real>& XPre, const qp::direct::Ctrl<Real>& ctrl ) { DEBUG_CSE DistMatrixReadProxy<Real,Real,MC,MR> AProx( APre ), BProx( BPre ); DistMatrixWriteProxy<Real,Real,MC,MR> XProx( XPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& X = XProx.Get(); const Int n = A.Width(); const Int k = B.Width(); const Grid& g = A.Grid(); DistMatrix<Real> Q(g), AHat(g), bHat(g), c(g); Herk( LOWER, ADJOINT, Real(1), A, Q ); Zeros( AHat, 0, n ); Zeros( bHat, 0, 1 ); Zeros( X, n, k ); DistMatrix<Real> y(g), z(g); for( Int j=0; j<k; ++j ) { auto x = X( ALL, IR(j) ); auto b = B( ALL, IR(j) ); Zeros( c, n, 1 ); Gemv( ADJOINT, Real(-1), A, b, Real(0), c ); El::QP( Q, AHat, bHat, c, x, y, z, ctrl ); } }
void Ridge ( Orientation orientation, const AbstractDistMatrix<Field>& APre, const AbstractDistMatrix<Field>& BPre, Base<Field> gamma, AbstractDistMatrix<Field>& XPre, RidgeAlg alg ) { EL_DEBUG_CSE DistMatrixReadProxy<Field,Field,MC,MR> AProx( APre ), BProx( BPre ); DistMatrixWriteProxy<Field,Field,MC,MR> XProx( XPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& X = XProx.Get(); const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( orientation == TRANSPOSE && IsComplex<Field>::value ) LogicError("Transpose version of complex Ridge not yet supported"); if( m >= n ) { DistMatrix<Field> Z(A.Grid()); if( alg == RIDGE_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<Field>(1), A, Z ); else Herk( LOWER, NORMAL, Base<Field>(1), A, Z ); ShiftDiagonal( Z, Field(gamma*gamma) ); Cholesky( LOWER, Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else if( alg == RIDGE_QR ) { Zeros( Z, m+n, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+n), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); FillDiagonal( ZB, Field(gamma) ); // NOTE: This QR factorization could exploit the upper-triangular // structure of the diagonal matrix ZB qr::ExplicitTriang( Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { DistMatrix<Field> U(A.Grid()), V(A.Grid()); DistMatrix<Base<Field>,VR,STAR> s(A.Grid()); if( orientation == NORMAL ) { SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = false; SVD( A, U, s, V, ctrl ); } else { DistMatrix<Field> AAdj(A.Grid()); Adjoint( A, AAdj ); SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = true; SVD( AAdj, U, s, V ); } auto sigmaMap = [=]( const Base<Field>& sigma ) { return sigma / (sigma*sigma + gamma*gamma); }; EntrywiseMap( s, MakeFunction(sigmaMap) ); Gemm( ADJOINT, NORMAL, Field(1), U, B, X ); DiagonalScale( LEFT, NORMAL, s, X ); U = X; Gemm( NORMAL, NORMAL, Field(1), V, U, X ); } } else { LogicError("This case not yet supported"); } }
void StackedRuizEquil ( AbstractDistMatrix<Field>& APre, AbstractDistMatrix<Field>& BPre, AbstractDistMatrix<Base<Field>>& dRowAPre, AbstractDistMatrix<Base<Field>>& dRowBPre, AbstractDistMatrix<Base<Field>>& dColPre, bool progress ) { EL_DEBUG_CSE typedef Base<Field> Real; ElementalProxyCtrl control; control.colConstrain = true; control.rowConstrain = true; control.colAlign = 0; control.rowAlign = 0; DistMatrixReadWriteProxy<Field,Field,MC,MR> AProx( APre, control ); DistMatrixReadWriteProxy<Field,Field,MC,MR> BProx( BPre, control ); DistMatrixWriteProxy<Real,Real,MC,STAR> dRowAProx( dRowAPre, control ); DistMatrixWriteProxy<Real,Real,MC,STAR> dRowBProx( dRowBPre, control ); DistMatrixWriteProxy<Real,Real,MR,STAR> dColProx( dColPre, control ); auto& A = AProx.Get(); auto& B = BProx.Get(); auto& dRowA = dRowAProx.Get(); auto& dRowB = dRowBProx.Get(); auto& dCol = dColProx.Get(); const Int mA = A.Height(); const Int mB = B.Height(); const Int n = A.Width(); const Int nLocal = A.LocalWidth(); Ones( dRowA, mA, 1 ); Ones( dRowB, mB, 1 ); Ones( dCol, n, 1 ); // TODO(poulson): Expose these as control parameters // For now, simply hard-code the number of iterations const Int maxIter = 4; DistMatrix<Real,MC,STAR> rowScale(A.Grid()); DistMatrix<Real,MR,STAR> colScale(A.Grid()), colScaleB(B.Grid()); auto& colScaleLoc = colScale.Matrix(); auto& colScaleBLoc = colScaleB.Matrix(); const Int indent = PushIndent(); for( Int iter=0; iter<maxIter; ++iter ) { // Rescale the columns // ------------------- ColumnMaxNorms( A, colScale ); ColumnMaxNorms( B, colScaleB ); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) colScaleLoc(jLoc) = Max(colScaleLoc(jLoc),colScaleBLoc(jLoc)); EntrywiseMap( colScale, MakeFunction(DampScaling<Real>) ); DiagonalScale( LEFT, NORMAL, colScale, dCol ); DiagonalSolve( RIGHT, NORMAL, colScale, A ); DiagonalSolve( RIGHT, NORMAL, colScale, B ); // Rescale the rows // ---------------- RowMaxNorms( A, rowScale ); EntrywiseMap( rowScale, MakeFunction(DampScaling<Real>) ); DiagonalScale( LEFT, NORMAL, rowScale, dRowA ); DiagonalSolve( LEFT, NORMAL, rowScale, A ); RowMaxNorms( B, rowScale ); EntrywiseMap( rowScale, MakeFunction(DampScaling<Real>) ); DiagonalScale( LEFT, NORMAL, rowScale, dRowB ); DiagonalSolve( LEFT, NORMAL, rowScale, B ); } SetIndent( indent ); }
void Tikhonov ( Orientation orientation, const ElementalMatrix<F>& APre, const ElementalMatrix<F>& BPre, const ElementalMatrix<F>& G, ElementalMatrix<F>& XPre, TikhonovAlg alg ) { DEBUG_CSE DistMatrixReadProxy<F,F,MC,MR> AProx( APre ), BProx( BPre ); DistMatrixWriteProxy<F,F,MC,MR> XProx( XPre ); auto& A = AProx.GetLocked(); auto& B = BProx.GetLocked(); auto& X = XProx.Get(); const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( G.Width() != n ) LogicError("Tikhonov matrix was the wrong width"); if( orientation == TRANSPOSE && IsComplex<F>::value ) LogicError("Transpose version of complex Tikhonov not yet supported"); if( m >= n ) { DistMatrix<F> Z(A.Grid()); if( alg == TIKHONOV_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<F>(1), A, Z ); else Herk( LOWER, NORMAL, Base<F>(1), A, Z ); Herk( LOWER, ADJOINT, Base<F>(1), G, Base<F>(1), Z ); Cholesky( LOWER, Z ); } else { const Int mG = G.Height(); Zeros( Z, m+mG, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+mG), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); ZB = G; qr::ExplicitTriang( Z ); } if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, F(1), A, B, X ); else Gemm( NORMAL, NORMAL, F(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { LogicError("This case not yet supported"); } }
void ApplyQ ( LeftOrRight side, Orientation orientation, const ElementalMatrix<F>& APre, const ElementalMatrix<F>& phasePre, const ElementalMatrix<Base<F>>& signature, ElementalMatrix<F>& BPre ) { DEBUG_CSE const bool normal = (orientation==NORMAL); const bool onLeft = (side==LEFT); const bool applyDFirst = normal!=onLeft; const Int minDim = Min(APre.Height(),APre.Width()); const ForwardOrBackward direction = ( normal==onLeft ? FORWARD : BACKWARD ); const Conjugation conjugation = ( normal ? CONJUGATED : UNCONJUGATED ); DistMatrixReadProxy<F,F,MC,MR> AProx( APre ); DistMatrixReadWriteProxy<F,F,MC,MR> BProx( BPre ); auto& A = AProx.GetLocked(); auto& B = BProx.Get(); ElementalProxyCtrl phaseCtrl; phaseCtrl.rootConstrain = true; phaseCtrl.colConstrain = true; phaseCtrl.root = A.DiagonalRoot(); phaseCtrl.colAlign = A.DiagonalAlign(); DistMatrixReadProxy<F,F,MD,STAR> phaseProx( phasePre, phaseCtrl ); auto& phase = phaseProx.GetLocked(); const Int m = B.Height(); const Int n = B.Width(); if( applyDFirst ) { if( onLeft ) { auto BTop = B( IR(0,minDim), IR(0,n) ); DiagonalScale( side, orientation, signature, BTop ); } else { auto BLeft = B( IR(0,m), IR(0,minDim) ); DiagonalScale( side, orientation, signature, BLeft ); } } ApplyPackedReflectors ( side, UPPER, HORIZONTAL, direction, conjugation, 0, A, phase, B ); if( !applyDFirst ) { if( onLeft ) { auto BTop = B( IR(0,minDim), IR(0,n) ); DiagonalScale( side, orientation, signature, BTop ); } else { auto BLeft = B( IR(0,m), IR(0,minDim) ); DiagonalScale( side, orientation, signature, BLeft ); } } }