function run() { BarPeriod = 240; // 4 hour bars // calculate the buy/sell signal vars Price = series(price()); vars DomPeriod = series(DominantPeriod(Price,30)); var LowPeriod = LowPass(DomPeriod,500); vars HP = series(HighPass(Price,LowPeriod)); vars Signal = series(Fisher(HP,500)); var Threshold = 1.0; // buy and sell Stop = 4*ATR(100); Trail = 4*ATR(100); if(crossUnder(Signal,-Threshold)) enterLong(); else if(crossOver(Signal,Threshold)) enterShort(); // plot signals and thresholds plot("DominantPeriod",LowPeriod,NEW,BLUE); plot("Signal",Signal[0],NEW,RED); plot("Threshold1",Threshold,0,BLACK); plot("Threshold2",-Threshold,0,BLACK); PlotWidth = 600; PlotHeight1 = 300; }
void MakeExplicitlyHermitian( UpperOrLower uplo, DistMatrix<F,MC,MR>& A ) { const Grid& g = A.Grid(); DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MC,MR> A11Adj(g); DistMatrix<F,MR,MC> A11_MR_MC(g); DistMatrix<F,MR,MC> A21_MR_MC(g); DistMatrix<F,MR,MC> A12_MR_MC(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A11Adj.AlignWith( A11 ); A11_MR_MC.AlignWith( A11 ); A12_MR_MC.AlignWith( A21 ); A21_MR_MC.AlignWith( A12 ); //--------------------------------------------------------------------// A11_MR_MC = A11; A11Adj.ResizeTo( A11.Height(), A11.Width() ); Adjoint( A11_MR_MC.LocalMatrix(), A11Adj.LocalMatrix() ); if( uplo == LOWER ) { MakeTrapezoidal( LEFT, UPPER, 1, A11Adj ); Axpy( (F)1, A11Adj, A11 ); A21_MR_MC = A21; Adjoint( A21_MR_MC.LocalMatrix(), A12.LocalMatrix() ); } else { MakeTrapezoidal( LEFT, LOWER, -1, A11Adj ); Axpy( (F)1, A11Adj, A11 ); A12_MR_MC = A12; Adjoint( A12_MR_MC.LocalMatrix(), A21.LocalMatrix() ); } //--------------------------------------------------------------------// A21_MR_MC.FreeAlignments(); A12_MR_MC.FreeAlignments(); A11_MR_MC.FreeAlignments(); A11Adj.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
// Counter-trend entry logic: a band-pass filtered price curve is Fisher
// transformed and compared against an optimized threshold. The three
// optimize() calls are kept in their original order (band-pass period,
// threshold, stop factor).
function tradeCounterTrend()
{
    TimeFrame = 4;

    vars Prices = series(price());

    // band-pass -> Fisher transform
    var BandPeriod = optimize(30,25,35);
    vars Cycle = series(BandPass(Prices,BandPeriod,0.5));
    vars Fish = series(Fisher(Cycle,500));
    var Limit = optimize(1,0.5,1.5,0.1);

    // optimized stop factor, fixed trail factor, both scaled by ATR(100)
    var StopFactor = optimize(4,2,10);
    Stop = StopFactor * ATR(100);
    Trail = 4*ATR(100);

    // long when the signal crosses under -Limit,
    // otherwise short when it crosses over +Limit
    if(crossUnder(Fish,-Limit)) {
        enterLong();
    }
    else if(crossOver(Fish,Limit)) {
        enterShort();
    }
}
function run() { BarPeriod = 1440; StartDate = EndDate = 2014; if(is(EXITRUN)) printf("\nTrade costs in percent of volatility - - - - - -"); int n = 0; static var Cost[20]; while(asset(loop( "AUD/USD","EUR/USD","EUR/CHF","GBP/USD", "GER30","NAS100","SPX500","UK100","US30", "USD/CAD","USD/CHF","USD/JPY", "XAG/USD","XAU/USD"))) { if(is(INITRUN)) Cost[n] = 0; else Cost[n] += Spread/max(1*PIP,ATR(5)); if(is(EXITRUN)) printf("\n%s - %.1f%%",Asset,100*Cost[n]/Bar); n++; } }
// behavior is deterministic but undefined in the case where illegal // combinations of directions are passed in. axis_transformation& set_directions(const direction_2d& horizontal_dir, const direction_2d& vertical_dir) { int bit2 = (static_cast<orientation_2d>(horizontal_dir).to_int()) != 0; int bit1 = !(vertical_dir.to_int() & 1); int bit0 = !(horizontal_dir.to_int() & 1); atr_ = ATR((bit2 << 2) + (bit1 << 1) + bit0); return *this; }
inline void LQ( DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("LQ"); #endif if( IsComplex<R>::val ) throw std::logic_error("Called real routine with complex datatype"); const Grid& g = A.Grid(); // Matrix views DistMatrix<R,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ATopPan(g), ABottomPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); ATopPan.View1x2( A11, A12 ); ABottomPan.View1x2( A21, A22 ); //--------------------------------------------------------------------// internal::PanelLQ( ATopPan ); ApplyPackedReflectors ( RIGHT, UPPER, HORIZONTAL, FORWARD, 0, ATopPan, ABottomPan ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
function tradeTrend() { TimeFrame = 1; vars Price = series(price()); vars Trend = series(LowPass(Price,optimize(500,300,700))); Stop = optimize(4,2,10) * ATR(100); Trail = 0; vars MMI_Raw = series(MMI(Price,300)); vars MMI_Smooth = series(LowPass(MMI_Raw,500)); if(falling(MMI_Smooth)) { if(valley(Trend)) enterLong(); else if(peak(Trend)) enterShort(); } }
function run() { set(PLOTNOW); NumYears = 1; MaxBars = 210; PlotScale = 8; PlotWidth = 800; PlotHeight1 = 350; PlotHeight2 = 80; vars Price = series(price()); // plot Bollinger bands BBands(Price,30,2,2,MAType_SMA); plot("Bollinger1",rRealUpperBand,BAND1,0x00CC00); plot("Bollinger2",rRealLowerBand,BAND2,0xCC00FF00); plot("SAR",SAR(0.02,0.02,0.2),DOT,RED); ZigZag(Price,20*PIP,5,BLUE); // plot some other indicators plot("ATR (PIP)",ATR(20)/PIP,NEW,RED); plot("Doji",CDLDoji(),NEW+BARS,BLUE); plot("FractalDim",FractalDimension(Price,30),NEW,RED); plot("ShannonGain",ShannonGain(Price,40),NEW,RED); }
CLR(GREEN), CLR(YELLOW), CLR(BLUE), CLR(MAGENTA), CLR(CYAN), CLR(WHITE), { 0, 0 } }; #define ATR(attr) { CCA_##attr, (const char *) (#attr) } static struct { unsigned long attr; const char *name; } attrs[] = { ATR(ATTRIBUTES), ATR(NORMAL), ATR(STANDOUT), ATR(UNDERLINE), ATR(REVERSE), ATR(BLINK), ATR(DIM), ATR(BOLD), #ifdef A_ALTCHARSET ATR(ALTCHARSET), #endif #ifdef A_INVIS ATR(INVIS), #endif #ifdef A_PROTECT ATR(PROTECT),
void LSquare( DistMatrix<R>& A ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::LSquare"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( A.Grid().Height() != A.Grid().Width() ) throw std::logic_error("The process grid must be square"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<R> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<R> WPan(g); DistMatrix<R,STAR,STAR> A11_STAR_STAR(g); DistMatrix<R,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<R,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<R,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<R,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); hermitian_tridiag::PanelLSquare ( ABR, WPan, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, TRANSPOSE, 
TRANSPOSE, R(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, R(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; HermitianTridiag( LOWER, A11_STAR_STAR.Matrix() ); A11 = A11_STAR_STAR; } SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void SymmLLC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedView1x2( ARowPan, A10, A11 ); LockedView2x1 ( AColPan, A11, A21 ); View2x1 ( CAbove, C0, C1 ); View2x1 ( CBelow, C1, C2 ); AColPan_MC_STAR.AlignWith( CBelow ); ARowPan_STAR_MC.AlignWith( CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( LEFT, LOWER, 0, AColPan_MC_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC ); B1Trans_MR_STAR.TransposeFrom( B1 ); LocalGemm ( NORMAL, TRANSPOSE, alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow ); LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove ); //--------------------------------------------------------------------// 
AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
// Unblocked Householder LQ factorization of a panel.
//
// Walks down the diagonal of A one entry at a time (the repartition calls
// use a block size of 1). At each step a reflector is computed from the
// current row [alpha11 a12], its scalar is stored in the matching entry of
// t, and the reflector is applied from the right to the rows below,
// [a21 A22].
//
// Parameters:
//   A - panel to factor; overwritten in place.
//   t - column vector receiving one reflector scalar per step.
//
// Debug-build preconditions (checked when RELEASE is not defined):
//   - A and t share one grid,
//   - t is min(A.Height(),A.Width()) x 1,
//   - t is aligned with the main diagonal of A.
inline void
PanelHouseholder( DistMatrix<F>& A, DistMatrix<F,MD,STAR>& t )
{
#ifndef RELEASE
    CallStackEntry entry("lq::PanelHouseholder");
    if( A.Grid() != t.Grid() )
        LogicError("{A,t} must be distributed over the same grid");
    if( t.Height() != Min(A.Height(),A.Width()) || t.Width() != 1 )
        LogicError
        ("t must be a vector of height equal to the minimum dimension of A");
    if( !t.AlignedWithDiagonal( A, 0 ) )
        LogicError("t must be aligned with A's main diagonal");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), a01(g),     A02(g),  aTopRow(g), ABottomPan(g),
        ABL(g), ABR(g),  a10(g), alpha11(g), a12(g),
                         A20(g), a21(g),     A22(g);
    DistMatrix<F,MD,STAR>
        tT(g),  t0(g),
        tB(g),  tau1(g),
                t2(g);

    // Temporary distributions
    DistMatrix<F> aTopRowConj(g);
    DistMatrix<F,STAR,MR  > aTopRowConj_STAR_MR(g);
    DistMatrix<F,MC,  STAR> z_MC_STAR(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    PartitionDown
    ( t, tT,
         tB, 0 );
    // Stop once the top-left block covers all rows or all columns of A
    while( ATL.Height() < A.Height() && ATL.Width() < A.Width() )
    {
        // Expose a single diagonal entry (block size 1)
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ a01,     A02,
         /*************/ /**********************/
               /**/       a10, /**/ alpha11, a12,
          ABL, /**/ ABR,  A20, /**/ a21,     A22, 1 );

        RepartitionDown
        ( tT,  t0,
         /**/ /****/
               tau1,
          tB,  t2, 1 );

        // aTopRow = [alpha11 a12], ABottomPan = [a21 A22]
        View1x2( aTopRow, alpha11, a12 );
        View1x2( ABottomPan, a21, A22 );

        aTopRowConj_STAR_MR.AlignWith( ABottomPan );
        z_MC_STAR.AlignWith( ABottomPan );
        //--------------------------------------------------------------------//
        // Compute the Householder reflector
        const F tau = Reflector( alpha11, a12 );
        tau1.Set( 0, 0, tau );

        // Apply the Householder reflector
        // Only the process owning alpha11 touches it locally.
        const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() &&
                                       g.Col() == alpha11.RowAlignment() );
        // Save alpha11 and temporarily replace it by 1 so that aTopRow can
        // be used directly as the reflector vector; restored further down.
        F alpha = 0;
        if( myDiagonalEntry )
        {
            alpha = alpha11.GetLocal(0,0);
            alpha11.SetLocal(0,0,1);
        }

        // z := ABottomPan * conj(aTopRow), summed over each process row
        Conjugate( aTopRow, aTopRowConj );
        aTopRowConj_STAR_MR = aTopRowConj;

        Zeros( z_MC_STAR, ABottomPan.Height(), 1 );
        LocalGemv
        ( NORMAL, F(1), ABottomPan, aTopRowConj_STAR_MR, F(0), z_MC_STAR );
        z_MC_STAR.SumOverRow();

        // Local rank-one update of ABottomPan with scalar -conj(tau)
        Ger
        ( -Conj(tau),
          z_MC_STAR.LockedMatrix(),
          aTopRowConj_STAR_MR.LockedMatrix(),
          ABottomPan.Matrix() );

        // Put the saved diagonal entry back
        if( myDiagonalEntry )
            alpha11.SetLocal(0,0,alpha);
        //--------------------------------------------------------------------//

        SlidePartitionDown
        ( tT,  t0,
               tau1,
         /**/ /****/
          tB,  t2 );

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, a01,     /**/ A02,
               /**/       a10, alpha11, /**/ a12,
         /*************/ /**********************/
          ABL, /**/ ABR,  A20, a21,     /**/ A22 );
    }
}
// Blocked two-sided triangular solve with an upper triangular U
// (variant 1). A is overwritten block by block; per the step comments
// below, each iteration updates the column panel A01 and the diagonal
// block A11 using U00, U01 and U11.
//
// Parameters:
//   diag - whether U is to be treated as unit-diagonal.
//   A    - square matrix, updated in place.
//   U    - square upper triangular matrix of the same size as A; read only.
//
// Debug-build preconditions (checked when RELEASE is not defined):
// A and U are square and of equal height.
inline void
TwoSidedTrsmUVar1
( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TwoSidedTrsmUVar1");
    if( A.Height() != A.Width() )
        LogicError("A must be square");
    if( U.Height() != U.Width() )
        LogicError("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        LogicError("A and U must be the same size");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<F>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,VC,  STAR> A01_VC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> U01_MC_STAR(g);
    DistMatrix<F,VC,  STAR> U01_VC_STAR(g);
    DistMatrix<F,VR,  STAR> U01_VR_STAR(g);
    DistMatrix<F,STAR,MR  > U01Adj_STAR_MR(g);
    DistMatrix<F,STAR,STAR> X11_STAR_STAR(g);
    DistMatrix<F,MR,  MC  > Z01_MR_MC(g);
    DistMatrix<F,MC,  STAR> Z01_MC_STAR(g);
    DistMatrix<F,MR,  STAR> Z01_MR_STAR(g);
    DistMatrix<F> Y01(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        // Align the temporaries with the blocks they are combined with
        A01_VC_STAR.AlignWith( A01 );
        U01_MC_STAR.AlignWith( A00 );
        U01_VR_STAR.AlignWith( A00 );
        U01_VC_STAR.AlignWith( A00 );
        U01Adj_STAR_MR.AlignWith( A00 );
        Y01.AlignWith( A01 );
        Z01_MR_MC.AlignWith( A01 );
        Z01_MC_STAR.AlignWith( A00 );
        Z01_MR_STAR.AlignWith( A00 );
        //--------------------------------------------------------------------//
        // Y01 := A00 U01
        // The product is accumulated into two partial sums (Z01_MC_STAR and
        // Z01_MR_STAR) that are then reduced into Y01.
        U01_MC_STAR = U01;
        U01_VR_STAR = U01_MC_STAR;
        U01Adj_STAR_MR.AdjointFrom( U01_VR_STAR );
        Zeros( Z01_MC_STAR, A01.Height(), A01.Width() );
        Zeros( Z01_MR_STAR, A01.Height(), A01.Width() );
        LocalSymmetricAccumulateLU
        ( ADJOINT,
          F(1), A00, U01_MC_STAR, U01Adj_STAR_MR, Z01_MC_STAR, Z01_MR_STAR );
        Z01_MR_MC.SumScatterFrom( Z01_MR_STAR );
        Y01 = Z01_MR_MC;
        Y01.SumScatterUpdate( F(1), Z01_MC_STAR );

        // A01 := inv(U00)' A01
        //
        // This is the bottleneck because A01 only has blocksize columns
        Trsm( LEFT, UPPER, ADJOINT, diag, F(1), U00, A01 );

        // A01 := A01 - 1/2 Y01
        Axpy( F(-1)/F(2), Y01, A01 );

        // A11 := A11 - (U01' A01 + A01' U01)
        // The update is formed locally in X11 (starting from zero) and then
        // summed into A11.
        A01_VC_STAR = A01;
        U01_VC_STAR = U01_MC_STAR;
        Zeros( X11_STAR_STAR, A11.Height(), A11.Width() );
        Her2k
        ( UPPER, ADJOINT,
          F(-1), A01_VC_STAR.Matrix(), U01_VC_STAR.Matrix(),
          F(0), X11_STAR_STAR.Matrix() );
        A11.SumScatterUpdate( F(1), X11_STAR_STAR );

        // A11 := inv(U11)' A11 inv(U11)
        A11_STAR_STAR = A11;
        U11_STAR_STAR = U11;
        LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR );
        A11 = A11_STAR_STAR;

        // A01 := A01 - 1/2 Y01
        // (second half of the Y01 correction; the first half was applied
        // before the A11 update)
        Axpy( F(-1)/F(2), Y01, A01 );

        // A01 := A01 inv(U11)
        A01_VC_STAR = A01;
        LocalTrsm
        ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR );
        A01 = A01_VC_STAR;
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
}
inline void Householder( DistMatrix<F>& A, DistMatrix<F,MD,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("qr::Householder"); if( A.Grid() != t.Grid() ) LogicError("{A,s} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); if( t.Viewing() ) { if( !t.AlignedWithDiagonal( A ) ) LogicError("t was not aligned with A"); } else { t.AlignWithDiagonal( A ); } t.ResizeTo( Min(A.Height(),A.Width()), 1 ); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ALeftPan(g), ARightPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); View2x1 ( ALeftPan, A11, A21 ); View2x1 ( ARightPan, A12, A22 ); //--------------------------------------------------------------------// PanelHouseholder( ALeftPan, t1 ); ApplyQ( LEFT, ADJOINT, ALeftPan, t1, ARightPan ); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void internal::HermitianTridiagU ( DistMatrix<Complex<R>,MC, MR >& A, DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagU"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() ) throw std::logic_error("t must not be a view"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); DistMatrix<C,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, 1 ); tDiag.ResizeTo( A.Height()-1, 1 ); if( g.InGrid() ) { // Matrix views DistMatrix<C,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<C,MC, MR > WPan(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> A11_STAR_STAR(g); DistMatrix<C,MC, STAR> APan_MC_STAR(g), A01_MC_STAR(g), A11_MC_STAR(g); DistMatrix<C,MR, STAR> APan_MR_STAR(g), A01_MR_STAR(g), A11_MR_STAR(g); DistMatrix<C,MC, STAR> WPan_MC_STAR(g), W01_MC_STAR(g), W11_MC_STAR(g); DistMatrix<C,MR, STAR> WPan_MR_STAR(g), W01_MR_STAR(g), W11_MR_STAR(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionUp ( tDiag, tT, tB, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); RepartitionUp ( tT, t0, t1, /**/ /**/ tB, t2 ); if( A00.Height() > 0 ) { WPan.AlignWith( A01 ); APan_MC_STAR.AlignWith( A00 ); WPan_MC_STAR.AlignWith( A00 ); APan_MR_STAR.AlignWith( A00 ); WPan_MR_STAR.AlignWith( A00 ); //------------------------------------------------------------// WPan.ResizeTo( ATL.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); 
WPan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); internal::HermitianPanelTridiagU ( ATL, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionUp ( APan_MC_STAR, A01_MC_STAR, A11_MC_STAR, A11.Height() ); PartitionUp ( APan_MR_STAR, A01_MR_STAR, A11_MR_STAR, A11.Height() ); PartitionUp ( WPan_MC_STAR, W01_MC_STAR, W11_MC_STAR, A11.Height() ); PartitionUp ( WPan_MR_STAR, W01_MR_STAR, W11_MR_STAR, A11.Height() ); internal::LocalTrr2k ( UPPER, ADJOINT, ADJOINT, (C)-1, A01_MC_STAR, W01_MR_STAR, W01_MC_STAR, A01_MR_STAR, (C)1, A00 ); //------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( UPPER, A11_STAR_STAR.LocalMatrix(), t1_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionUp ( tT, t0, /**/ /**/ t1, tB, t2 ); SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HermitianTridiagU( DistMatrix<R,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::HermitianTridiagU"); if( A.Height() != A.Width() ) throw std::logic_error( "A must be square." ); #endif const Grid& g = A.Grid(); if( g.InGrid() ) { // Matrix views DistMatrix<R,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<R,MC, MR > WPan(g); DistMatrix<R,STAR,STAR> A11_STAR_STAR(g); DistMatrix<R,MC, STAR> APan_MC_STAR(g), A01_MC_STAR(g), A11_MC_STAR(g); DistMatrix<R,MR, STAR> APan_MR_STAR(g), A01_MR_STAR(g), A11_MR_STAR(g); DistMatrix<R,MC, STAR> WPan_MC_STAR(g), W01_MC_STAR(g), W11_MC_STAR(g); DistMatrix<R,MR, STAR> WPan_MR_STAR(g), W01_MR_STAR(g), W11_MR_STAR(g); PartitionUpDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() < A.Height() ) { RepartitionUpDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); if( A00.Height() > 0 ) { WPan.AlignWith( A01 ); APan_MC_STAR.AlignWith( A00 ); WPan_MC_STAR.AlignWith( A00 ); APan_MR_STAR.AlignWith( A00 ); WPan_MR_STAR.AlignWith( A00 ); //------------------------------------------------------------// WPan.ResizeTo( ATL.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ATL.Height(), A11.Width() ); APan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ATL.Height(), A11.Width() ); internal::HermitianPanelTridiagU ( ATL, WPan, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionUp ( APan_MC_STAR, A01_MC_STAR, A11_MC_STAR, A11.Height() ); PartitionUp ( APan_MR_STAR, A01_MR_STAR, A11_MR_STAR, A11.Height() ); PartitionUp ( WPan_MC_STAR, W01_MC_STAR, W11_MC_STAR, A11.Height() ); PartitionUp ( WPan_MR_STAR, W01_MR_STAR, W11_MR_STAR, A11.Height() ); internal::LocalTrr2k ( UPPER, TRANSPOSE, TRANSPOSE, (R)-1, A01_MC_STAR, W01_MR_STAR, 
W01_MC_STAR, A01_MR_STAR, (R)1, A00 ); //------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; HermitianTridiag( UPPER, A11_STAR_STAR.LocalMatrix() ); A11 = A11_STAR_STAR; } SlidePartitionUpDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); } } #ifndef RELEASE PopCallStack(); #endif }
// Blocked two-sided multiplication by a lower triangular L and its
// adjoint (variant 4); per the step comments below, each iteration updates
// A00, A10, A20, A11 and A21 in place.
//
// Parameters:
//   A - square matrix, updated in place.
//   L - square lower triangular matrix of the same size as A; read only.
//
// Debug-build preconditions (checked when RELEASE is not defined):
// A and L are square and of equal height.
inline void
internal::HegstLLVar4( DistMatrix<F,MC,MR>& A, const DistMatrix<F,MC,MR>& L )
{
#ifndef RELEASE
    PushCallStack("internal::HegstLLVar4");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( L.Height() != L.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != L.Height() )
        throw std::logic_error("A and L must be the same size");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F,MC,MR>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);

    DistMatrix<F,MC,MR>
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);

    // Temporary distributions
    DistMatrix<F,STAR,VR  > A10_STAR_VR(g);
    DistMatrix<F,STAR,MR  > A10_STAR_MR(g);
    DistMatrix<F,STAR,MC  > A10_STAR_MC(g);
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,VC,  STAR> A21_VC_STAR(g);
    DistMatrix<F,MC,  STAR> A21_MC_STAR(g);
    DistMatrix<F,STAR,VR  > L10_STAR_VR(g);
    DistMatrix<F,STAR,MR  > L10_STAR_MR(g);
    DistMatrix<F,STAR,MC  > L10_STAR_MC(g);
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,STAR,VR  > Y10_STAR_VR(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        // Align the temporaries with the blocks they are combined with
        A10_STAR_VR.AlignWith( A00 );
        A10_STAR_MR.AlignWith( A00 );
        A10_STAR_MC.AlignWith( A00 );
        A21_MC_STAR.AlignWith( A20 );
        L10_STAR_VR.AlignWith( A00 );
        L10_STAR_MR.AlignWith( A00 );
        L10_STAR_MC.AlignWith( A00 );
        Y10_STAR_VR.AlignWith( A10 );
        //--------------------------------------------------------------------//
        // Y10 := 1/2 A11 L10
        // (the factor 1/2 is folded into the Hemm call, so both later
        // corrections add Y10 with coefficient 1)
        A11_STAR_STAR = A11;
        L10_STAR_VR = L10;
        Y10_STAR_VR.ResizeTo( A10.Height(), A10.Width() );
        Zero( Y10_STAR_VR );
        Hemm
        ( LEFT, LOWER,
          (F)0.5, A11_STAR_STAR.LockedLocalMatrix(),
                  L10_STAR_VR.LockedLocalMatrix(),
          (F)0,   Y10_STAR_VR.LocalMatrix() );

        // A10 := A10 + Y10   (i.e. A10 + 1/2 A11 L10)
        // Performed on the [STAR,VR] copy; A10 itself is written back later.
        A10_STAR_VR = A10;
        Axpy( (F)1, Y10_STAR_VR, A10_STAR_VR );

        // A00 := A00 + (A10' L10 + L10' A10)
        A10_STAR_MR = A10_STAR_VR;
        A10_STAR_MC = A10_STAR_VR;
        L10_STAR_MR = L10_STAR_VR;
        L10_STAR_MC = L10_STAR_VR;
        internal::LocalTrr2k
        ( LOWER, ADJOINT, ADJOINT,
          (F)1, A10_STAR_MC, L10_STAR_MR,
                L10_STAR_MC, A10_STAR_MR,
          (F)1, A00 );

        // A10 := A10 + Y10   (second half of the A11 L10 correction)
        Axpy( (F)1, Y10_STAR_VR, A10_STAR_VR );

        // A10 := L11' A10
        L11_STAR_STAR = L11;
        internal::LocalTrmm
        ( LEFT, LOWER, ADJOINT, NON_UNIT,
          (F)1, L11_STAR_STAR, A10_STAR_VR );
        A10 = A10_STAR_VR;

        // A20 := A20 + A21 L10
        A21_MC_STAR = A21;
        internal::LocalGemm
        ( NORMAL, NORMAL, (F)1, A21_MC_STAR, L10_STAR_MR, (F)1, A20 );

        // A11 := L11' A11 L11
        internal::LocalHegst( LEFT, LOWER, A11_STAR_STAR, L11_STAR_STAR );
        A11 = A11_STAR_STAR;

        // A21 := A21 L11
        A21_VC_STAR = A21_MC_STAR;
        internal::LocalTrmm
        ( RIGHT, LOWER, NORMAL, NON_UNIT,
          (F)1, L11_STAR_STAR, A21_VC_STAR );
        A21 = A21_VC_STAR;
        //--------------------------------------------------------------------//
        A10_STAR_VR.FreeAlignments();
        A10_STAR_MR.FreeAlignments();
        A10_STAR_MC.FreeAlignments();
        A21_MC_STAR.FreeAlignments();
        L10_STAR_VR.FreeAlignments();
        L10_STAR_MR.FreeAlignments();
        L10_STAR_MC.FreeAlignments();
        Y10_STAR_VR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void internal::CholeskyUVar3Square( DistMatrix<F,MC,MR>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyUVar3Square"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices."); if( A.Grid().Height() != A.Grid().Width() ) throw std::logic_error ("CholeskyUVar3Square assumes a square process grid."); #endif const Grid& g = A.Grid(); // Find the process holding our transposed data const int r = g.Height(); int transposeRank; { const int colAlignment = A.ColAlignment(); const int rowAlignment = A.RowAlignment(); const int colShift = A.ColShift(); const int rowShift = A.RowShift(); const int transposeRow = (colAlignment+rowShift) % r; const int transposeCol = (rowAlignment+colShift) % r; transposeRank = transposeRow + r*transposeCol; } const bool onDiagonal = ( transposeRank == g.VCRank() ); // Matrix views DistMatrix<F,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrix distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; internal::LocalCholesky( UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A12_STAR_VR = A12; internal::LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; // SendRecv to form A12[* ,MC] from A12[* ,MR] A12_STAR_MC.ResizeTo( A12.Height(), A12.Width() ); { if( onDiagonal ) { const int size = 
A11.Height()*A22.LocalWidth(); MemCopy ( A12_STAR_MC.LocalBuffer(), A12_STAR_MR.LocalBuffer(), size ); } else { const int sendSize = A11.Height()*A22.LocalWidth(); const int recvSize = A11.Width()*A22.LocalHeight(); // We know that the ldim is the height since we have manually // created both temporary matrices. mpi::SendRecv ( A12_STAR_MR.LocalBuffer(), sendSize, transposeRank, 0, A12_STAR_MC.LocalBuffer(), recvSize, transposeRank, 0, g.VCComm() ); } } internal::LocalTrrk ( UPPER, ADJOINT, (F)-1, A12_STAR_MC, A12_STAR_MR, (F)1, A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
// Blocked two-sided triangular multiplication with an upper triangular U
// (variant 5); per the step comments below, each iteration updates A00,
// A01 and A11 in place.
//
// Parameters:
//   diag - whether U is to be treated as unit-diagonal.
//   A    - square matrix, updated in place.
//   U    - square upper triangular matrix of the same size as A; read only.
//
// Debug-build preconditions (checked when RELEASE is not defined):
// A and U are square and of equal height.
inline void
TwoSidedTrmmUVar5
( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U )
{
#ifndef RELEASE
    PushCallStack("internal::TwoSidedTrmmUVar5");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( U.Height() != U.Width() )
        throw std::logic_error("Triangular matrices must be square");
    if( A.Height() != U.Height() )
        throw std::logic_error("A and U must be the same size");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<F>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<F>
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> A01_MC_STAR(g);
    DistMatrix<F,MR,  STAR> A01_MR_STAR(g);
    DistMatrix<F,VC,  STAR> A01_VC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,MC,  STAR> U01_MC_STAR(g);
    DistMatrix<F,MR,  STAR> U01_MR_STAR(g);
    DistMatrix<F,VC,  STAR> U01_VC_STAR(g);
    DistMatrix<F,VC,  STAR> Y01_VC_STAR(g);
    DistMatrix<F> Y01(g);

    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        // Align the temporaries with the blocks they are combined with
        A01_MC_STAR.AlignWith( A00 );
        A01_MR_STAR.AlignWith( A00 );
        A01_VC_STAR.AlignWith( A00 );
        U01_MC_STAR.AlignWith( A00 );
        U01_MR_STAR.AlignWith( A00 );
        U01_VC_STAR.AlignWith( A00 );
        Y01.AlignWith( A01 );
        Y01_VC_STAR.AlignWith( A01 );
        //--------------------------------------------------------------------//
        // Y01 := U01 A11
        // Computed locally on [VC,STAR] copies, then redistributed into Y01.
        A11_STAR_STAR = A11;
        U01_VC_STAR = U01;
        Y01_VC_STAR.ResizeTo( A01.Height(), A01.Width() );
        Hemm
        ( RIGHT, UPPER,
          F(1), A11_STAR_STAR.LocalMatrix(), U01_VC_STAR.LocalMatrix(),
          F(0), Y01_VC_STAR.LocalMatrix() );
        Y01 = Y01_VC_STAR;

        // A01 := U00 A01
        Trmm( LEFT, UPPER, NORMAL, diag, F(1), U00, A01 );

        // A01 := A01 + 1/2 Y01
        Axpy( F(1)/F(2), Y01, A01 );

        // A00 := A00 + (U01 A01' + A01 U01')
        A01_MC_STAR = A01;
        U01_MC_STAR = U01;
        A01_VC_STAR = A01_MC_STAR;
        A01_MR_STAR = A01_VC_STAR;
        U01_MR_STAR = U01_MC_STAR;
        LocalTrr2k
        ( UPPER, ADJOINT, ADJOINT,
          F(1), U01_MC_STAR, A01_MR_STAR,
                A01_MC_STAR, U01_MR_STAR,
          F(1), A00 );

        // A01 := A01 + 1/2 Y01
        // (second half of the Y01 correction, applied to the [VC,STAR]
        // copy that is written back to A01 below)
        Axpy( F(1)/F(2), Y01_VC_STAR, A01_VC_STAR );

        // A01 := A01 U11'
        U11_STAR_STAR = U11;
        LocalTrmm
        ( RIGHT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A01_VC_STAR );
        A01 = A01_VC_STAR;

        // A11 := U11 A11 U11'
        LocalTwoSidedTrmm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR );
        A11 = A11_STAR_STAR;
        //--------------------------------------------------------------------//
        A01_MC_STAR.FreeAlignments();
        A01_MR_STAR.FreeAlignments();
        A01_VC_STAR.FreeAlignments();
        U01_MC_STAR.FreeAlignments();
        U01_MR_STAR.FreeAlignments();
        U01_VC_STAR.FreeAlignments();
        Y01.FreeAlignments();
        Y01_VC_STAR.FreeAlignments();

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void TwoSidedTrsmUVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrsmUVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( U.Height() != U.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != U.Height() ) LogicError("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,VC, STAR> A01_VC_STAR(g); DistMatrix<F,STAR,MC > A01Trans_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,VC > A12_STAR_VC(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> U12Trans_MR_STAR(g); DistMatrix<F,VR, STAR> U12Trans_VR_STAR(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,VC > U12_STAR_VC(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,VR > Y12_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A01_VC_STAR.AlignWith( A02 ); A01Trans_STAR_MC.AlignWith( A02 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_VC.AlignWith( A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); U12Trans_MR_STAR.AlignWith( A02 ); U12Trans_VR_STAR.AlignWith( A02 ); 
U12_STAR_VR.AlignWith( A02 ); U12_STAR_VC.AlignWith( A22 ); U12_STAR_MC.AlignWith( A22 ); Y12_STAR_VR.AlignWith( A12 ); //--------------------------------------------------------------------// // A01 := A01 inv(U11) A01_VC_STAR = A01; U11_STAR_STAR = U11; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, A01_VC_STAR ); A01 = A01_VC_STAR; // A11 := inv(U11)' A11 inv(U11) A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // A02 := A02 - A01 U12 A01Trans_STAR_MC.TransposeFrom( A01_VC_STAR ); U12Trans_MR_STAR.TransposeFrom( U12 ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(-1), A01Trans_STAR_MC, U12Trans_MR_STAR, F(1), A02 ); // Y12 := A11 U12 U12Trans_VR_STAR = U12Trans_MR_STAR; Zeros( U12_STAR_VR, A12.Height(), A12.Width() ); Transpose( U12Trans_VR_STAR.Matrix(), U12_STAR_VR.Matrix() ); Zeros( Y12_STAR_VR, A12.Height(), A12.Width() ); Hemm ( LEFT, UPPER, F(1), A11_STAR_STAR.Matrix(), U12_STAR_VR.Matrix(), F(0), Y12_STAR_VR.Matrix() ); // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12_STAR_VR, A12_STAR_VR ); // A22 := A22 - (A12' U12 + U12' A12) A12_STAR_MR = A12_STAR_VR; A12_STAR_VC = A12_STAR_VR; U12_STAR_VC = U12_STAR_VR; A12_STAR_MC = A12_STAR_VC; U12_STAR_MC = U12_STAR_VC; LocalTrr2k ( UPPER, ADJOINT, TRANSPOSE, ADJOINT, F(-1), A12_STAR_MC, U12Trans_MR_STAR, U12_STAR_MC, A12_STAR_MR, F(1), A22 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12_STAR_VR, A12_STAR_VR ); A12 = A12_STAR_VR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /**********************************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } }
inline void LQ( DistMatrix<Complex<R>,MC,MR >& A, DistMatrix<Complex<R>,MD,STAR>& t ) { #ifndef RELEASE PushCallStack("LQ"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); if( t.Viewing() ) { if( !t.AlignedWithDiagonal( A ) ) throw std::logic_error("t was not aligned with A"); if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 ) throw std::logic_error("t was not the appropriate shape"); } else { t.AlignWithDiagonal( A ); t.ResizeTo( std::min(A.Height(),A.Width()), 1 ); } // Matrix views DistMatrix<C,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), ATopPan(g), ABottomPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); ATopPan.View1x2( A11, A12 ); ABottomPan.View1x2( A21, A22 ); //--------------------------------------------------------------------// internal::PanelLQ( ATopPan, t1 ); ApplyPackedReflectors ( RIGHT, UPPER, HORIZONTAL, FORWARD, CONJUGATED, 0, ATopPan, t1, ABottomPan ); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LU( DistMatrix<F>& A, DistMatrix<int,VC,STAR>& p ) { #ifndef RELEASE CallStackEntry entry("LU"); if( A.Grid() != p.Grid() ) throw std::logic_error("{A,p} must be distributed over the same grid"); if( p.Viewing() && (std::min(A.Height(),A.Width()) != p.Height() || p.Width() != 1) ) throw std::logic_error ("p must be a vector of the same height as the min dimension of A."); #endif const Grid& g = A.Grid(); if( !p.Viewing() ) p.ResizeTo( std::min(A.Height(),A.Width()), 1 ); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), AB(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<int,VC,STAR> pT(g), p0(g), pB(g), p1(g), p2(g); // Temporary distributions DistMatrix<F, STAR,STAR> A11_STAR_STAR(g); DistMatrix<F, MC, STAR> A21_MC_STAR(g); DistMatrix<F, STAR,VR > A12_STAR_VR(g); DistMatrix<F, STAR,MR > A12_STAR_MR(g); DistMatrix<int,STAR,STAR> p1_STAR_STAR(g); // Pivot composition std::vector<int> image, preimage; // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( p, pT, pB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( pT, p0, /**/ /**/ p1, pB, p2 ); View1x2( AB, ABL, ABR ); const int pivotOffset = A01.Height(); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); p1_STAR_STAR.ResizeTo( p1.Height(), 1 ); //--------------------------------------------------------------------// A21_MC_STAR = A21; A11_STAR_STAR = A11; lu::Panel( A11_STAR_STAR, A21_MC_STAR, p1_STAR_STAR, pivotOffset ); ComposePivots( p1_STAR_STAR, pivotOffset, image, preimage ); ApplyRowPivots( AB, image, preimage ); // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) 
A12_STAR_VR = A12; LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; LocalGemm( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A11 = A11_STAR_STAR; A12 = A12_STAR_MR; A21 = A21_MC_STAR; p1 = p1_STAR_STAR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlidePartitionDown ( pT, p0, p1, /**/ /**/ pB, p2 ); } }
inline void LocalSymmetricAccumulateLU ( Orientation orientation, T alpha, const DistMatrix<T>& A, const DistMatrix<T,MC, STAR>& B_MC_STAR, const DistMatrix<T,STAR,MR >& BAdjOrTrans_STAR_MR, DistMatrix<T,MC, STAR>& Z_MC_STAR, DistMatrix<T,MR, STAR>& Z_MR_STAR ) { #ifndef RELEASE PushCallStack("internal::LocalSymmetricAccumulateLU"); if( A.Grid() != B_MC_STAR.Grid() || B_MC_STAR.Grid() != BAdjOrTrans_STAR_MR.Grid() || BAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() || Z_MC_STAR.Grid() != Z_MR_STAR.Grid() ) throw std::logic_error ("{A,B,Z} must be distributed over the same grid"); if( A.Height() != A.Width() || A.Height() != B_MC_STAR.Height() || A.Height() != BAdjOrTrans_STAR_MR.Width() || A.Height() != Z_MC_STAR.Height() || A.Height() != Z_MR_STAR.Height() || B_MC_STAR.Width() != BAdjOrTrans_STAR_MR.Height() || BAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() || Z_MC_STAR.Width() != Z_MR_STAR.Width() ) { std::ostringstream msg; msg << "Nonconformal LocalSymmetricAccumulateLU: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B[MC,* ] ~ " << B_MC_STAR.Height() << " x " << B_MC_STAR.Width() << "\n" << " B^H/T[* ,MR] ~ " << BAdjOrTrans_STAR_MR.Height() << " x " << BAdjOrTrans_STAR_MR.Width() << "\n" << " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x " << Z_MC_STAR.Width() << "\n" << " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x " << Z_MR_STAR.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( B_MC_STAR.ColAlignment() != A.ColAlignment() || BAdjOrTrans_STAR_MR.RowAlignment() != A.RowAlignment() || Z_MC_STAR.ColAlignment() != A.ColAlignment() || Z_MR_STAR.ColAlignment() != A.RowAlignment() ) throw std::logic_error("Partial matrix distributions are misaligned"); #endif const Grid& g = A.Grid(); DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<T> D11(g); DistMatrix<T,MC,STAR> BT_MC_STAR(g), B0_MC_STAR(g), BB_MC_STAR(g), B1_MC_STAR(g), B2_MC_STAR(g); 
DistMatrix<T,STAR,MR> BLAdjOrTrans_STAR_MR(g), BRAdjOrTrans_STAR_MR(g), B0AdjOrTrans_STAR_MR(g), B1AdjOrTrans_STAR_MR(g), B2AdjOrTrans_STAR_MR(g); DistMatrix<T,MC,STAR> ZT_MC_STAR(g), Z0_MC_STAR(g), ZB_MC_STAR(g), Z1_MC_STAR(g), Z2_MC_STAR(g); DistMatrix<T,MR,STAR> ZT_MR_STAR(g), Z0_MR_STAR(g), ZB_MR_STAR(g), Z1_MR_STAR(g), Z2_MR_STAR(g); const int ratio = std::max( g.Height(), g.Width() ); PushBlocksizeStack( ratio*Blocksize() ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B_MC_STAR, BT_MC_STAR, BB_MC_STAR, 0 ); LockedPartitionRight ( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 ); PartitionDown ( Z_MC_STAR, ZT_MC_STAR, ZB_MC_STAR, 0 ); PartitionDown ( Z_MR_STAR, ZT_MR_STAR, ZB_MR_STAR, 0 ); while( ATL.Height() < A.Height() ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT_MC_STAR, B0_MC_STAR, /**********/ /**********/ B1_MC_STAR, BB_MC_STAR, B2_MC_STAR ); LockedRepartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, /**/ B1AdjOrTrans_STAR_MR, B2AdjOrTrans_STAR_MR ); RepartitionDown ( ZT_MC_STAR, Z0_MC_STAR, /**********/ /**********/ Z1_MC_STAR, ZB_MC_STAR, Z2_MC_STAR ); RepartitionDown ( ZT_MR_STAR, Z0_MR_STAR, /**********/ /**********/ Z1_MR_STAR, ZB_MR_STAR, Z2_MR_STAR ); D11.AlignWith( A11 ); //--------------------------------------------------------------------// D11 = A11; MakeTrapezoidal( LEFT, UPPER, 0, D11 ); LocalGemm ( NORMAL, orientation, alpha, D11, B1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); MakeTrapezoidal( LEFT, UPPER, 1, D11 ); LocalGemm ( orientation, NORMAL, alpha, D11, B1_MC_STAR, T(1), Z1_MR_STAR ); LocalGemm ( NORMAL, orientation, alpha, A12, B2AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR ); LocalGemm ( orientation, NORMAL, alpha, A12, B1_MC_STAR, T(1), Z2_MR_STAR ); 
//--------------------------------------------------------------------// D11.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT_MC_STAR, B0_MC_STAR, B1_MC_STAR, /**********/ /**********/ BB_MC_STAR, B2_MC_STAR ); SlideLockedPartitionRight ( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR, B0AdjOrTrans_STAR_MR, B1AdjOrTrans_STAR_MR, /**/ B2AdjOrTrans_STAR_MR ); SlidePartitionDown ( ZT_MC_STAR, Z0_MC_STAR, Z1_MC_STAR, /**********/ /**********/ ZB_MC_STAR, Z2_MC_STAR ); SlidePartitionDown ( ZT_MR_STAR, Z0_MR_STAR, Z1_MR_STAR, /**********/ /**********/ ZB_MR_STAR, Z2_MR_STAR ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void LU( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("LU"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_VR.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A21_MC_STAR.AlignWith( A22 ); A11_STAR_STAR.ResizeTo( A11.Height(), A11.Width() ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalLU( A11_STAR_STAR ); A11 = A11_STAR_STAR; A21_MC_STAR = A21; LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A21_MC_STAR ); A21 = A21_MC_STAR; // Perhaps we should give up perfectly distributing this operation since // it's total contribution is only O(n^2) A12_STAR_VR = A12; LocalTrsm ( LEFT, LOWER, NORMAL, UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MR = A12_STAR_VR; LocalGemm( NORMAL, NORMAL, F(-1), A21_MC_STAR, A12_STAR_MR, F(1), A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_VR.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
inline void CholeskyUVar3( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::CholeskyUVar3"); if( A.Height() != A.Width() ) throw std::logic_error ("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary matrix distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ABR.Height() > 0 ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( UPPER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A12_STAR_VR ); A12_STAR_MC = A12_STAR_VR; A12_STAR_MR = A12_STAR_VR; LocalTrrk ( UPPER, ADJOINT, F(-1), A12_STAR_MC, A12_STAR_MR, F(1), A22 ); A12 = A12_STAR_MR; //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrmmLVar4 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& L ) { #ifndef RELEASE CallStackEntry entry("internal::TwoSidedTrmmLVar4"); if( A.Height() != A.Width() ) LogicError("A must be square"); if( L.Height() != L.Width() ) LogicError("Triangular matrices must be square"); if( A.Height() != L.Height() ) LogicError("A and L must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); // Temporary distributions DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> A21_MC_STAR(g); DistMatrix<F,STAR,VR > L10_STAR_VR(g); DistMatrix<F,MR, STAR> L10Adj_MR_STAR(g); DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,VR > Y10_STAR_VR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); A10_STAR_VR.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A10_STAR_MC.AlignWith( A00 ); A21_MC_STAR.AlignWith( A20 ); L10_STAR_VR.AlignWith( A00 ); L10Adj_MR_STAR.AlignWith( A00 ); L10_STAR_MC.AlignWith( A00 ); Y10_STAR_VR.AlignWith( A10 ); //--------------------------------------------------------------------// // Y10 := A11 L10 A11_STAR_STAR = A11; L10Adj_MR_STAR.AdjointFrom( L10 ); 
L10_STAR_VR.AdjointFrom( L10Adj_MR_STAR ); Zeros( Y10_STAR_VR, A10.Height(), A10.Width() ); Hemm ( LEFT, LOWER, F(1), A11_STAR_STAR.LockedMatrix(), L10_STAR_VR.LockedMatrix(), F(0), Y10_STAR_VR.Matrix() ); // A10 := A10 + 1/2 Y10 A10_STAR_VR = A10; Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A00 := A00 + (A10' L10 + L10' A10) A10_STAR_MR = A10_STAR_VR; A10_STAR_MC = A10_STAR_VR; L10_STAR_MC = L10_STAR_VR; LocalTrr2k ( LOWER, ADJOINT, ADJOINT, ADJOINT, F(1), A10_STAR_MC, L10Adj_MR_STAR, L10_STAR_MC, A10_STAR_MR, F(1), A00 ); // A10 := A10 + 1/2 Y10 Axpy( F(1)/F(2), Y10_STAR_VR, A10_STAR_VR ); // A10 := L11' A10 L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, ADJOINT, diag, F(1), L11_STAR_STAR, A10_STAR_VR ); A10 = A10_STAR_VR; // A20 := A20 + A21 L10 A21_MC_STAR = A21; LocalGemm ( NORMAL, ADJOINT, F(1), A21_MC_STAR, L10Adj_MR_STAR, F(1), A20 ); // A11 := L11' A11 L11 LocalTwoSidedTrmm( LOWER, diag, A11_STAR_STAR, L11_STAR_STAR ); A11 = A11_STAR_STAR; // A21 := A21 L11 A21_VC_STAR = A21_MC_STAR; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); } }
inline void HPDInverseLVar2( DistMatrix<F>& A ) { #ifndef RELEASE PushCallStack("internal::HPDInverseLVar2"); if( A.Height() != A.Width() ) throw std::logic_error("Nonsquare matrices cannot be triangular"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,VR > A10_STAR_VR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,STAR,MC > A10_STAR_MC(g); DistMatrix<F,STAR,MR > A10_STAR_MR(g); DistMatrix<F,STAR,MC > A21Trans_STAR_MC(g); DistMatrix<F,VR, STAR> A21_VR_STAR(g); DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10_STAR_VR.AlignWith( A00 ); A21_VC_STAR.AlignWith( A20 ); A10_STAR_MC.AlignWith( A00 ); A10_STAR_MR.AlignWith( A00 ); A21Trans_STAR_MC.AlignWith( A20 ); A21_VR_STAR.AlignWith( A22 ); A21Adj_STAR_MR.AlignWith( A22 ); //--------------------------------------------------------------------// A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A10_STAR_VR = A10; LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A10_STAR_VR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A10_STAR_MC = A10_STAR_VR; A10_STAR_MR = A10_STAR_VR; LocalTrrk ( LOWER, ADJOINT, F(1), A10_STAR_MC, A10_STAR_MR, F(1), A00 ); A21Trans_STAR_MC.TransposeFrom( A21_VC_STAR ); LocalGemm ( TRANSPOSE, NORMAL, F(-1), A21Trans_STAR_MC, A10_STAR_MR, F(1), A20 ); A21_VR_STAR = A21_VC_STAR; A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR ); LocalTrrk ( LOWER, TRANSPOSE, F(-1), A21Trans_STAR_MC, A21Adj_STAR_MR, F(1), A22 ); LocalTrsm ( LEFT, LOWER, ADJOINT, NON_UNIT, 
F(1), A11_STAR_STAR, A10_STAR_VR ); LocalTrsm ( RIGHT, LOWER, NORMAL, NON_UNIT, F(-1), A11_STAR_STAR, A21_VC_STAR ); LocalTriangularInverse( LOWER, NON_UNIT, A11_STAR_STAR ); LocalTrtrmm( ADJOINT, LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; A10 = A10_STAR_VR; A21 = A21_VC_STAR; //--------------------------------------------------------------------// A10_STAR_VR.FreeAlignments(); A21_VC_STAR.FreeAlignments(); A10_STAR_MC.FreeAlignments(); A10_STAR_MR.FreeAlignments(); A21Trans_STAR_MC.FreeAlignments(); A21_VR_STAR.FreeAlignments(); A21Adj_STAR_MR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TwoSidedTrsmUVar5 ( UnitOrNonUnit diag, DistMatrix<F>& A, const DistMatrix<F>& U ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmUVar5"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( U.Height() != U.Width() ) throw std::logic_error("Triangular matrices must be square"); if( A.Height() != U.Height() ) throw std::logic_error("A and U must be the same size"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); // Temporary distributions DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,STAR,MC > A12_STAR_MC(g); DistMatrix<F,STAR,MR > A12_STAR_MR(g); DistMatrix<F,STAR,VC > A12_STAR_VC(g); DistMatrix<F,STAR,VR > A12_STAR_VR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MC > U12_STAR_MC(g); DistMatrix<F,STAR,MR > U12_STAR_MR(g); DistMatrix<F,STAR,VC > U12_STAR_VC(g); DistMatrix<F,STAR,VR > U12_STAR_VR(g); DistMatrix<F,STAR,VR > Y12_STAR_VR(g); DistMatrix<F> Y12(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); A12_STAR_MC.AlignWith( A22 ); A12_STAR_MR.AlignWith( A22 ); A12_STAR_VC.AlignWith( A22 ); A12_STAR_VR.AlignWith( A22 ); U12_STAR_MC.AlignWith( A22 ); U12_STAR_MR.AlignWith( A22 ); U12_STAR_VC.AlignWith( A22 ); U12_STAR_VR.AlignWith( A22 ); Y12.AlignWith( A12 ); Y12_STAR_VR.AlignWith( A12 ); 
//--------------------------------------------------------------------// // A11 := inv(U11)' A11 inv(U11) U11_STAR_STAR = U11; A11_STAR_STAR = A11; LocalTwoSidedTrsm( UPPER, diag, A11_STAR_STAR, U11_STAR_STAR ); A11 = A11_STAR_STAR; // Y12 := A11 U12 U12_STAR_VR = U12; Y12_STAR_VR.ResizeTo( A12.Height(), A12.Width() ); Hemm ( LEFT, UPPER, F(1), A11_STAR_STAR.LocalMatrix(), U12_STAR_VR.LocalMatrix(), F(0), Y12_STAR_VR.LocalMatrix() ); Y12 = Y12_STAR_VR; // A12 := inv(U11)' A12 A12_STAR_VR = A12; LocalTrsm ( LEFT, UPPER, ADJOINT, diag, F(1), U11_STAR_STAR, A12_STAR_VR ); A12 = A12_STAR_VR; // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A22 := A22 - (A12' U12 + U12' A12) A12_STAR_VR = A12; A12_STAR_VC = A12_STAR_VR; U12_STAR_VC = U12_STAR_VR; A12_STAR_MC = A12_STAR_VC; U12_STAR_MC = U12_STAR_VC; A12_STAR_MR = A12_STAR_VR; U12_STAR_MR = U12_STAR_VR; LocalTrr2k ( UPPER, ADJOINT, ADJOINT, F(-1), U12_STAR_MC, A12_STAR_MR, A12_STAR_MC, U12_STAR_MR, F(1), A22 ); // A12 := A12 - 1/2 Y12 Axpy( F(-1)/F(2), Y12, A12 ); // A12 := A12 inv(U22) // // This is the bottleneck because A12 only has blocksize rows Trsm( RIGHT, UPPER, NORMAL, diag, F(1), U22, A12 ); //--------------------------------------------------------------------// A12_STAR_MC.FreeAlignments(); A12_STAR_MR.FreeAlignments(); A12_STAR_VC.FreeAlignments(); A12_STAR_VR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); U12_STAR_MR.FreeAlignments(); U12_STAR_VC.FreeAlignments(); U12_STAR_VR.FreeAlignments(); Y12.FreeAlignments(); Y12_STAR_VR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void LVar2( DistMatrix<F>& A ) { #ifndef RELEASE CallStackEntry entry("cholesky::LVar2"); if( A.Height() != A.Width() ) LogicError("Can only compute Cholesky factor of square matrices"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); // Temporary distributions DistMatrix<F,MR, STAR> A10Adj_MR_STAR(g); DistMatrix<F,STAR,STAR> A11_STAR_STAR(g); DistMatrix<F,VC, STAR> A21_VC_STAR(g); DistMatrix<F,MC, STAR> X11_MC_STAR(g); DistMatrix<F,MC, STAR> X21_MC_STAR(g); // Start the algorithm PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); A10Adj_MR_STAR.AlignWith( A10 ); X11_MC_STAR.AlignWith( A10 ); X21_MC_STAR.AlignWith( A20 ); //--------------------------------------------------------------------// A10Adj_MR_STAR.AdjointFrom( A10 ); LocalGemm( NORMAL, NORMAL, F(1), A10, A10Adj_MR_STAR, X11_MC_STAR ); A11.SumScatterUpdate( F(-1), X11_MC_STAR ); A11_STAR_STAR = A11; LocalCholesky( LOWER, A11_STAR_STAR ); A11 = A11_STAR_STAR; LocalGemm( NORMAL, NORMAL, F(1), A20, A10Adj_MR_STAR, X21_MC_STAR ); A21.SumScatterUpdate( F(-1), X21_MC_STAR ); A21_VC_STAR = A21; LocalTrsm ( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR ); A21 = A21_VC_STAR; //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } }
void LSquare ( DistMatrix<Complex<R> >& A, DistMatrix<Complex<R>,STAR,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("hermitian_tridiag::LSquare"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); #ifndef RELEASE if( g.Height() != g.Width() ) throw std::logic_error("The process grid must be square"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( t.Viewing() ) throw std::logic_error("t must not be a view"); #endif typedef Complex<R> C; DistMatrix<C,MD,STAR> tDiag(g); tDiag.AlignWithDiagonal( A, -1 ); tDiag.ResizeTo( A.Height()-1, 1 ); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), A01(g), A02(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), A20(g), A21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g); // Temporary distributions DistMatrix<C> WPan(g); DistMatrix<C,STAR,STAR> t1_STAR_STAR(g); DistMatrix<C,STAR,STAR> A11_STAR_STAR(g); DistMatrix<C,MC, STAR> APan_MC_STAR(g), A11_MC_STAR(g), A21_MC_STAR(g); DistMatrix<C,MR, STAR> APan_MR_STAR(g), A11_MR_STAR(g), A21_MR_STAR(g); DistMatrix<C,MC, STAR> WPan_MC_STAR(g), W11_MC_STAR(g), W21_MC_STAR(g); DistMatrix<C,MR, STAR> WPan_MR_STAR(g), W11_MR_STAR(g), W21_MR_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( tDiag, tT, tB, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); RepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2 ); if( A22.Height() > 0 ) { WPan.AlignWith( A11 ); APan_MC_STAR.AlignWith( A11 ); WPan_MC_STAR.AlignWith( A11 ); APan_MR_STAR.AlignWith( A11 ); WPan_MR_STAR.AlignWith( A11 ); //----------------------------------------------------------------// WPan.ResizeTo( ABR.Height(), A11.Width() ); APan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MC_STAR.ResizeTo( ABR.Height(), A11.Width() ); 
APan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); WPan_MR_STAR.ResizeTo( ABR.Height(), A11.Width() ); hermitian_tridiag::PanelLSquare ( ABR, WPan, t1, APan_MC_STAR, APan_MR_STAR, WPan_MC_STAR, WPan_MR_STAR ); PartitionDown ( APan_MC_STAR, A11_MC_STAR, A21_MC_STAR, A11.Height() ); PartitionDown ( APan_MR_STAR, A11_MR_STAR, A21_MR_STAR, A11.Height() ); PartitionDown ( WPan_MC_STAR, W11_MC_STAR, W21_MC_STAR, A11.Height() ); PartitionDown ( WPan_MR_STAR, W11_MR_STAR, W21_MR_STAR, A11.Height() ); LocalTrr2k ( LOWER, ADJOINT, ADJOINT, C(-1), A21_MC_STAR, W21_MR_STAR, W21_MC_STAR, A21_MR_STAR, C(1), A22 ); //----------------------------------------------------------------// WPan_MR_STAR.FreeAlignments(); APan_MR_STAR.FreeAlignments(); WPan_MC_STAR.FreeAlignments(); APan_MC_STAR.FreeAlignments(); WPan.FreeAlignments(); } else { A11_STAR_STAR = A11; t1_STAR_STAR.ResizeTo( t1.Height(), 1 ); HermitianTridiag ( LOWER, A11_STAR_STAR.Matrix(), t1_STAR_STAR.Matrix() ); A11 = A11_STAR_STAR; t1 = t1_STAR_STAR; } SlidePartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); } // Redistribute from matrix-diagonal form to fully replicated t = tDiag; }