void runtest(size_t n) { std::vector<char> a1(n); std::vector<char> a11(n); std::vector<char> a12(n); std::vector<char> a21(n); std::vector<char> a22(n); memset(&a1[0], 0, n * sizeof(char)); auto func = [&] (char v, char *p1, char *p2) { volatile char *p = &a1[0]; for (size_t i = 0;i < n;i++) { f(&p[i], v, &p1[i], &p2[i]); } }; std::thread t1(func, (char)1, &a11[0], &a12[0]); std::thread t2(func, (char)2, &a21[0], &a22[0]); t1.join(); t2.join(); for (size_t i = 0;i < n;i++) { assert(a1[i] == 1 || a1[i] == 2); if (a1[i] == 1) { check(1, 2, a11[i], a12[i], a21[i], a22[i]); } else { assert(a1[i] == 2); check(2, 1, a21[i], a22[i], a11[i], a12[i]); } } }
inline void PanelHouseholder( DistMatrix<F>& A, DistMatrix<F,MD,STAR>& t ) { #ifndef RELEASE CallStackEntry entry("lq::PanelHouseholder"); if( A.Grid() != t.Grid() ) LogicError("{A,t} must be distributed over the same grid"); if( t.Height() != Min(A.Height(),A.Width()) || t.Width() != 1 ) LogicError ("t must be a vector of height equal to the minimum dimension of A"); if( !t.AlignedWithDiagonal( A, 0 ) ) LogicError("t must be aligned with A's main diagonal"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<F> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); DistMatrix<F,MD,STAR> tT(g), t0(g), tB(g), tau1(g), t2(g); // Temporary distributions DistMatrix<F> aTopRowConj(g); DistMatrix<F,STAR,MR > aTopRowConj_STAR_MR(g); DistMatrix<F,MC, STAR> z_MC_STAR(g); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22, 1 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2, 1 ); View1x2( aTopRow, alpha11, a12 ); View1x2( ABottomPan, a21, A22 ); aTopRowConj_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); //--------------------------------------------------------------------// // Compute the Householder reflector const F tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); // Apply the Householder reflector const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); F alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } Conjugate( aTopRow, aTopRowConj ); aTopRowConj_STAR_MR = aTopRowConj; Zeros( z_MC_STAR, ABottomPan.Height(), 1 ); LocalGemv ( NORMAL, F(1), ABottomPan, aTopRowConj_STAR_MR, F(0), z_MC_STAR ); z_MC_STAR.SumOverRow(); Ger ( -Conj(tau), z_MC_STAR.LockedMatrix(), aTopRowConj_STAR_MR.LockedMatrix(), ABottomPan.Matrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } }
inline void UnblockedBidiagU ( DistMatrix<Complex<R> >& A, DistMatrix<Complex<R>,MD,STAR>& tP, DistMatrix<Complex<R>,MD,STAR>& tQ ) { #ifndef RELEASE PushCallStack("BidiagU"); #endif const int tPHeight = std::max(A.Width()-1,0); const int tQHeight = A.Width(); #ifndef RELEASE if( A.Grid() != tP.Grid() || tP.Grid() != tQ.Grid() ) throw std::logic_error("Process grids do not match"); if( A.Height() < A.Width() ) throw std::logic_error("A must be at least as tall as it is wide"); if( tP.Viewing() && (tP.Height() != tPHeight || tP.Width() != 1) ) throw std::logic_error("tP is the wrong height"); if( tQ.Viewing() && (tQ.Height() != tQHeight || tQ.Width() != 1) ) throw std::logic_error("tQ is the wrong height"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); if( !tP.Viewing() ) tP.ResizeTo( tPHeight, 1 ); if( !tQ.Viewing() ) tQ.ResizeTo( tQHeight, 1 ); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), a01(g), A02(g), alpha12L(g), a12R(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), aB1(g), AB2(g), A20(g), a21(g), A22(g); // Temporary matrices DistMatrix<C,STAR,MR > a12_STAR_MR(g); DistMatrix<C,MC, STAR> aB1_MC_STAR(g); DistMatrix<C,MR, STAR> x12Adj_MR_STAR(g); DistMatrix<C,MC, STAR> w21_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); View2x1 ( aB1, alpha11, a21 ); View2x1 ( AB2, a12, A22 ); aB1_MC_STAR.AlignWith( aB1 ); a12_STAR_MR.AlignWith( a12 ); x12Adj_MR_STAR.AlignWith( AB2 ); w21_MC_STAR.AlignWith( A22 ); Zeros( a12.Width(), 1, x12Adj_MR_STAR ); Zeros( a21.Height(), 1, w21_MC_STAR ); const bool thisIsMyRow = ( g.Row() == alpha11.ColAlignment() ); const bool thisIsMyCol = ( g.Col() == alpha11.RowAlignment() ); const bool nextIsMyCol = ( g.Col() == a12.RowAlignment() ); //--------------------------------------------------------------------// // Find tauQ, u, and epsilonQ such that // I - conj(tauQ) | 1 | | 1, u^H | | alpha11 | = | epsilonQ | // | u | | a21 | | 0 | const C tauQ = Reflector( alpha11, a21 ); tQ.Set(A00.Height(),0,tauQ ); C epsilonQ=0; if( thisIsMyCol && thisIsMyRow ) epsilonQ = alpha11.GetLocal(0,0); // Set aB1 = | 1 | and form x12^H := (aB1^H AB2)^H = AB2^H aB1 // | u | alpha11.Set(0,0,C(1)); aB1_MC_STAR = aB1; internal::LocalGemv ( ADJOINT, C(1), AB2, aB1_MC_STAR, C(0), x12Adj_MR_STAR ); x12Adj_MR_STAR.SumOverCol(); // Update AB2 := AB2 - conj(tauQ) aB1 x12 // = AB2 - conj(tauQ) aB1 aB1^H AB2 // = (I - conj(tauQ) aB1 aB1^H) AB2 internal::LocalGer( -Conj(tauQ), aB1_MC_STAR, x12Adj_MR_STAR, AB2 ); // Put epsilonQ back instead of the temporary value, 1 if( thisIsMyCol && thisIsMyRow ) alpha11.SetLocal(0,0,epsilonQ); if( A22.Width() != 0 ) { // Due to the deficiencies in the BLAS ?gemv routines, this section // is easier if we temporarily conjugate a12 Conjugate( a12 ); // Expose the subvector we seek to zero, a12R PartitionRight( a12, alpha12L, a12R ); // Find tauP, v, and epsilonP such that // I - conj(tauP) | 1 | | 1, v^H | | alpha12L | = | epsilonP | // | v | | a12R^T | | 0 | const C tauP = Reflector( alpha12L, a12R ); tP.Set(A00.Height(),0,tauP); C epsilonP=0; if( nextIsMyCol && thisIsMyRow ) epsilonP = alpha12L.GetLocal(0,0); // Set a12^T = | 1 | and form w21 := A22 a12^T = A22 | 1 | // | v | | v | alpha12L.Set(0,0,C(1)); a12_STAR_MR = a12; internal::LocalGemv ( NORMAL, C(1), A22, a12_STAR_MR, C(0), w21_MC_STAR ); w21_MC_STAR.SumOverRow(); // A22 := A22 - tauP w21 conj(a12) // = A22 - tauP A22 a12^T conj(a12) // = A22 (I - tauP a12^T conj(a12)) // = A22 conj(I - conj(tauP) a12^H a12) // which compensates for the fact that the reflector was generated // on the conjugated a12. internal::LocalGer( -tauP, w21_MC_STAR, a12_STAR_MR, A22 ); // Put epsilonP back instead of the temporary value, 1 if( nextIsMyCol && thisIsMyRow ) alpha12L.SetLocal(0,0,epsilonP); // Undue the temporary conjugation Conjugate( a12 ); } //--------------------------------------------------------------------// aB1_MC_STAR.FreeAlignments(); a12_STAR_MR.FreeAlignments(); x12Adj_MR_STAR.FreeAlignments(); w21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void UnblockedBidiagU( DistMatrix<R>& A ) { #ifndef RELEASE PushCallStack("bidiag::UnblockedBidiagU"); if( A.Height() < A.Width() ) throw std::logic_error("A must be at least as tall as it is wide"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<R> ATL(g), ATR(g), A00(g), a01(g), A02(g), alpha12L(g), a12R(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), aB1(g), AB2(g), A20(g), a21(g), A22(g); // Temporary matrices DistMatrix<R,STAR,MR > a12_STAR_MR(g); DistMatrix<R,MC, STAR> aB1_MC_STAR(g); DistMatrix<R,MR, STAR> x12Trans_MR_STAR(g); DistMatrix<R,MC, STAR> w21_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); View2x1 ( aB1, alpha11, a21 ); View2x1 ( AB2, a12, A22 ); aB1_MC_STAR.AlignWith( aB1 ); a12_STAR_MR.AlignWith( a12 ); x12Trans_MR_STAR.AlignWith( AB2 ); w21_MC_STAR.AlignWith( A22 ); Zeros( a12.Width(), 1, x12Trans_MR_STAR ); Zeros( a21.Height(), 1, w21_MC_STAR ); const bool thisIsMyRow = ( g.Row() == alpha11.ColAlignment() ); const bool thisIsMyCol = ( g.Col() == alpha11.RowAlignment() ); const bool nextIsMyCol = ( g.Col() == a12.RowAlignment() ); //--------------------------------------------------------------------// // Find tauQ, u, and epsilonQ such that // I - tauQ | 1 | | 1, u^T | | alpha11 | = | epsilonQ | // | u | | a21 | = | 0 | const R tauQ = Reflector( alpha11, a21 ); R epsilonQ=0; if( thisIsMyCol && thisIsMyRow ) epsilonQ = alpha11.GetLocal(0,0); // Set aB1 = | 1 | and form x12^T := (aB1^T AB2)^T = AB2^T aB1 // | u | alpha11.Set(0,0,R(1)); aB1_MC_STAR = aB1; internal::LocalGemv ( TRANSPOSE, R(1), AB2, aB1_MC_STAR, R(0), x12Trans_MR_STAR ); x12Trans_MR_STAR.SumOverCol(); // Update AB2 := AB2 - tauQ aB1 x12 // = AB2 - tauQ aB1 aB1^T AB2 // = (I - tauQ aB1 aB1^T) AB2 internal::LocalGer( -tauQ, aB1_MC_STAR, x12Trans_MR_STAR, AB2 ); // Put epsilonQ back instead of the temporary value, 1 if( thisIsMyCol && thisIsMyRow ) alpha11.SetLocal(0,0,epsilonQ); if( A22.Width() != 0 ) { // Expose the subvector we seek to zero, a12R PartitionRight( a12, alpha12L, a12R ); // Find tauP, v, and epsilonP such that // I - tauP | 1 | | 1, v^T | | alpha12L | = | epsilonP | // | v | | a12R^T | = | 0 | const R tauP = Reflector( alpha12L, a12R ); R epsilonP=0; if( nextIsMyCol && thisIsMyRow ) epsilonP = alpha12L.GetLocal(0,0); // Set a12^T = | 1 | and form w21 := A22 a12^T = A22 | 1 | // | v | | v | alpha12L.Set(0,0,R(1)); a12_STAR_MR = a12; internal::LocalGemv ( NORMAL, R(1), A22, a12_STAR_MR, R(0), w21_MC_STAR ); w21_MC_STAR.SumOverRow(); // A22 := A22 - tauP w21 a12 // = A22 - tauP A22 a12^T a12 // = A22 (I - tauP a12^T a12) internal::LocalGer( -tauP, w21_MC_STAR, a12_STAR_MR, A22 ); // Put epsilonP back instead of the temporary value, 1 if( nextIsMyCol && thisIsMyRow ) alpha12L.SetLocal(0,0,epsilonP); } //--------------------------------------------------------------------// aB1_MC_STAR.FreeAlignments(); a12_STAR_MR.FreeAlignments(); x12Trans_MR_STAR.FreeAlignments(); w21_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void PanelLU ( DistMatrix<F, STAR,STAR>& A, DistMatrix<F, MC, STAR>& B, DistMatrix<int,STAR,STAR>& p, int pivotOffset ) { #ifndef RELEASE PushCallStack("internal::PanelLU"); if( A.Grid() != p.Grid() || p.Grid() != B.Grid() ) throw std::logic_error ("Matrices must be distributed over the same grid"); if( A.Width() != B.Width() ) throw std::logic_error("A and B must be the same width"); if( A.Height() != p.Height() || p.Width() != 1 ) throw std::logic_error("p must be a vector that conforms with A"); #endif const Grid& g = A.Grid(); const int r = g.Height(); const int colShift = B.ColShift(); const int colAlignment = B.ColAlignment(); // Matrix views DistMatrix<F,STAR,STAR> ATL(g), ATR(g), A00(g), a01(g), A02(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); DistMatrix<F,MC,STAR> BL(g), BR(g), B0(g), b1(g), B2(g); const int width = A.Width(); const int numBytes = (width+1)*sizeof(F)+sizeof(int); std::vector<byte> sendData(numBytes); std::vector<byte> recvData(numBytes); // Extract pointers to send and recv data // TODO: Think of how to make this safer with respect to alignment issues F* sendBufFloat = (F*)&sendData[0]; F* recvBufFloat = (F*)&recvData[0]; int* sendBufInt = (int*)&sendData[(width+1)*sizeof(F)]; int* recvBufInt = (int*)&recvData[(width+1)*sizeof(F)]; // Start the algorithm PushBlocksizeStack( 1 ); PartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionRight( B, BL, BR, 0 ); while( ATL.Height() < A.Height() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); RepartitionRight ( BL, /**/ BR, B0, /**/ b1, B2 ); //--------------------------------------------------------------------// const int currentRow = a01.Height(); // Store the index/value of the pivot candidate in A F pivot = alpha11.GetLocal(0,0); int pivotRow = currentRow; for( int i=0; i<a21.Height(); ++i ) { F value = a21.GetLocal(i,0); if( FastAbs(value) > FastAbs(pivot) ) { pivot = value; pivotRow = currentRow + i + 1; } } // Update the pivot candidate to include local data from B for( int i=0; i<B.LocalHeight(); ++i ) { F value = b1.GetLocal(i,0); if( FastAbs(value) > FastAbs(pivot) ) { pivot = value; pivotRow = A.Height() + colShift + i*r; } } // Fill the send buffer with: // [ pivotValue | pivot row data | pivotRow ] if( pivotRow < A.Height() ) { sendBufFloat[0] = A.GetLocal(pivotRow,a10.Width()); const int ALDim = A.LocalLDim(); const F* ABuffer = A.LocalBuffer(pivotRow,0); for( int j=0; j<width; ++j ) sendBufFloat[j+1] = ABuffer[j*ALDim]; } else { const int localRow = ((pivotRow-A.Height())-colShift)/r; sendBufFloat[0] = b1.GetLocal(localRow,0); const int BLDim = B.LocalLDim(); const F* BBuffer = B.LocalBuffer(localRow,0); for( int j=0; j<width; ++j ) sendBufFloat[j+1] = BBuffer[j*BLDim]; } *sendBufInt = pivotRow; // Communicate to establish the pivot information mpi::AllReduce ( &sendData[0], &recvData[0], numBytes, PivotOp<F>(), g.ColComm() ); // Update the pivot vector pivotRow = *recvBufInt; p.SetLocal(currentRow,0,pivotRow+pivotOffset); // Copy the current row into the pivot row if( pivotRow < A.Height() ) { const int ALDim = A.LocalLDim(); F* ASetBuffer = A.LocalBuffer(pivotRow,0); const F* AGetBuffer = A.LocalBuffer(currentRow,0); for( int j=0; j<width; ++j ) ASetBuffer[j*ALDim] = AGetBuffer[j*ALDim]; } else { const int ownerRank = (colAlignment+(pivotRow-A.Height())) % r; if( g.Row() == ownerRank ) { const int localRow = ((pivotRow-A.Height())-colShift) / r; const int ALDim = A.LocalLDim(); const int BLDim = B.LocalLDim(); F* BBuffer = B.LocalBuffer(localRow,0); const F* ABuffer = A.LocalBuffer(currentRow,0); for( int j=0; j<width; ++j ) BBuffer[j*BLDim] = ABuffer[j*ALDim]; } } // Copy the pivot row into the current row { F* ABuffer = A.LocalBuffer(currentRow,0); const int ALDim = A.LocalLDim(); for( int j=0; j<width; ++j ) ABuffer[j*ALDim] = recvBufFloat[j+1]; } // Now we can perform the update of the current panel const F alpha = alpha11.GetLocal(0,0); if( alpha == F(0) ) throw SingularMatrixException(); const F alpha11Inv = F(1) / alpha; Scale( alpha11Inv, a21.LocalMatrix() ); Scale( alpha11Inv, b1.LocalMatrix() ); Geru( F(-1), a21.LocalMatrix(), a12.LocalMatrix(), A22.LocalMatrix() ); Geru( F(-1), b1.LocalMatrix(), a12.LocalMatrix(), B2.LocalMatrix() ); //--------------------------------------------------------------------// SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); SlidePartitionRight ( BL, /**/ BR, B0, b1, /**/ B2 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
void level_two() { vector<DPipe> DPIPES(44); vector<DoublePipe> DOUBPIPES(18); vector<CrossPipe> CROSSPIPES(3); DPipe background(600,400,1200,800); DPipe a0(50,750,100,40); DPipe a1(150,650,100,40); DPipe a2(150,550,100,40); DPipe a3(650,450,100,40); DPipe a4(550,550,100,40); DPipe a5(450,350,100,40); DPipe a6(550,250,100,40); DPipe a7(650,250,100,40); DPipe a8(750,350,100,40); DPipe a9(750,450,100,40); DPipe a10(750,550,100,40); DPipe a11(650,650,100,40); DPipe a12(550,650,100,40); DPipe a13(450,650,100,40); DPipe a14(350,550,100,40); DPipe a15(350,350,100,40); DPipe a16(350,250,100,40); DPipe a17(450,150,100,40); DPipe a18(550,150,100,40); DPipe a19(650,150,100,40); DPipe a20(750,150,100,40); DPipe a21(850,250,100,40); DPipe a22(850,350,100,40); DPipe a23(850,450,100,40); DPipe a24(850,550,100,40); DPipe a25(850,650,100,40); DPipe a26(750,750,100,40); DPipe a27(650,750,100,40); DPipe a28(550,750,100,40); DPipe a29(450,750,100,40); DPipe a30(350,750,100,40); DPipe a31(250,650,100,40); DPipe a32(250,550,100,40); DPipe a33(250,350,100,40); DPipe a34(250,250,100,40); DPipe a35(250,150,100,40); DPipe a36(350,50,100,40); DPipe a37(450,50,100,40); DPipe a38(550,50,100,40); DPipe a39(650,50,100,40); DPipe a40(750,50,100,40); DPipe a41(850,50,100,40); DPipe a42(950,150,100,40); DPipe a43(950,250,100,40); DoublePipe b0(150,750,70,40); DoublePipe b1(150,450,70,40); DoublePipe b2(550,450,70,40); DoublePipe b3(550,350,70,40); DoublePipe b4(650,350,70,40); DoublePipe b5(650,550,70,40); DoublePipe b6(450,550,70,40); DoublePipe b7(450,250,70,40); DoublePipe b8(750,250,70,40); DoublePipe b9(750,650,70,40); DoublePipe b10(350,650,70,40); DoublePipe b11(350,150,70,40); DoublePipe b12(850,150,70,40); DoublePipe b13(850,750,70,40); DoublePipe b14(250,750,70,40); DoublePipe b15(250,50,70,40); DoublePipe b16(950,50,70,40); DoublePipe b17(950,350,70,40); CrossPipe c0(250,450,100,40); CrossPipe c1(350,450,100,40); CrossPipe c2(450,450,100,40); DPIPES[0]=a0; DPIPES[1]=a1; DPIPES[2]=a2; DPIPES[3]=a3; DPIPES[4]=a4; DPIPES[5]=a5; DPIPES[6]=a6; DPIPES[7]=a7; DPIPES[8]=a8; DPIPES[9]=a9; DPIPES[10]=a10; DPIPES[11]=a11; DPIPES[12]=a12; DPIPES[13]=a13; DPIPES[14]=a14; DPIPES[15]=a15; DPIPES[16]=a16; DPIPES[17]=a17; DPIPES[18]=a18; DPIPES[19]=a19; DPIPES[20]=a20; DPIPES[21]=a21; DPIPES[22]=a22; DPIPES[23]=a23; DPIPES[24]=a24; DPIPES[25]=a25; DPIPES[26]=a26; DPIPES[27]=a27; DPIPES[28]=a28; DPIPES[29]=a29; DPIPES[30]=a30; DPIPES[31]=a31; DPIPES[32]=a32; DPIPES[33]=a33; DPIPES[34]=a34; DPIPES[35]=a35; DPIPES[36]=a36; DPIPES[37]=a37; DPIPES[38]=a38; DPIPES[39]=a39; DPIPES[40]=a40; DPIPES[41]=a41; DPIPES[42]=a42; DPIPES[43]=a43; DOUBPIPES[0]=b0; DOUBPIPES[1]=b1; DOUBPIPES[2]=b2; DOUBPIPES[3]=b3; DOUBPIPES[4]=b4; DOUBPIPES[5]=b5; DOUBPIPES[6]=b6; DOUBPIPES[7]=b7; DOUBPIPES[8]=b8; DOUBPIPES[9]=b9; DOUBPIPES[10]=b10; DOUBPIPES[11]=b11; DOUBPIPES[12]=b12; DOUBPIPES[13]=b13; DOUBPIPES[14]=b14; DOUBPIPES[15]=b15; DOUBPIPES[16]=b16; DOUBPIPES[17]=b17; CROSSPIPES[0]=c0; CROSSPIPES[1]=c1; CROSSPIPES[2]=c2; }
inline void PanelLQ( DistMatrix<Real>& A ) { #ifndef RELEASE PushCallStack("internal::PanelLQ"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<Real> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); // Temporary distributions DistMatrix<Real,STAR,MR> aTopRow_STAR_MR(g); DistMatrix<Real,MC,STAR> z_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); aTopRow.View1x2( alpha11, a12 ); ABottomPan.View1x2( a21, A22 ); aTopRow_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); Zeros( ABottomPan.Height(), 1, z_MC_STAR ); //--------------------------------------------------------------------// const Real tau = Reflector( alpha11, a12 ); const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); Real alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } aTopRow_STAR_MR = aTopRow; Gemv ( NORMAL, Real(1), ABottomPan.LockedLocalMatrix(), aTopRow_STAR_MR.LockedLocalMatrix(), Real(0), z_MC_STAR.LocalMatrix() ); z_MC_STAR.SumOverRow(); Ger ( -tau, z_MC_STAR.LockedLocalMatrix(), aTopRow_STAR_MR.LockedLocalMatrix(), ABottomPan.LocalMatrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// aTopRow_STAR_MR.FreeAlignments(); z_MC_STAR.FreeAlignments(); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
inline void PanelLQ ( DistMatrix<Complex<Real> >& A, DistMatrix<Complex<Real>,MD,STAR>& t ) { #ifndef RELEASE PushCallStack("internal::PanelLQ"); if( A.Grid() != t.Grid() ) throw std::logic_error("{A,t} must be distributed over the same grid"); if( t.Height() != std::min(A.Height(),A.Width()) || t.Width() != 1 ) throw std::logic_error ("t must be a vector of height equal to the minimum dimension of A"); if( !t.AlignedWithDiagonal( A, 0 ) ) throw std::logic_error("t must be aligned with A's main diagonal"); #endif typedef Complex<Real> C; const Grid& g = A.Grid(); // Matrix views DistMatrix<C> ATL(g), ATR(g), A00(g), a01(g), A02(g), aTopRow(g), ABottomPan(g), ABL(g), ABR(g), a10(g), alpha11(g), a12(g), A20(g), a21(g), A22(g); DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), tau1(g), t2(g); // Temporary distributions DistMatrix<C> aTopRowConj(g); DistMatrix<C,STAR,MR > aTopRowConj_STAR_MR(g); DistMatrix<C,MC, STAR> z_MC_STAR(g); PushBlocksizeStack( 1 ); PartitionDownLeftDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); PartitionDown ( t, tT, tB, 0 ); while( ATL.Height() < A.Height() && ATL.Width() < A.Width() ) { RepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ a01, A02, /*************/ /**********************/ /**/ a10, /**/ alpha11, a12, ABL, /**/ ABR, A20, /**/ a21, A22 ); RepartitionDown ( tT, t0, /**/ /****/ tau1, tB, t2 ); aTopRow.View1x2( alpha11, a12 ); ABottomPan.View1x2( a21, A22 ); aTopRowConj_STAR_MR.AlignWith( ABottomPan ); z_MC_STAR.AlignWith( ABottomPan ); Zeros( ABottomPan.Height(), 1, z_MC_STAR ); //--------------------------------------------------------------------// const C tau = Reflector( alpha11, a12 ); tau1.Set( 0, 0, tau ); const bool myDiagonalEntry = ( g.Row() == alpha11.ColAlignment() && g.Col() == alpha11.RowAlignment() ); C alpha = 0; if( myDiagonalEntry ) { alpha = alpha11.GetLocal(0,0); alpha11.SetLocal(0,0,1); } Conjugate( aTopRow, aTopRowConj ); aTopRowConj_STAR_MR = aTopRowConj; Gemv ( NORMAL, C(1), ABottomPan.LockedLocalMatrix(), aTopRowConj_STAR_MR.LockedLocalMatrix(), C(0), z_MC_STAR.LocalMatrix() ); z_MC_STAR.SumOverRow(); Ger ( -Conj(tau), z_MC_STAR.LockedLocalMatrix(), aTopRowConj_STAR_MR.LockedLocalMatrix(), ABottomPan.LocalMatrix() ); if( myDiagonalEntry ) alpha11.SetLocal(0,0,alpha); //--------------------------------------------------------------------// aTopRowConj_STAR_MR.FreeAlignments(); z_MC_STAR.FreeAlignments(); SlidePartitionDown ( tT, t0, tau1, /**/ /****/ tB, t2 ); SlidePartitionDownDiagonal ( ATL, /**/ ATR, A00, a01, /**/ A02, /**/ a10, alpha11, /**/ a12, /*************/ /**********************/ ABL, /**/ ABR, A20, a21, /**/ A22 ); } PopBlocksizeStack(); #ifndef RELEASE PopCallStack(); #endif }
int main () { a21 (0, 100, 5); return 0; }