inline void Syr2 ( UpperOrLower uplo, T alpha, const DistMatrix<T>& x, const DistMatrix<T>& y, DistMatrix<T>& A, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("Syr2"); if( A.Grid() != x.Grid() || x.Grid() != y.Grid() ) LogicError ("{A,x,y} must be distributed over the same grid"); if( A.Height() != A.Width() ) LogicError("A must be square"); const Int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const Int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( A.Height() != xLength || A.Height() != yLength ) { std::ostringstream msg; msg << "A must conform with x: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); const Int localHeight = A.LocalHeight(); const Int localWidth = A.LocalWidth(); const Int r = g.Height(); const Int c = g.Width(); const Int colShift = A.ColShift(); const Int rowShift = A.RowShift(); if( x.Width() == 1 && y.Width() == 1 ) { DistMatrix<T,MC,STAR> x_MC_STAR(g), y_MC_STAR(g); DistMatrix<T,MR,STAR> x_MR_STAR(g), y_MR_STAR(g); x_MC_STAR.AlignWith( A ); x_MR_STAR.AlignWith( A ); y_MC_STAR.AlignWith( A ); y_MR_STAR.AlignWith( A ); //--------------------------------------------------------------------// x_MC_STAR = x; x_MR_STAR = x_MC_STAR; y_MC_STAR = y; y_MR_STAR = y_MC_STAR; const T* xBuffer = x_MC_STAR.LockedBuffer(); const T* yBuffer = y_MC_STAR.LockedBuffer(); if( uplo == LOWER ) { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightAboveDiag = Length(j,colShift,r); const T beta = y_MR_STAR.GetLocal(jLoc,0); const T kappa = x_MR_STAR.GetLocal(jLoc,0); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=heightAboveDiag; iLoc<localHeight; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc] + delta*yBuffer[iLoc]; } } else { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightToDiag = Length(j+1,colShift,r); const T beta = y_MR_STAR.GetLocal(jLoc,0); const T kappa = x_MR_STAR.GetLocal(jLoc,0); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=0; iLoc<heightToDiag; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc] + delta*yBuffer[iLoc]; } } //--------------------------------------------------------------------// } else if( x.Width() == 1 ) { DistMatrix<T,MC,STAR> x_MC_STAR(g); DistMatrix<T,MR,STAR> x_MR_STAR(g); DistMatrix<T,STAR,MC> y_STAR_MC(g); DistMatrix<T,STAR,MR> y_STAR_MR(g); x_MC_STAR.AlignWith( A ); x_MR_STAR.AlignWith( A ); y_STAR_MC.AlignWith( A ); y_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// x_MC_STAR = x; x_MR_STAR = x_MC_STAR; y_STAR_MR = y; y_STAR_MC = y_STAR_MR; const T* xBuffer = x_MC_STAR.LockedBuffer(); const T* yBuffer = y_STAR_MC.LockedBuffer(); const Int incy = y_STAR_MC.LDim(); if( uplo == LOWER ) { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightAboveDiag = Length(j,colShift,r); const T beta = y_STAR_MR.GetLocal(0,jLoc); const T kappa = x_MR_STAR.GetLocal(jLoc,0); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=heightAboveDiag; iLoc<localHeight; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc] + delta*yBuffer[iLoc*incy]; } } else { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightToDiag = Length(j+1,colShift,r); const T beta = y_STAR_MR.GetLocal(0,jLoc); const T kappa = x_MR_STAR.GetLocal(jLoc,0); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=0; iLoc<heightToDiag; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc] + delta*yBuffer[iLoc*incy]; } } //--------------------------------------------------------------------// } else if( y.Width() == 1 ) { DistMatrix<T,STAR,MC> x_STAR_MC(g); DistMatrix<T,STAR,MR> x_STAR_MR(g); DistMatrix<T,MC,STAR> y_MC_STAR(g); DistMatrix<T,MR,STAR> y_MR_STAR(g); x_STAR_MC.AlignWith( A ); x_STAR_MR.AlignWith( A ); y_MC_STAR.AlignWith( A ); y_MR_STAR.AlignWith( A ); //--------------------------------------------------------------------// x_STAR_MR = x; x_STAR_MC = x_STAR_MR; y_MC_STAR = y; y_MR_STAR = y_MC_STAR; const T* xBuffer = x_STAR_MC.LockedBuffer(); const T* yBuffer = y_MC_STAR.LockedBuffer(); const Int incx = x_STAR_MC.LDim(); if( uplo == LOWER ) { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightAboveDiag = Length(j,colShift,r); const T beta = x_STAR_MR.GetLocal(0,jLoc); const T kappa = y_MR_STAR.GetLocal(jLoc,0); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=heightAboveDiag; iLoc<localHeight; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc*incx] + delta*yBuffer[iLoc]; } } else { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightToDiag = Length(j+1,colShift,r); const T beta = x_STAR_MR.GetLocal(0,jLoc); const T kappa = y_MR_STAR.GetLocal(jLoc,0); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=0; iLoc<heightToDiag; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc*incx] + delta*yBuffer[iLoc]; } } //--------------------------------------------------------------------// } else { DistMatrix<T,STAR,MC> x_STAR_MC(g), y_STAR_MC(g); DistMatrix<T,STAR,MR> x_STAR_MR(g), y_STAR_MR(g); x_STAR_MC.AlignWith( A ); x_STAR_MR.AlignWith( A ); y_STAR_MC.AlignWith( A ); y_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// x_STAR_MR = x; x_STAR_MC = x_STAR_MR; y_STAR_MR = y; y_STAR_MC = y_STAR_MR; const T* xBuffer = x_STAR_MC.LockedBuffer(); const T* yBuffer = y_STAR_MC.LockedBuffer(); const Int incx = x_STAR_MC.LDim(); const Int incy = y_STAR_MC.LDim(); if( uplo == LOWER ) { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightAboveDiag = Length(j,colShift,r); const T beta = y_STAR_MR.GetLocal(0,jLoc); const T kappa = x_STAR_MR.GetLocal(0,jLoc); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=heightAboveDiag; iLoc<localHeight; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc*incx] + delta*yBuffer[iLoc*incy]; } } else { for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = rowShift + jLoc*c; const Int heightToDiag = Length(j+1,colShift,r); const T beta = y_STAR_MR.GetLocal(0,jLoc); const T kappa = x_STAR_MR.GetLocal(0,jLoc); const T gamma = ( conjugate ? alpha*Conj(beta) : alpha*beta ); const T delta = ( conjugate ? alpha*Conj(kappa) : alpha*kappa ); T* ACol = A.Buffer(0,jLoc); for( Int iLoc=0; iLoc<heightToDiag; ++iLoc ) ACol[iLoc] += gamma*xBuffer[iLoc*incx] + delta*yBuffer[iLoc*incy]; } } //--------------------------------------------------------------------// } }
inline void Geru ( T alpha, const DistMatrix<T>& x, const DistMatrix<T>& y, DistMatrix<T>& A ) { #ifndef RELEASE PushCallStack("Geru"); if( A.Grid() != x.Grid() || x.Grid() != y.Grid() ) throw std::logic_error("{A,x,y} must be distributed over the same grid"); if( ( x.Width() != 1 && x.Height() != 1 ) || ( y.Width() != 1 && y.Height() != 1 ) ) throw std::logic_error("x and y are assumed to be vectors"); const int xLength = ( x.Width()==1 ? x.Height() : x.Width() ); const int yLength = ( y.Width()==1 ? y.Height() : y.Width() ); if( A.Height() != xLength || A.Width() != yLength ) { std::ostringstream msg; msg << "Nonconformal Geru: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " x ~ " << x.Height() << " x " << x.Width() << "\n" << " y ~ " << y.Height() << " x " << y.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); if( x.Width() == 1 && y.Width() == 1 ) { // Temporary distributions DistMatrix<T,MC,STAR> x_MC_STAR(g); DistMatrix<T,MR,STAR> y_MR_STAR(g); // Begin the algoritm x_MC_STAR.AlignWith( A ); y_MR_STAR.AlignWith( A ); //--------------------------------------------------------------------// x_MC_STAR = x; y_MR_STAR = y; Geru ( alpha, x_MC_STAR.LockedMatrix(), y_MR_STAR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// x_MC_STAR.FreeAlignments(); y_MR_STAR.FreeAlignments(); } else if( x.Width() == 1 ) { // Temporary distributions DistMatrix<T,MC, STAR> x_MC_STAR(g); DistMatrix<T,STAR,MR > y_STAR_MR(g); // Begin the algorithm x_MC_STAR.AlignWith( A ); y_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// x_MC_STAR = x; y_STAR_MR = y; Geru ( alpha, x_MC_STAR.LockedMatrix(), y_STAR_MR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// x_MC_STAR.FreeAlignments(); y_STAR_MR.FreeAlignments(); } else if( y.Width() == 1 ) { // Temporary distributions DistMatrix<T,STAR,MC > x_STAR_MC(g); DistMatrix<T,MR, STAR> y_MR_STAR(g); // Begin the algorithm x_STAR_MC.AlignWith( A ); y_MR_STAR.AlignWith( A ); //--------------------------------------------------------------------// x_STAR_MC = x; y_MR_STAR = y; Geru ( alpha, x_STAR_MC.LockedMatrix(), y_MR_STAR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// x_STAR_MC.FreeAlignments(); y_MR_STAR.FreeAlignments(); } else { // Temporary distributions DistMatrix<T,STAR,MC> x_STAR_MC(g); DistMatrix<T,STAR,MR> y_STAR_MR(g); // Begin the algorithm x_STAR_MC.AlignWith( A ); y_STAR_MR.AlignWith( A ); //--------------------------------------------------------------------// x_STAR_MC = x; y_STAR_MR = y; Geru ( alpha, x_STAR_MC.LockedMatrix(), y_STAR_MR.LockedMatrix(), A.Matrix() ); //--------------------------------------------------------------------// x_STAR_MC.FreeAlignments(); y_STAR_MR.FreeAlignments(); } #ifndef RELEASE PopCallStack(); #endif }