double Bs2JpsiPhi_SignalAlt_MO_v4::diffXsecNorm1( ) const { //preCalculateTimeIntegrals() ; Replaced by new Caching mechanism , but this cant be used when event resolution is selected if( useEventResolution() ) preCalculateTimeIntegrals() ; double norm = A0()*A0() * timeFactorA0A0Int( ) * angAccI1 + AP()*AP() * timeFactorAPAPInt( ) * angAccI2 + AT()*AT() * timeFactorATATInt( ) * angAccI3 + AP()*AT() * timeFactorImAPATInt( ) * angAccI4 + A0()*AP() * timeFactorReA0APInt( ) * angAccI5 + A0()*AT() * timeFactorImA0ATInt( ) * angAccI6 + AS()*AS() * timeFactorASASInt( ) * angAccI7 + AS()*AP() * timeFactorReASAPInt( ) * angAccI8 + AS()*AT() * timeFactorImASATInt( ) * angAccI9 + AS()*A0() * timeFactorReASA0Int( ) * angAccI10 ; if( DEBUGFLAG && (norm < 0) ) this->DebugPrintNorm( " Bs2JpsiPhi_SignalAlt_MO_v4_v1::diffXsecNorm1( ) : return value < 0 = ", norm ) ; return norm ; }
// Python binding for COPT::RLS_Method.
// Python-side arguments:
//   A:     non-empty list of m rows, each row a list of numbers; the column
//          count n is taken from the first row and every row must hold at
//          least n entries
//   b:     list with at least m numbers (the right hand vector)
//   lam:   float forwarded unchanged to COPT::RLS_Method
//   delta: float forwarded unchanged to COPT::RLS_Method
// return value: a new Python list with the n entries of the computed vector,
//   or NULL with a Python exception set when the arguments are malformed.
static PyObject* geRLS(PyObject *self,PyObject *args)
{
    PyObject *A,*b;
    float lam,delta;
    if (!PyArg_ParseTuple(args,"OOff",&A,&b,&lam,&delta))
        return NULL;

    // PyList_GetItem is only valid on real list objects, so reject anything
    // else up front instead of dereferencing a NULL item later.
    if (!PyList_Check(A) || !PyList_Check(b)) {
        PyErr_SetString(PyExc_TypeError,
                        "geRLS: A must be a list of lists and b must be a list");
        return NULL;
    }

    const int m = static_cast<int>(PyList_Size(A));
    if (m <= 0) {
        PyErr_SetString(PyExc_ValueError,
                        "geRLS: A must contain at least one row");
        return NULL;
    }
    if (PyList_Size(b) < m) {
        PyErr_SetString(PyExc_ValueError,
                        "geRLS: b must have at least as many entries as A has rows");
        return NULL;
    }

    PyObject *firstRow = PyList_GetItem(A,0);   // borrowed reference
    if (!PyList_Check(firstRow)) {
        PyErr_SetString(PyExc_TypeError,
                        "geRLS: every row of A must be a list");
        return NULL;
    }
    const int n = static_cast<int>(PyList_Size(firstRow));

    Matrix A0(m,n);
    for (int i = 0; i < m; i++) {
        PyObject *row = PyList_GetItem(A,i);    // borrowed reference
        if (!PyList_Check(row) || PyList_Size(row) < n) {
            PyErr_SetString(PyExc_ValueError,
                            "geRLS: every row of A must be a list with at least as many entries as the first row");
            return NULL;
        }
        for (int j = 0; j < n; j++) {
            const double value = PyFloat_AsDouble(PyList_GetItem(row,j));
            // -1.0 is also a legal value; only treat it as a failure when an
            // exception is actually pending.
            if (value == -1.0 && PyErr_Occurred())
                return NULL;
            A0(i,j) = value;
        }
    }

    Vector b0(m);
    for (int i = 0; i < m; i++) {
        const double value = PyFloat_AsDouble(PyList_GetItem(b,i));
        if (value == -1.0 && PyErr_Occurred())
            return NULL;
        b0[i] = value;
    }

    Vector x(n);
    COPT::RLS_Method(A0,b0,x,lam,delta);

    PyObject *list = PyList_New(n);
    if (list == NULL)
        return NULL;
    for (int j = 0; j < n; j++) {
        PyObject *entry = Py_BuildValue("f",x[j]);
        if (entry == NULL) {
            Py_DECREF(list);
            return NULL;
        }
        PyList_SetItem(list,j,entry);           // steals the reference to entry
    }

    // Hand our only reference to the caller. Wrapping the list in
    // Py_BuildValue("O", ...) would add a second reference that nobody
    // releases, leaking the list on every call.
    return list;
}
double Bs2JpsiPhi_SignalAlt_MO_v4::diffXsecTimeOnly( ) const { preCalculateTimeFactors() ; double xsec = A0()*A0() * timeFactorA0A0( ) * angAccI1 + AP()*AP() * timeFactorAPAP( ) * angAccI2 + AT()*AT() * timeFactorATAT( ) * angAccI3 + AP()*AT() * timeFactorImAPAT( ) * angAccI4 + A0()*AP() * timeFactorReA0AP( ) * angAccI5 + A0()*AT() * timeFactorImA0AT( ) * angAccI6 + AS()*AS() * timeFactorASAS( ) * angAccI7 + AS()*AP() * timeFactorReASAP( ) * angAccI8 + AS()*AT() * timeFactorImASAT( ) * angAccI9 + AS()*A0() * timeFactorReASA0( ) * angAccI10 ; if( useTimeAcceptance() ) xsec = xsec * timeAcc->getValue(t); if( DEBUGFLAG && (xsec < 0) ) this->DebugPrintXsec( " Bs2JpsiPhi_SignalAlt_MO_v4_v1::diffXsecTimeOnly( ) : return value < 0 = ", xsec ) ; return xsec ; }
// Least mean square method // args: // A: the input matrix // b: the right hand vector // mu: the scaling factor // return value: the obtained coefficient static PyObject* pyLeastMeanSquare(PyObject *self,PyObject *args) { PyObject *A,*b; float mu; if (!PyArg_ParseTuple(args,"OOf",&A,&b,&mu)) return NULL; int m = PyObject_Size(A); int n = PyObject_Size(PyList_GetItem(A,0)); Matrix A0(m,n); for(int i = 0;i < m;i++) for(int j = 0;j < n;j++) A0(i,j) = PyFloat_AsDouble(PyList_GetItem(PyList_GetItem(A,i),j)); Vector b0(m); for(int i = 0;i < m;i++) b0[i] = PyFloat_AsDouble(PyList_GetItem(b,i)); Vector x(n); COPT::LeastMeanSquareMethod(A0,b0,mu,x); PyObject *list; list = PyList_New(n); for(int j = 0;j < n;j++) PyList_SetItem(list,j,Py_BuildValue("f",x[j])); return Py_BuildValue("O",list); }
double Bs2JpsiPhi_mistagObservable_alt::diffXsecNorm2( ) const { double norm = 0.5 * A0()*A0() * timeFactorA0A0( ) + // Angle factors normalised to 1 0.5 * AP()*AP() * timeFactorAPAP( ) + 0.5 * AT()*AT() * timeFactorATAT( ) ; return norm ; };
double Bs2JpsiPhi_mistagObservable_alt::diffXsec( ) const { double xsec = 0.5 * A0()*A0() * timeFactorA0A0( ) * angleFactorA0A0( ) + 0.5 * AP()*AP() * timeFactorAPAP( ) * angleFactorAPAP( ) + 0.5 * AT()*AT() * timeFactorATAT( ) * angleFactorATAT( ) + 0.5 * A0()*AP() * timeFactorReA0AP( ) * angleFactorReA0AP( ) + 0.5 * AP()*AT() * timeFactorImAPAT( ) * angleFactorImAPAT( ) + 0.5 * A0()*AT() * timeFactorImA0AT( ) * angleFactorImA0AT( ) ; return xsec ; };
double Bs2JpsiPhi_mistagObservable_alt::diffXsecNorm1( ) const { double reference = 32.0*TMath::Pi()/9.0 ; double norm = 0.5 * A0()*A0() * timeFactorA0A0Int( ) * angAccI1 + 0.5 * AP()*AP() * timeFactorAPAPInt( ) * angAccI2 + 0.5 * AT()*AT() * timeFactorATATInt( ) * angAccI3 + 0.5 * A0()*AP() * timeFactorReA0APInt( ) * angAccI5 + 0.5 * AP()*AT() * timeFactorImAPATInt( ) * angAccI4 + 0.5 * A0()*AT() * timeFactorImA0ATInt( ) * angAccI6 ; // In the canonical PDF, the ApAt term is number 4! return norm ; };
// Log prior of the normalized model.
// Returns -1.0E300 as soon as any diagonal entry of A0 is negative; otherwise
// returns the base-class log prior plus n_vars * log(2).
double SBVAR_symmetric_linear_normalized::LogPrior(void)
{
  const double log_of_two = 0.693147180559945;   // log(2)

  int diag = 0;
  while (diag < n_vars)
    {
      if (A0(diag,diag) < 0.0)
        return -1.0E300;
      diag++;
    }

  return SBVAR_symmetric_linear::LogPrior() + log_of_two*n_vars;
}
//....................................................... // New speed up method to Cache time integrals void Bs2JpsiPhi_SignalAlt_MO_v4::CacheAmplitudesAndAngles() { CachedA1 = A0()*A0() * angleFactorA0A0( ) ; CachedA2 = AP()*AP() * angleFactorAPAP( ) ; CachedA3 = AT()*AT() * angleFactorATAT( ) ; CachedA4 = AP()*AT() * angleFactorImAPAT( ) ; CachedA5 = A0()*AP() * angleFactorReA0AP( ) ; CachedA6 = A0()*AT() * angleFactorImA0AT( ) ; CachedA7 = AS()*AS() * angleFactorASAS( ) ; CachedA8 = AS()*AP() * angleFactorReASAP( ) ; CachedA9 = AS()*AT() * angleFactorImASAT( ) ; CachedA10= AS()*A0() * angleFactorReASA0( ) ; }
// Unblocked in-place reduction of the square matrix A to Hessenberg form.
//   A:     square matrix view, overwritten with the result (see below)
//   Ubeta: vector view of length N-1 with unit step and no conjugation;
//          receives one Householder scalar per processed column
// With XDEBUG defined the routine keeps a copy of the input, rebuilds
// U*H*U.adjoint() afterwards and aborts if it differs from the input by more
// than 0.001 in relative norm.
static void NonBlockHessenberg( MatrixView<T> A, VectorView<T> Ubeta)
{
#ifdef XDEBUG
    cout<<"Start NonBlock Hessenberg Reduction: A = "<<A<<endl;
    Matrix<T> A0(A);
#endif
    // Decompose A into U H Ut
    // H is a Hessenberg Matrix
    // U is a Unitary Matrix
    // On output, H is stored in the upper-Hessenberg part of A
    // U is stored in compact form in the rest of A along with
    // the vector Ubeta.
    const ptrdiff_t N = A.rowsize();

    TMVAssert(A.colsize() == A.rowsize());
    TMVAssert(N > 0);
    TMVAssert(Ubeta.size() == N-1);
    TMVAssert(A.iscm() || A.isrm());
    TMVAssert(!Ubeta.isconj());
    TMVAssert(Ubeta.step()==1);

    // We use Householder reflections to reduce A to the Hessenberg form:
    // Uj walks through Ubeta in lock step with the column index j.
    T* Uj = Ubeta.ptr();
    T det = 0; // Ignore Householder det calculations
    for(ptrdiff_t j=0;j<N-1;++j,++Uj) {
#ifdef TMVFLDEBUG
        TMVAssert(Uj >= Ubeta._first);
        TMVAssert(Uj < Ubeta._last);
#endif
        // Reflect on the block below the diagonal of column j and store the
        // returned scalar in Ubeta.
        *Uj = Householder_Reflect(A.subMatrix(j+1,N,j,N),det);
        // A zero scalar means there is nothing to apply for this column.
        // NOTE(review): col() is called with two arguments here; confirm
        // against the MatrixView API that this selects rows j+2..N of
        // column j (a three-argument col(j,j+2,N) may have been intended).
        if (*Uj != T(0))
            Householder_LMult(A.col(j+2,N),*Uj,A.subMatrix(0,N,j+1,N).adjoint());
    }

#ifdef XDEBUG
    // Rebuild U from the compact storage and check that U*H*U.adjoint()
    // reproduces the matrix saved on entry.
    Matrix<T> U(N,N,T(0));
    U.subMatrix(1,N,1,N) = A.subMatrix(1,N,0,N-1);
    U.upperTri().setZero();
    Vector<T> Ubeta2(N);
    Ubeta2.subVector(1,N) = Ubeta;
    Ubeta2(0) = T(0);
    GetQFromQR(U.view(),Ubeta2);
    Matrix<T> H = A;
    if (N>2) LowerTriMatrixViewOf(H).offDiag(2).setZero();
    Matrix<T> AA = U*H*U.adjoint();
    if (Norm(A0-AA) > 0.001*Norm(A0)) {
        cerr<<"NonBlock Hessenberg: A = "<<Type(A)<<" "<<A0<<endl;
        cerr<<"A = "<<A<<endl;
        cerr<<"Ubeta = "<<Ubeta<<endl;
        cerr<<"U = "<<U<<endl;
        cerr<<"H = "<<H<<endl;
        cerr<<"UHUt = "<<AA<<endl;
        abort();
    }
#endif
}
double Bs2JpsiPhi_mistagObservable_alt::Normalisation(DataPoint * measurement, PhaseSpaceBoundary * boundary) { // Get observables into member variables t = measurement->GetObservable( timeName )->GetValue() - timeOffset; ctheta_tr = measurement->GetObservable( cosThetaName )->GetValue(); phi_tr = measurement->GetObservable( phiName )->GetValue(); ctheta_1 = measurement->GetObservable( cosPsiName )->GetValue(); tagFraction = measurement->GetObservable( mistagName )->GetValue(); //tagFraction= 0.5; //PELC // Get time boundaries into member variables IConstraint * timeBound = boundary->GetConstraint("time"); if ( timeBound->GetUnit() == "NameNotFoundError" ) { cerr << "Bound on time not provided" << endl; return 0; } else { tlo = timeBound->GetMinimum(); thi = timeBound->GetMaximum(); } // Recalculate cached values if Physics parameters have changed // Must do this for each of the two resolutions. //PELC // I dont think you can cache any more as normalisation depends upon the mistag which now changes per event. if( true /*! normalisationCacheValid*/ ) { for( tag = -1; tag <= 1; tag ++ ) { resolution = resolution1 ; normalisationCacheValueRes1[tag+1] = this->diffXsecNorm1( ); resolution = resolution2 ; normalisationCacheValueRes2[tag+1] = this->diffXsecNorm1( ); } normalisationCacheValid = true ; } // Return normalisation value according to tag tag = (int)measurement->GetObservable( tagName )->GetValue(); double returnValue = resolution1Fraction*normalisationCacheValueRes1[tag+1] + (1. - resolution1Fraction)*normalisationCacheValueRes2[tag+1] ; if( (returnValue <= 0.) || isnan(returnValue) ) { cout << " Bs2JpsiPhi_mistagObservable_alt::Normalisation() returns <=0 or nan " << endl ; cout << " gamma " << gamma() ; cout << " gl " << gamma_l() ; cout << " gh " << gamma_h() ; cout << " AT " << AT() ; cout << " AP " << AP() ; cout << " A0 " << A0() ; exit(1) ; } return returnValue ; }
// Panel-wise update of the lower triangle of C from the columns of A.
//   alpha:     scale passed to every LocalTrrk call
//   A:         input matrix, traversed left to right in column panels
//   beta:      scale applied once to the lower trapezoid of C before the loop
//   C:         square matrix whose height equals A's height; updated in place
//   conjugate: forwarded to TransposeFrom when forming the transposed panel
// Presumably this computes C := alpha*A*A^T + beta*C (A^H when conjugate is
// true) on the lower triangle only -- confirm against LocalTrrk.
// In non-RELEASE builds a std::logic_error is thrown when A and C live on
// different grids or the dimensions do not conform.
inline void SyrkLN
( T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C, bool conjugate=false )
{
#ifndef RELEASE
    PushCallStack("internal::SyrkLN");
    if( A.Grid() != C.Grid() )
        throw std::logic_error
        ("A and C must be distributed over the same grid");
    if( A.Height() != C.Height() || A.Height() != C.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal SyrkLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g);

    // Temporary distributions, all aligned with C so the local update needs
    // no further redistribution
    DistMatrix<T,MC, STAR> A1_MC_STAR(g);
    DistMatrix<T,VR, STAR> A1_VR_STAR(g);
    DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g);

    A1_MC_STAR.AlignWith( C );
    A1_VR_STAR.AlignWith( C );
    A1Trans_STAR_MR.AlignWith( C );

    // Start the algorithm
    ScaleTrapezoid( beta, LEFT, LOWER, 0, C );
    LockedPartitionRight( A, AL, AR, 0 );
    while( AR.Width() > 0 )
    {
        // Expose the next column panel A1 between A0 (done) and A2 (pending)
        LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 );

        //--------------------------------------------------------------------//
        // Redistribute the panel, form its transpose, and accumulate into C.
        // T(1) is used as the scale on C because beta was applied above.
        A1_VR_STAR = A1_MC_STAR = A1;
        A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR, conjugate );
        LocalTrrk( LOWER, alpha, A1_MC_STAR, A1Trans_STAR_MR, T(1), C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
void test_zerosized() { // default constructors: Eigen::MatrixXd A; Eigen::VectorXd v; // explicit zero-sized: Eigen::ArrayXXd A0(0,0); Eigen::ArrayXd v0(0); // assigning empty objects to each other: A=A0; v=v0; }
// HPX entry point: fills six small test matrices and runs householders() on
// each of them in turn, then shuts the runtime down.
int hpx_main(boost::program_options::variables_map& vm)
{
    {
        // The inner scope ensures every matrix is destroyed before
        // hpx::finalize() is called.

        // QR {{1, 3, 6}, {3, 5, 7}, {6, 7, 4}}
        orthotope<double> A0({3, 3});
        A0.row(0, 1, 3, 6);
        A0.row(1, 3, 5, 7);
        A0.row(2, 6, 7, 4);

        // QR {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}}
        orthotope<double> A1({3, 3});
        A1.row(0, 12, -51, 4);
        A1.row(1, 6, 167, -68);
        A1.row(2, -4, 24, -41);

        // QR {{2, -2, 18}, {2, 1, 0}, {1, 2, 0}}
        orthotope<double> A2({3, 3});
        A2.row(0, 2, -2, 18);
        A2.row(1, 2, 1, 0);
        A2.row(2, 1, 2, 0);

        // QR {{0, 1, 1}, {1, 1, 2}, {0, 0, 3}}
        orthotope<double> A3({3, 3});
        A3.row(0, 0, 1, 1);
        A3.row(1, 1, 1, 2);
        A3.row(2, 0, 0, 3);

        // QR {{1, 1, -1}, {1, 2, 1}, {1, 2, -1}}
        orthotope<double> A4({3, 3});
        A4.row(0, 1, 1, -1);
        A4.row(1, 1, 2, 1);
        A4.row(2, 1, 2, -1);

        // QR {{4, -2, 2, 8}, {-2, 6, 2, 4}, {2, 2, 10, -6}, {8, 4, -6, 12}}
        orthotope<double> A5({4, 4});
        A5.row(0, 4, -2, 2, 8);
        A5.row(1, -2, 6, 2, 4);
        A5.row(2, 2, 2, 10, -6);
        A5.row(3, 8, 4, -6, 12);

        // Run the inputs in declaration order.
        householders(A0);
        householders(A1);
        householders(A2);
        householders(A3);
        householders(A4);
        householders(A5);
    }

    return hpx::finalize();
}
// Ray/triangle intersection: three rays are expected to hit the triangle
// (triA, triB, triC) and one is expected to miss it.
TYPED_TEST(Intesection_TEST, intersection_ray_triangle)
{
	using Scalar = typename cgogn::geometry::vector_traits<TypeParam>::Scalar;
	using cgogn::geometry::intersection_ray_triangle;

	// Triangle vertices.
	const TypeParam triA(Scalar(1), Scalar(1), Scalar(96.1));
	const TypeParam triB(Scalar(5), Scalar(1), Scalar(92.3));
	const TypeParam triC(Scalar(3), Scalar(5), Scalar(94.2));

	// Ray directions: one slightly tilted, one along +z.
	const TypeParam tiltedDir(Scalar(0.001), Scalar(0.001), Scalar(1.0));
	const TypeParam verticalDir(Scalar(0), Scalar(0), Scalar(1.0));

	// Ray origins, all in the z = 0 plane.
	const TypeParam originCentre(Scalar(3), Scalar(3), Scalar(0));
	const TypeParam originEdge(Scalar(3), Scalar(1), Scalar(0));
	const TypeParam originVertex(Scalar(5), Scalar(1), Scalar(0));
	const TypeParam originFar(Scalar(9), Scalar(5), Scalar(0));

	EXPECT_TRUE(intersection_ray_triangle(originCentre, tiltedDir, triA, triB, triC));
	EXPECT_TRUE(intersection_ray_triangle(originEdge, verticalDir, triA, triB, triC));
	EXPECT_TRUE(intersection_ray_triangle(originVertex, verticalDir, triA, triB, triC));
	EXPECT_FALSE(intersection_ray_triangle(originFar, tiltedDir, triA, triB, triC));
}
void collect_dist_matrix(boost::mpi::communicator& comm, bool here, const elem::DistMatrix<T, elem::STAR, ColDist> &DA, elem::Matrix<T> &A) { if (ColDist == elem::VR || ColDist == elem::VC) { // TODO this is probably the most laziest way to do it. // Must be possible to do it much better (less communication). try { elem::Matrix<T> A0(DA.Height(), DA.Width(), DA.Height()); const elem::Matrix<T> &A_local = DA.LockedMatrix(); elem::Zero(A0); for(int j = 0; j < A_local.Width(); j++) for(int i = 0; i < A_local.Height(); i++) A0.Set(i, DA.RowShift() + j * DA.RowStride(), A_local.Get(i, j)); boost::mpi::reduce (comm, A0.LockedBuffer(), A0.MemorySize(), A.Buffer(), std::plus<T>(), 0); } catch (std::logic_error e) { SKYLARK_THROW_EXCEPTION (base::elemental_exception() << base::error_msg(e.what()) ); } catch(boost::mpi::exception e) { SKYLARK_THROW_EXCEPTION (base::mpi_exception() << base::error_msg(e.what()) ); } } else { SKYLARK_THROW_EXCEPTION ( base::unsupported_matrix_distribution() ); } }
// Blocked distributed product that updates C one C11 tile at a time.
//   alpha: scale passed to every LocalGemm call
//   A, B:  inputs; A is traversed in row panels, B in column panels
//   beta:  scale applied once to C before the loops
//   C:     A.Height() x B.Width() result, updated in place
// Presumably this computes C := alpha*A*B + beta*C -- confirm against
// LocalGemm / SumScatterUpdate.
// The two branches differ only in loop nesting and in the distributions used
// (VC vs VR): when A.Height() > B.Width() the row panels of A form the outer
// loop, otherwise the column panels of B do.
// In non-RELEASE builds a std::logic_error is thrown when the grids differ or
// the dimensions do not conform.
inline void GemmNNDot
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNDot");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNDot: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    if( A.Height() > B.Width() )
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g);
        DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g);
        DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g), CB(g), C1(g), C10(g), C11(g), C12(g), C2(g);

        // Temporary distributions
        DistMatrix<T,STAR,VC> A1_STAR_VC(g);
        DistMatrix<T,VC,STAR> B1_VC_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionDown ( A, AT, AB, 0 );
        PartitionDown ( C, CT, CB, 0 );
        while( AB.Height() > 0 )
        {
            // Outer loop: next row panel A1 and the matching row panel C1
            LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 );
            RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 );

            // A1 is redistributed once per outer iteration and reused for
            // every column panel of B.
            A1_STAR_VC = A1;
            B1_VC_STAR.AlignWith( A1_STAR_VC );

            LockedPartitionRight( B, BL, BR, 0 );
            PartitionRight( C1, C1L, C1R, 0 );
            while( BR.Width() > 0 )
            {
                // Inner loop: next column panel B1 and the tile C11
                LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 );
                RepartitionRight ( C1L, /**/ C1R, C10, /**/ C11, C12 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                // Local product into the zeroed scratch tile, then summed and
                // added to C11 with scale T(1) (beta was applied above).
                B1_VC_STAR = B1;
                LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 );
                SlidePartitionRight ( C1L, /**/ C1R, C10, C11, /**/ C12 );
            }
            // Release the alignment so it can be re-established against the
            // next A1 panel.
            B1_VC_STAR.FreeAlignments();

            SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 );
            SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 );
        }
    }
    else
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g);
        DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g);
        DistMatrix<T> CL(g), CR(g), C1T(g), C01(g), C0(g), C1(g), C2(g), C1B(g), C11(g), C21(g);

        // Temporary distributions
        DistMatrix<T,STAR,VR> A1_STAR_VR(g);
        DistMatrix<T,VR,STAR> B1_VR_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionRight( B, BL, BR, 0 );
        PartitionRight( C, CL, CR, 0 );
        while( BR.Width() > 0 )
        {
            // Outer loop: next column panel B1 and the matching column
            // panel C1
            LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 );
            RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 );

            // B1 is redistributed once per outer iteration and reused for
            // every row panel of A.
            B1_VR_STAR = B1;
            A1_STAR_VR.AlignWith( B1_VR_STAR );

            LockedPartitionDown ( A, AT, AB, 0 );
            PartitionDown ( C1, C1T, C1B, 0 );
            while( AB.Height() > 0 )
            {
                // Inner loop: next row panel A1 and the tile C11
                LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 );
                RepartitionDown ( C1T, C01, /***/ /***/ C11, C1B, C21 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                A1_STAR_VR = A1;
                LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 );
                SlidePartitionDown ( C1T, C01, C11, /***/ /***/ C1B, C21 );
            }
            // Release the alignment so it can be re-established against the
            // next B1 panel.
            A1_STAR_VR.FreeAlignments();

            SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 );
            SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Complex variant: applies the reflectors packed in H to A, one diagonal
// panel of H at a time, walking down the diagonal.
//   conjugation: forwarded to FixDiagonal
//   offset:      diagonal of H the reflectors are stored relative to; must
//                satisfy -H.Height() <= offset <= 0
//   H:           packed reflectors, same height as A
//   t:           scalars belonging to the reflectors, with the length of and
//                aligned with H's `offset` diagonal
//   A:           target matrix, updated in place
// In non-RELEASE builds a std::logic_error is thrown when any of the above
// conditions is violated or the grids differ.
inline void internal::ApplyPackedReflectorsLLVF
( Conjugation conjugation, int offset, const DistMatrix<Complex<R>,MC,MR >& H, const DistMatrix<Complex<R>,MD,STAR>& t, DistMatrix<Complex<R>,MC,MR >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLLVF");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        throw std::logic_error
        ("{H,t,A} must be distributed over the same grid");
    if( offset > 0 )
        throw std::logic_error("Transforms cannot extend above matrix");
    if( offset < -H.Height() )
        throw std::logic_error("Transforms cannot extend below matrix");
    if( H.Height() != A.Height() )
        throw std::logic_error
        ("Height of transforms must equal height of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag.");
    if( !t.AlignedWithDiagonal( H, offset ) )
        throw std::logic_error("t must be aligned with H's 'offset' diagonal");
#endif
    typedef Complex<R> C;
    const Grid& g = H.Grid();

    // Matrix views
    DistMatrix<C,MC,MR> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g);
    DistMatrix<C,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g);
    DistMatrix<C,MD,STAR> tT(g), t0(g), tB(g), t1(g), t2(g);

    // Temporary distributions
    DistMatrix<C,VC, STAR> HPan_VC_STAR(g);
    DistMatrix<C,MC, STAR> HPan_MC_STAR(g);
    DistMatrix<C,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<C,STAR,MR > Z_STAR_MR(g);
    DistMatrix<C,STAR,VR > Z_STAR_VR(g);

    LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 );
    LockedPartitionDown ( t, tT, tB, 0 );
    PartitionDown ( A, AT, AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 );

        // The panel spans H11 and H21; its width is clipped so that no
        // column lies beyond the `offset` diagonal (possibly zero columns).
        int HPanHeight = H11.Height() + H21.Height();
        int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) );
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        // t1 is sized to match the number of columns in the panel.
        LockedRepartitionDown ( tT, t0, /**/ /**/ t1, tB, t2, HPanWidth );

        RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 );

        HPan_MC_STAR.AlignWith( AB );
        Z_STAR_MR.AlignWith( AB );
        Z_STAR_VR.AlignWith( AB );
        Z_STAR_MR.ResizeTo( HPan.Width(), AB.Width() );
        SInv_STAR_STAR.ResizeTo( HPan.Width(), HPan.Width() );
        Zero( SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        // Work on a copy of the panel that is made trapezoidal with a unit
        // diagonal, so H itself is never modified.
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        // Form SInv from the panel (local Herk summed over the grid) and fix
        // its diagonal using t1.
        HPan_VC_STAR = HPanCopy;
        Herk ( UPPER, ADJOINT, (C)1, HPan_VC_STAR.LockedLocalMatrix(), (C)0, SInv_STAR_STAR.LocalMatrix() );
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        // Z := HPan^H AB, solved against SInv, then AB -= HPan Z.
        HPan_MC_STAR = HPanCopy;
        internal::LocalGemm ( ADJOINT, NORMAL, (C)1, HPan_MC_STAR, AB, (C)0, Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );

        internal::LocalTrsm ( LEFT, UPPER, ADJOINT, NON_UNIT, (C)1, SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        internal::LocalGemm ( NORMAL, NORMAL, (C)-1, HPan_MC_STAR, Z_STAR_MR, (C)1, AB );
        //--------------------------------------------------------------------//
        // Alignments depend on the current AB, so they are released before
        // the partitions move.
        HPan_MC_STAR.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 );

        SlideLockedPartitionDown ( tT, t0, t1, /**/ /**/ tB, t2 );

        SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Real variant: applies the reflectors packed in H to A, one diagonal panel
// of H at a time, walking down the diagonal. No separate scalar vector is
// taken; the diagonal of SInv is obtained by halving instead.
//   offset: diagonal of H the reflectors are stored relative to; must
//           satisfy -H.Height() <= offset <= 0
//   H:      packed reflectors, same height as A
//   A:      target matrix, updated in place
// In non-RELEASE builds a std::logic_error is thrown when any of the above
// conditions is violated or the grids differ.
inline void internal::ApplyPackedReflectorsLLVF
( int offset, const DistMatrix<R,MC,MR>& H, DistMatrix<R,MC,MR>& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLLVF");
    if( H.Grid() != A.Grid() )
        throw std::logic_error("{H,A} must be distributed over the same grid");
    if( offset > 0 )
        throw std::logic_error("Transforms cannot extend above matrix");
    if( offset < -H.Height() )
        throw std::logic_error("Transforms cannot extend below matrix");
    if( H.Height() != A.Height() )
        throw std::logic_error
        ("Height of transforms must equal height of target matrix");
#endif
    const Grid& g = H.Grid();

    // Matrix views
    DistMatrix<R,MC,MR> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g);
    DistMatrix<R,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g);

    // Temporary distributions
    DistMatrix<R,VC, STAR> HPan_VC_STAR(g);
    DistMatrix<R,MC, STAR> HPan_MC_STAR(g);
    DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<R,STAR,MR > Z_STAR_MR(g);
    DistMatrix<R,STAR,VR > Z_STAR_VR(g);

    LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 );
    PartitionDown ( A, AT, AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 );

        // The panel spans H11 and H21; its width is clipped so that no
        // column lies beyond the `offset` diagonal (possibly zero columns).
        int HPanHeight = H11.Height() + H21.Height();
        int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) );
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 );

        HPan_MC_STAR.AlignWith( AB );
        Z_STAR_MR.AlignWith( AB );
        Z_STAR_VR.AlignWith( AB );
        Z_STAR_MR.ResizeTo( HPanWidth, AB.Width() );
        SInv_STAR_STAR.ResizeTo( HPanWidth, HPanWidth );
        Zero( SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        // Work on a copy of the panel that is made trapezoidal with a unit
        // diagonal, so H itself is never modified.
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        // Form SInv from the panel (local Syrk summed over the grid), then
        // halve its main diagonal.
        HPan_VC_STAR = HPanCopy;
        Syrk ( UPPER, TRANSPOSE, (R)1, HPan_VC_STAR.LockedLocalMatrix(), (R)0, SInv_STAR_STAR.LocalMatrix() );
        SInv_STAR_STAR.SumOverGrid();
        HalveMainDiagonal( SInv_STAR_STAR );

        // Z := HPan^T AB, solved against SInv, then AB -= HPan Z.
        HPan_MC_STAR = HPanCopy;
        internal::LocalGemm ( TRANSPOSE, NORMAL, (R)1, HPan_MC_STAR, AB, (R)0, Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );

        internal::LocalTrsm ( LEFT, UPPER, TRANSPOSE, NON_UNIT, (R)1, SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        internal::LocalGemm ( NORMAL, NORMAL, (R)-1, HPan_MC_STAR, Z_STAR_MR, (R)1, AB );
        //--------------------------------------------------------------------//
        // Alignments depend on the current AB, so they are released before
        // the partitions move.
        HPan_MC_STAR.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 );

        SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Panel-wise two-sided update of the lower triangle of C from the columns of
// A and B.
//   alpha: scale passed to every LocalTrr2k call
//   A, B:  inputs with equal widths and heights equal to C's dimension;
//          traversed left to right in matching column panels
//   beta:  scale applied once to the lower trapezoid of C before the loop
//   C:     square matrix, updated in place
// Presumably this is the Hermitian rank-2k update restricted to the lower
// triangle -- confirm the exact scaling convention against LocalTrr2k.
// In non-RELEASE builds a std::logic_error is thrown when the grids differ or
// the dimensions do not conform.
inline void Her2kLN
( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C )
{
#ifndef RELEASE
    PushCallStack("internal::Her2kLN");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal Her2kLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T,MC,MR> AL(g), AR(g), A0(g), A1(g), A2(g);
    DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g);

    // Temporary distributions, all aligned with C so the local update needs
    // no further redistribution
    DistMatrix<T,MC, STAR> A1_MC_STAR(g);
    DistMatrix<T,MC, STAR> B1_MC_STAR(g);
    DistMatrix<T,VR, STAR> A1_VR_STAR(g);
    DistMatrix<T,VR, STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR > A1Adj_STAR_MR(g);
    DistMatrix<T,STAR,MR > B1Adj_STAR_MR(g);

    A1_MC_STAR.AlignWith( C );
    B1_MC_STAR.AlignWith( C );
    A1_VR_STAR.AlignWith( C );
    B1_VR_STAR.AlignWith( C );
    A1Adj_STAR_MR.AlignWith( C );
    B1Adj_STAR_MR.AlignWith( C );

    // Start the algorithm
    ScaleTrapezoid( beta, LEFT, LOWER, 0, C );
    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    while( AR.Width() > 0 )
    {
        // Expose the next column panels A1 and B1
        LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 );

        LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 );

        //--------------------------------------------------------------------//
        // Redistribute both panels and form their adjoints.
        A1_VR_STAR = A1_MC_STAR = A1;
        A1Adj_STAR_MR.AdjointFrom( A1_VR_STAR );

        B1_VR_STAR = B1_MC_STAR = B1;
        B1Adj_STAR_MR.AdjointFrom( B1_VR_STAR );

        // Accumulate the A1*B1^H and B1*A1^H contributions into the lower
        // part of C; T(1) is the scale on C because beta was applied above.
        LocalTrr2k ( LOWER, alpha, A1_MC_STAR, B1Adj_STAR_MR, B1_MC_STAR, A1Adj_STAR_MR, T(1), C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 );

        SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Panel-wise two-sided update of the upper triangle of C from the rows of A
// and B.
//   alpha:     scale passed to every LocalTrr2k call
//   A, B:      inputs with equal heights and widths equal to C's dimension;
//              traversed top to bottom in matching row panels
//   beta:      scale applied once to the upper trapezoid of C before the loop
//   C:         square matrix, updated in place
//   conjugate: selects ADJOINT instead of TRANSPOSE as the orientation passed
//              to LocalTrr2k for the [STAR,MC] operands
// Presumably this is the symmetric (or, with conjugate, Hermitian) rank-2k
// update on the upper triangle -- confirm against LocalTrr2k.
// In non-RELEASE builds a std::logic_error is thrown when the grids differ or
// the dimensions do not conform.
inline void Syr2kUT
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false )
{
#ifndef RELEASE
    CallStackEntry entry("internal::Syr2kUT");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal Syr2kUT:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();
    const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE );

    // Matrix views
    DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g);
    DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g);

    // Temporary distributions
    DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g);
    DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,STAR,VR > A1_STAR_VR(g);
    DistMatrix<T,STAR,VR > B1_STAR_VR(g);
    DistMatrix<T,STAR,MC > A1_STAR_MC(g);
    DistMatrix<T,STAR,MC > B1_STAR_MC(g);

    A1Trans_MR_STAR.AlignWith( C );
    B1Trans_MR_STAR.AlignWith( C );
    A1_STAR_MC.AlignWith( C );
    B1_STAR_MC.AlignWith( C );

    // Start the algorithm
    ScaleTrapezoid( beta, LEFT, UPPER, 0, C );
    LockedPartitionDown ( A, AT, AB, 0 );
    LockedPartitionDown ( B, BT, BB, 0 );
    while( AB.Height() > 0 )
    {
        // Expose the next row panels A1 and B1
        LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 );

        LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 );

        //--------------------------------------------------------------------//
        // Each panel is transposed into [MR,STAR], transposed back into
        // [STAR,VR], and from there redistributed to [STAR,MC].
        A1Trans_MR_STAR.TransposeFrom( A1 );
        A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR );
        A1_STAR_MC = A1_STAR_VR;

        B1Trans_MR_STAR.TransposeFrom( B1 );
        B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR );
        B1_STAR_MC = B1_STAR_VR;

        // T(1) is the scale on C because beta was applied above.
        LocalTrr2k ( UPPER, orientation, TRANSPOSE, orientation, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 );

        SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 );
    }
}
// Panel-wise triangular rank-2k style update of E from four operands.
//   uplo:           which triangle of E is updated (forwarded to LocalTrr2k)
//   orientationOfB: ADJOINT selects AdjointFrom for the B panel, anything
//                   else selects TransposeFrom
//   orientationOfC: forwarded to LocalTrr2k
//   alpha, beta:    scales forwarded to LocalTrr2k
//   A, B:           traversed left to right in matching column panels
//   C, D:           traversed top to bottom in matching row panels
//   E:              square result, updated in place
// Presumably E := alpha*(A*op(B) + op(C)*D) + beta*E on the `uplo` triangle
// -- confirm against LocalTrr2k.
// In non-RELEASE builds LogicError is raised when the dimensions do not
// conform.
void Trr2kNTTN
( UpperOrLower uplo, Orientation orientationOfB, Orientation orientationOfC, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E )
{
#ifndef RELEASE
    CallStackEntry entry("internal::Trr2kNTTN");
    if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Width() != E.Width() || A.Width() != B.Width() || C.Height() != D.Height() )
        LogicError("Nonconformal Trr2kNTTN");
#endif
    const Grid& g = E.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g);
    DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g);
    DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g);
    DistMatrix<T> DT(g), D0(g), DB(g), D1(g), D2(g);

    // Temporary distributions, all aligned with E
    DistMatrix<T,MC, STAR> A1_MC_STAR(g);
    DistMatrix<T,VR, STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC > C1_STAR_MC(g);
    DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g);

    A1_MC_STAR.AlignWith( E );
    B1_VR_STAR.AlignWith( E );
    B1AdjOrTrans_STAR_MR.AlignWith( E );
    C1_STAR_MC.AlignWith( E );
    D1Trans_MR_STAR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    LockedPartitionDown ( C, CT, CB, 0 );
    LockedPartitionDown ( D, DT, DB, 0 );
    while( AL.Width() < A.Width() )
    {
        // Expose the next panel of each operand
        LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 );
        LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 );
        LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 );
        LockedRepartitionDown ( DT, D0, /**/ /**/ D1, DB, D2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_STAR_MC = C1;
        B1_VR_STAR = B1;
        if( orientationOfB == ADJOINT )
            B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR );
        else
            B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR );
        D1Trans_MR_STAR.TransposeFrom( D1 );
        // NOTE(review): beta is passed on every iteration rather than being
        // applied once up front with T(1) used here; if LocalTrr2k scales E
        // by this argument, E is rescaled once per panel -- confirm intended.
        LocalTrr2k ( uplo, orientationOfC, TRANSPOSE, alpha, A1_MC_STAR, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1Trans_MR_STAR, beta, E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 );
        SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 );
        SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 );
        SlideLockedPartitionDown ( DT, D0, D1, /**/ /**/ DB, D2 );
    }
}
// Real variant: applies the reflectors packed in the upper part of H to A,
// one diagonal panel of H at a time, walking down the diagonal.
//   offset: diagonal of H the reflectors are stored relative to; must
//           satisfy 0 <= offset <= H.Height()
//   H:      packed reflectors; its width must equal A's height
//   A:      target matrix, updated in place
// Each panel reaches from the top of H down through H11, and the update is
// applied to ATop, the rows of A covered by A0 and A1.
// In non-RELEASE builds a std::logic_error is thrown when any of the above
// conditions is violated or the grids differ.
inline void ApplyPackedReflectorsLUVF
( int offset, const DistMatrix<R>& H, DistMatrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLUVF");
    if( H.Grid() != A.Grid() )
        throw std::logic_error("{H,A} must be distributed over the same grid");
    if( offset < 0 || offset > H.Height() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Height() )
        throw std::logic_error
        ("Width of transforms must equal height of target matrix");
#endif
    const Grid& g = H.Grid();

    // Matrix views
    DistMatrix<R> HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HBL(g), HBR(g), H10(g), H11(g), H12(g), H20(g), H21(g), H22(g);
    DistMatrix<R> AT(g), A0(g), ATop(g), AB(g), A1(g), A2(g);

    // Temporary distributions
    DistMatrix<R> HPanCopy(g);
    DistMatrix<R,VC, STAR> HPan_VC_STAR(g);
    DistMatrix<R,MC, STAR> HPan_MC_STAR(g);
    DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<R,STAR,MR > Z_STAR_MR(g);
    DistMatrix<R,STAR,VR > Z_STAR_VR(g);

    LockedPartitionDownDiagonal ( H, HTL, HTR, HBL, HBR, 0 );
    PartitionDown ( A, AT, AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal ( HTL, /**/ HTR, H00, /**/ H01, H02, /*************/ /******************/ /**/ H10, /**/ H11, H12, HBL, /**/ HBR, H20, /**/ H21, H22 );

        // The panel spans H01 and H11; columns to the left of the `offset`
        // diagonal are skipped via HPanOffset (possibly leaving zero columns).
        const int HPanHeight = H01.Height() + H11.Height();
        const int HPanOffset = std::min( H11.Width(), std::max(offset-H00.Width(),0) );
        const int HPanWidth = H11.Width()-HPanOffset;
        HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth );

        RepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 );

        // The update touches the rows of A0 and A1 together.
        ATop.View2x1( A0, A1 );

        HPan_MC_STAR.AlignWith( ATop );
        Z_STAR_MR.AlignWith( ATop );
        Z_STAR_VR.AlignWith( ATop );
        Zeros( HPan.Width(), ATop.Width(), Z_STAR_MR );
        Zeros( HPan.Width(), HPan.Width(), SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        // Work on a copy of the panel that is made trapezoidal with a unit
        // diagonal, so H itself is never modified.
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );

        // Form SInv from the panel (local Syrk summed over the grid), then
        // halve its main diagonal.
        HPan_VC_STAR = HPanCopy;
        Syrk ( LOWER, TRANSPOSE, R(1), HPan_VC_STAR.LockedLocalMatrix(), R(0), SInv_STAR_STAR.LocalMatrix() );
        SInv_STAR_STAR.SumOverGrid();
        HalveMainDiagonal( SInv_STAR_STAR );

        // Z := HPan^T ATop, solved against SInv, then ATop -= HPan Z.
        HPan_MC_STAR = HPanCopy;
        LocalGemm ( TRANSPOSE, NORMAL, R(1), HPan_MC_STAR, ATop, R(0), Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );

        LocalTrsm ( LEFT, LOWER, NORMAL, NON_UNIT, R(1), SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        LocalGemm( NORMAL, NORMAL, R(-1), HPan_MC_STAR, Z_STAR_MR, R(1), ATop );
        //--------------------------------------------------------------------//
        // Alignments depend on the current ATop, so they are released before
        // the partitions move.
        HPan_MC_STAR.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionDownDiagonal ( HTL, /**/ HTR, H00, H01, /**/ H02, /**/ H10, H11, /**/ H12, /*************/ /******************/ HBL, /**/ HBR, H20, H21, /**/ H22 );

        SlidePartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Returns the real part of A0(m2), after first passing mu2 to setmudim().
//
//   mu2 : value handed to setmudim() before A0 is evaluated.
//   m2  : argument forwarded to A0().
double LoopToolsWrapper::PV_A0(const double mu2, const double m2) const
{
    // setmudim() must run before A0() so that A0 sees the requested mu2.
    setmudim(mu2);

    const std::complex<double> tadpole = A0(m2);
    return tadpole.real();
}
void Trr2kNNNT ( UpperOrLower uplo, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNNT"); if( E.Height() != E.Width() || A.Width() != C.Width() || A.Height() != E.Height() || C.Height() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Width() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNNT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,MC, STAR> C1_MC_STAR(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_MC_STAR.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionRight( C, CL, CR, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_MC_STAR = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_MC_STAR, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// 
SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmTTC ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTC expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( C ); B1_VR_STAR.AlignWith( C ); B1AdjOrTrans_STAR_MR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); // C[MC,MR] += alpha (A1[*,MC])^[T/H] (B1[MR,*])^[T/H] // = alpha (A1^[T/H])[MC,*] (B1^[T/H])[*,MR] LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// 
SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmTTB ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTB expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > D1_STAR_MC(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); A1_VR_STAR.AlignWith( B ); A1AdjOrTrans_STAR_MR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_STAR_MC ); //--------------------------------------------------------------------// A1_VR_STAR = A1; if( orientationOfA == ADJOINT ) A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR ); else A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR ); // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H] // = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC] LocalGemm ( NORMAL, orientationOfB, alpha, A1AdjOrTrans_STAR_MR, B, T(0), 
D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows D1_MR_MC.SumScatterFrom( D1_STAR_MC ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; B1Trans_MR_STAR.TransposeFrom( B1 ); // C[MC,MR] += alpha A1[MC,*] (B1^T[MR,*])^T // = alpha A1[MC,*] B1[*,MR] LocalGemm ( NORMAL, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionDown( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
// Blocked Householder reduction of the square matrix A, performed in place.
//
//   A     : square matrix view (asserted); overwritten with the reduced
//           matrix, with the Householder vectors stored below it.
//   Ubeta : vector view of size N-1 (asserted, unit step, not conjugated);
//           element j receives the value returned by Householder_Reflect for
//           column j.
//
// With XDEBUG defined, the result is checked by rebuilding U*H*U.adjoint()
// and comparing it with a copy of the input; on a mismatch diagnostics are
// written to cerr and the program aborts.
static void BlockHessenberg(
    MatrixView<T> A, VectorView<T> Ubeta)
{
    // Much like the block version of Bidiagonalize, we try to maintain
    // the operation of several successive Householder matrices in
    // a block form, where the net Block Householder is I - YZYt.
    //
    // But as with the bidiagonalization algorithm (and unlike a simple
    // block QR decomposition), we update the matrix from both the left
    // and the right, so we also need to keep track of the product
    // ZYtm in addition.
    //
    // The block update at the end of the block loop is
    // m' = (I-YZYt) m (I-YZtYt)
    //
    // The Y matrix is stored in the first K columns of m,
    // and the Hessenberg portion of these columns is updated as we go.
    // For the right-hand-side update, m -= mYZtYt, the m on the right
    // needs to be the full original matrix m, including the original
    // versions of these K columns.  Therefore, we can't wait until
    // the end for this calculation.
    //
    // Instead, we keep track of mYZt as we progress, so the final update
    // is:
    //
    // m' = (I-YZYt) (m - mYZt Y)
    //
    // We also need to do this same calculation for each column as we
    // progress through the block.
    //
    const ptrdiff_t N = A.rowsize();

#ifdef XDEBUG
    Matrix<T> A0(A);
#endif

    TMVAssert(A.rowsize() == A.colsize());
    TMVAssert(N > 0);
    TMVAssert(Ubeta.size() == N-1);
    TMVAssert(!Ubeta.isconj());
    TMVAssert(Ubeta.step()==1);

    ptrdiff_t ncolmax = MIN(HESS_BLOCKSIZE,N-1);
    Matrix<T,RowMajor> mYZt_full(N,ncolmax);
    UpperTriMatrix<T,NonUnitDiag|ColMajor> Z_full(ncolmax);

    T det(0); // Ignore Householder Determinant calculations
    T* Uj = Ubeta.ptr();
    for(ptrdiff_t j1=0;j1<N-1;) {
        ptrdiff_t j2 = MIN(N-1,j1+HESS_BLOCKSIZE);
        ptrdiff_t ncols = j2-j1;
        MatrixView<T> mYZt = mYZt_full.subMatrix(0,N-j1,0,ncols);
        UpperTriMatrixView<T> Z = Z_full.subTriMatrix(0,ncols);

        for(ptrdiff_t j=j1,jj=0;j<j2;++j,++jj,++Uj) { // jj = j-j1

            // Update current column of A
            //
            // m' = (I - YZYt) (m - mYZt Yt)
            // A(0:N,j)' = A(0:N,j) - mYZt(0:N,0:j) Y(j,0:j)t
            // NOTE(review): this statement indexes mYZt and the row of A with
            // the global column index j rather than the block-local jj/j1
            // used everywhere else in this loop -- confirm the ranges.
            A.col(j,j1+1,N) -= mYZt.Cols(0,j) * A.row(j,0,j).Conjugate();
            //
            // A(0:N,j)'' = A(0:N,j) - Y Z Yt A(0:N,j)'
            //
            // Let Y = (L)     where L is unit-diagonal, lower-triangular,
            //         (M)     and M is rectangular
            //
            LowerTriMatrixView<T> L =
                LowerTriMatrixViewOf(A.subMatrix(j1+1,j+1,j1,j),UnitDiag);
            MatrixView<T> M = A.subMatrix(j+1,N,j1,j);
            // Use the last column of Z as temporary storage for Yt A(0:N,j)'
            VectorView<T> YtAj = Z.col(jj,0,jj);
            YtAj = L.adjoint() * A.col(j,j1+1,j+1);
            YtAj += M.adjoint() * A.col(j,j+1,N);
            YtAj = Z.subTriMatrix(0,jj) * YtAj;
            A.col(j,j1+1,j+1) -= L * YtAj;
            A.col(j,j+1,N) -= M * YtAj;

            // Do the Householder reflection
            VectorView<T> u = A.col(j,j+1,N);
            T bu = Householder_Reflect(u,det);
#ifdef TMVFLDEBUG
            TMVAssert(Uj >= Ubeta._first);
            TMVAssert(Uj < Ubeta._last);
#endif
            *Uj = bu;

            // Save the top of the u vector, which isn't actually part of u
            T& Atemp = *u.cptr();
            TMVAssert(IMAG(Atemp) == RealType(T)(0));
            RealType(T) Aorig = REAL(Atemp);
            Atemp = RealType(T)(1);

            // Update Z
            VectorView<T> Zj = Z.col(jj,0,jj);
            Zj = -bu * M.adjoint() * u;
            Zj = Z * Zj;
            Z(jj,jj) = -bu;

            // Update mYZt:
            //
            // mYZt(0:N,j) = m(0:N,0:N) Y(0:N,0:j) Zt(0:j,j)
            //             = m(0:N,j+1:N) Y(j+1:N,j) Zt(j,j)
            //             = bu* m(0:N,j+1:N) u
            //
            mYZt.col(jj) = CONJ(bu) * A.subMatrix(j1,N,j+1,N) * u;

            // Restore Aorig, which is actually part of the Hessenberg matrix.
            Atemp = Aorig;
        }

        // Update the rest of the matrix:
        // A(j2,j2-1) needs to be temporarily changed to 1 for use in Y
        T& Atemp = *(A.ptr() + j2*A.stepi() + (j2-1)*A.stepj());
        TMVAssert(IMAG(Atemp) == RealType(T)(0));
        // Take the real part explicitly, exactly as the column loop above
        // does; a plain copy of a complex T into RealType(T) has no valid
        // conversion.
        RealType(T) Aorig = REAL(Atemp);
        Atemp = RealType(T)(1);

        // m' = (I-YZYt) (m - mYZt Y)
        // NOTE(review): Y starts at row j2+1, so the element A(j2,j2-1) that
        // was just set to 1 "for use in Y" is not inside this view, and
        // mYZt * Y.adjoint() then has one column fewer than m -- confirm
        // whether Y should start at row j2.
        MatrixView<T> m = A.subMatrix(j1,N,j2,N);
        ConstMatrixView<T> Y = A.subMatrix(j2+1,N,j1,j2);
        m -= mYZt * Y.adjoint();
        BlockHouseholder_LMult(Y,Z,m);

        // Restore A(j2,j2-1)
        Atemp = Aorig;
        j1 = j2;
    }

#ifdef XDEBUG
    // Rebuild A from the computed factors and compare with the saved input.
    Matrix<T> U(N,N,T(0));
    U.subMatrix(1,N,1,N) = A.subMatrix(1,N,0,N-1);
    U.upperTri().setZero();
    U(0,0) = T(1);
    Vector<T> Ubeta2(N);
    Ubeta2.subVector(1,N) = Ubeta;
    Ubeta2(0) = T(0);
    GetQFromQR(U.view(),Ubeta2);
    Matrix<T> H = A;
    if (N>2) LowerTriMatrixViewOf(H).offDiag(2).setZero();
    Matrix<T> AA = U*H*U.adjoint();
    if (Norm(A0-AA) > 0.001*Norm(A0)) {
        // This is the block routine, so label the report accordingly; the
        // non-block result is printed further down for comparison.
        cerr<<"Block Hessenberg: A = "<<Type(A)<<" "<<A0<<endl;
        cerr<<"A = "<<A<<endl;
        cerr<<"Ubeta = "<<Ubeta<<endl;
        cerr<<"U = "<<U<<endl;
        cerr<<"H = "<<H<<endl;
        cerr<<"UHUt = "<<AA<<endl;
        Matrix<T,ColMajor> A2 = A0;
        Vector<T> Ub2(Ubeta.size());
        NonBlockHessenberg(A2.view(),Ub2.view());
        cerr<<"cf NonBlock: A -> "<<A2<<endl;
        cerr<<"Ubeta = "<<Ub2<<endl;
        abort();
    }
#endif
}
inline void GemmNNB ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g); A1_STAR_MC.AlignWith( B ); D1Trans_MR_STAR.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ] LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR ); C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }