double Bs2JpsiPhi_SignalAlt_MO_v4::diffXsecNorm1(  ) const
{ 
	//preCalculateTimeIntegrals() ;  Replaced by new Caching mechanism , but this cant be used when event resolution is selected 
	if( useEventResolution() ) preCalculateTimeIntegrals() ;   
	
	double norm =
	
	A0()*A0() * timeFactorA0A0Int(  ) * angAccI1   +  
	AP()*AP() * timeFactorAPAPInt(  ) * angAccI2   +  
	AT()*AT() * timeFactorATATInt(  ) * angAccI3   +  
	
	AP()*AT() * timeFactorImAPATInt(  ) * angAccI4 +  
	A0()*AP() * timeFactorReA0APInt(  ) * angAccI5 +  
	A0()*AT() * timeFactorImA0ATInt(  ) * angAccI6 +  
	
	AS()*AS() * timeFactorASASInt(  ) * angAccI7   +  
	
	AS()*AP() * timeFactorReASAPInt(  ) * angAccI8 +  
	AS()*AT() * timeFactorImASATInt(  ) * angAccI9 +  
	AS()*A0() * timeFactorReASA0Int(  ) * angAccI10 ;  
	
	if( DEBUGFLAG && (norm < 0) ) this->DebugPrintNorm( " Bs2JpsiPhi_SignalAlt_MO_v4_v1::diffXsecNorm1( ) : return value < 0 = ", norm ) ;
	
	return norm ;
}
Example #2
0
static PyObject* geRLS(PyObject *self,PyObject *args)
{

    PyObject *A,*b;
    float lam,delta;
	if (!PyArg_ParseTuple(args,"OOff",&A,&b,&lam,&delta))
	 	return NULL;
	//std::cout<<PyObject_Size(A)<<std::endl;
    //std::cout<<PyObject_Size(PyList_GetItem(A,1))<<std::endl;
	int m = PyObject_Size(A);
	int n = PyObject_Size(PyList_GetItem(A,0));
    //std::cout<<PyFloat_AsDouble(PyList_GetItem(PyList_GetItem(A,1),1))<<std::endl;
	Matrix A0(m,n);
	for(int i = 0;i < m;i++)
		for(int j = 0;j < n;j++)
			A0(i,j) = PyFloat_AsDouble(PyList_GetItem(PyList_GetItem(A,i),j));
	//std::cout<<A0<<std::endl;
	Vector b0(m);
	for(int i = 0;i < m;i++)
		b0[i] = PyFloat_AsDouble(PyList_GetItem(b,i));
	Vector x(n);
	COPT::RLS_Method(A0,b0,x,lam,delta);
	//std::cout<<x<<std::endl;

	PyObject *list;
	list = PyList_New(n);
	for(int j = 0;j < n;j++)
		PyList_SetItem(list,j,Py_BuildValue("f",x[j]));
	//std::cout<<list<<std::endl;
	return Py_BuildValue("O",list);
	//return Py_BuildValue("s","successful extension!");
}
double Bs2JpsiPhi_SignalAlt_MO_v4::diffXsecTimeOnly(  ) const
{          
	preCalculateTimeFactors() ;
	
	double xsec = 
	
	A0()*A0() * timeFactorA0A0(  ) * angAccI1 +
	AP()*AP() * timeFactorAPAP(  ) * angAccI2 +
	AT()*AT() * timeFactorATAT(  ) * angAccI3 +
	
	AP()*AT() * timeFactorImAPAT(  ) * angAccI4 +
	A0()*AP() * timeFactorReA0AP(  ) * angAccI5 +
	A0()*AT() * timeFactorImA0AT(  ) * angAccI6 +
	
	AS()*AS() * timeFactorASAS(  ) * angAccI7 +
	
	AS()*AP() * timeFactorReASAP(  ) * angAccI8 +
	AS()*AT() * timeFactorImASAT(  ) * angAccI9 +
	AS()*A0() * timeFactorReASA0(  ) * angAccI10 ;
	
	if( useTimeAcceptance() ) xsec = xsec * timeAcc->getValue(t);
	
	if( DEBUGFLAG && (xsec < 0) ) this->DebugPrintXsec( " Bs2JpsiPhi_SignalAlt_MO_v4_v1::diffXsecTimeOnly( ) : return value < 0 = ", xsec ) ;
	
	return xsec ;
}
Example #4
0
//		Least mean square method
//		args:
//		A:			the input matrix
//		b:			the right hand vector
//		mu:			the scaling factor
//		return value: the obtained coefficient
static PyObject* pyLeastMeanSquare(PyObject *self,PyObject *args)
{

    PyObject *A,*b;
    float mu;
	if (!PyArg_ParseTuple(args,"OOf",&A,&b,&mu))
	 	return NULL;
	int m = PyObject_Size(A);
	int n = PyObject_Size(PyList_GetItem(A,0));
	Matrix A0(m,n);
	for(int i = 0;i < m;i++)
		for(int j = 0;j < n;j++)
			A0(i,j) = PyFloat_AsDouble(PyList_GetItem(PyList_GetItem(A,i),j));
	Vector b0(m);
	for(int i = 0;i < m;i++)
		b0[i] = PyFloat_AsDouble(PyList_GetItem(b,i));
	Vector x(n);
	COPT::LeastMeanSquareMethod(A0,b0,mu,x);

	PyObject *list;
	list = PyList_New(n);
	for(int j = 0;j < n;j++)
		PyList_SetItem(list,j,Py_BuildValue("f",x[j]));
	return Py_BuildValue("O",list);
}
double Bs2JpsiPhi_mistagObservable_alt::diffXsecNorm2(  ) const
{          
	double norm = 
	0.5 * A0()*A0() * timeFactorA0A0(  ) +    // Angle factors normalised to 1
	0.5 * AP()*AP() * timeFactorAPAP(  ) +
	0.5 * AT()*AT() * timeFactorATAT(  ) ;
	
	return norm ;
};
double Bs2JpsiPhi_mistagObservable_alt::diffXsec(  )  const
{   
	double xsec = 
	0.5 * A0()*A0() * timeFactorA0A0(  ) * angleFactorA0A0( ) +
	0.5 * AP()*AP() * timeFactorAPAP(  ) * angleFactorAPAP( ) +
	0.5 * AT()*AT() * timeFactorATAT(  ) * angleFactorATAT( ) +
	0.5 * A0()*AP() * timeFactorReA0AP(  ) * angleFactorReA0AP( ) +
	0.5 * AP()*AT() * timeFactorImAPAT(  ) * angleFactorImAPAT( ) +
	0.5 * A0()*AT() * timeFactorImA0AT(  ) * angleFactorImA0AT( ) ;
	
	return xsec ;
};
double Bs2JpsiPhi_mistagObservable_alt::diffXsecNorm1(  ) const
{      
	double reference =  32.0*TMath::Pi()/9.0 ;
	
	double norm = 
	0.5 * A0()*A0() * timeFactorA0A0Int(  ) * angAccI1   +  
	0.5 * AP()*AP() * timeFactorAPAPInt(  ) * angAccI2   +  
	0.5 * AT()*AT() * timeFactorATATInt(  ) * angAccI3   +  

	0.5 * A0()*AP() * timeFactorReA0APInt(  ) * angAccI5 +  
	0.5 * AP()*AT() * timeFactorImAPATInt(  ) * angAccI4 +  
	0.5 * A0()*AT() * timeFactorImA0ATInt(  ) * angAccI6 ;  	
	// In the canonical PDF, the ApAt term is number 4!
	return norm ;
};
Example #8
0
double SBVAR_symmetric_linear_normalized::LogPrior(void)
{
  for (int i=0; i < n_vars; i++)
    if (A0(i,i) < 0.0)
      return -1.0E300;
  return SBVAR_symmetric_linear::LogPrior() + 0.693147180559945*n_vars;    // 0.693147180559945 = log(2)
}
//.......................................................
// New speed up method to Cache time integrals
void Bs2JpsiPhi_SignalAlt_MO_v4::CacheAmplitudesAndAngles() {
	
	CachedA1 = A0()*A0() * angleFactorA0A0( ) ;
	CachedA2 = AP()*AP() * angleFactorAPAP( ) ;
	CachedA3 = AT()*AT() * angleFactorATAT( ) ;
	
	CachedA4 = AP()*AT() * angleFactorImAPAT( ) ;
	CachedA5 = A0()*AP() * angleFactorReA0AP( ) ;
	CachedA6 = A0()*AT() * angleFactorImA0AT( ) ;
	
	CachedA7 = AS()*AS() * angleFactorASAS( ) ;
	
	CachedA8 = AS()*AP() * angleFactorReASAP( ) ;
	CachedA9 = AS()*AT() * angleFactorImASAT( ) ;
	CachedA10= AS()*A0() * angleFactorReASA0( ) ;	
	
}
Example #10
0
    static void NonBlockHessenberg(
        MatrixView<T> A, VectorView<T> Ubeta)
    {
#ifdef XDEBUG
        cout<<"Start NonBlock Hessenberg Reduction: A = "<<A<<endl;
        Matrix<T> A0(A);
#endif
        // Decompose A into U H Ut
        // H is a Hessenberg Matrix
        // U is a Unitary Matrix
        // On output, H is stored in the upper-Hessenberg part of A
        // U is stored in compact form in the rest of A along with 
        // the vector Ubeta.
        const ptrdiff_t N = A.rowsize();

        TMVAssert(A.colsize() == A.rowsize());
        TMVAssert(N > 0);
        TMVAssert(Ubeta.size() == N-1);
        TMVAssert(A.iscm() || A.isrm());
        TMVAssert(!Ubeta.isconj());
        TMVAssert(Ubeta.step()==1);

        // We use Householder reflections to reduce A to the Hessenberg form:
        T* Uj = Ubeta.ptr();
        T det = 0; // Ignore Householder det calculations
        for(ptrdiff_t j=0;j<N-1;++j,++Uj) {
#ifdef TMVFLDEBUG
            TMVAssert(Uj >= Ubeta._first);
            TMVAssert(Uj < Ubeta._last);
#endif
            *Uj = Householder_Reflect(A.subMatrix(j+1,N,j,N),det);
            if (*Uj != T(0))
                Householder_LMult(A.col(j+2,N),*Uj,A.subMatrix(0,N,j+1,N).adjoint());
        }

#ifdef XDEBUG
        Matrix<T> U(N,N,T(0));
        U.subMatrix(1,N,1,N) = A.subMatrix(1,N,0,N-1);
        U.upperTri().setZero();
        Vector<T> Ubeta2(N);
        Ubeta2.subVector(1,N) = Ubeta;
        Ubeta2(0) = T(0);
        GetQFromQR(U.view(),Ubeta2);
        Matrix<T> H = A;
        if (N>2) LowerTriMatrixViewOf(H).offDiag(2).setZero();
        Matrix<T> AA = U*H*U.adjoint();
        if (Norm(A0-AA) > 0.001*Norm(A0)) {
            cerr<<"NonBlock Hessenberg: A = "<<Type(A)<<"  "<<A0<<endl;
            cerr<<"A = "<<A<<endl;
            cerr<<"Ubeta = "<<Ubeta<<endl;
            cerr<<"U = "<<U<<endl;
            cerr<<"H = "<<H<<endl;
            cerr<<"UHUt = "<<AA<<endl;
            abort();
        }
#endif
    }
double Bs2JpsiPhi_mistagObservable_alt::Normalisation(DataPoint * measurement, PhaseSpaceBoundary * boundary)
{
	
	
	// Get observables into member variables
	t = measurement->GetObservable( timeName )->GetValue() - timeOffset;
	ctheta_tr = measurement->GetObservable( cosThetaName )->GetValue();
	phi_tr      = measurement->GetObservable( phiName )->GetValue();
	ctheta_1   = measurement->GetObservable( cosPsiName )->GetValue();	
	tagFraction = measurement->GetObservable( mistagName )->GetValue();
	//tagFraction= 0.5;  //PELC

	// Get time boundaries into member variables
	IConstraint * timeBound = boundary->GetConstraint("time");
	if ( timeBound->GetUnit() == "NameNotFoundError" ) {
		cerr << "Bound on time not provided" << endl;
		return 0;
	}
	else {
		tlo = timeBound->GetMinimum();
		thi = timeBound->GetMaximum();
	}
	
	
	// Recalculate cached values if Physics parameters have changed
	// Must do this for each of the two resolutions.
//PELC
// I dont think you can cache any more as normalisation depends upon the mistag which now changes per event.
	if( true /*! normalisationCacheValid*/ )  {
		for( tag = -1; tag <= 1; tag ++ ) {
            resolution =  resolution1 ;
			normalisationCacheValueRes1[tag+1] = this->diffXsecNorm1( );
            resolution =  resolution2 ;
			normalisationCacheValueRes2[tag+1] = this->diffXsecNorm1( );
		}
		normalisationCacheValid = true ;
	}	
	
	// Return normalisation value according to tag 

	tag = (int)measurement->GetObservable( tagName )->GetValue();

	double returnValue  = resolution1Fraction*normalisationCacheValueRes1[tag+1] + (1. - resolution1Fraction)*normalisationCacheValueRes2[tag+1] ;
	
	if( (returnValue <= 0.) || isnan(returnValue) ) {
		cout << " Bs2JpsiPhi_mistagObservable_alt::Normalisation() returns <=0 or nan " << endl ;
		cout << " gamma " << gamma() ;
		cout << " gl    " << gamma_l() ;
		cout << " gh    " << gamma_h() ;
		cout << " AT    " << AT() ;
		cout << " AP    " << AP() ;
		cout << " A0    " << A0() ;
		exit(1) ;
	}
	
	return returnValue ;
}
Example #12
0
inline void
SyrkLN
( T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C, 
  bool conjugate=false )
{
#ifndef RELEASE
    PushCallStack("internal::SyrkLN");
    if( A.Grid() != C.Grid() )
        throw std::logic_error
        ("A and C must be distributed over the same grid");
    if( A.Height() != C.Height() || A.Height() != C.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal SyrkLN:\n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);

    // Temporary distributions
    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> A1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > A1Trans_STAR_MR(g);

    A1_MC_STAR.AlignWith( C );
    A1_VR_STAR.AlignWith( C );
    A1Trans_STAR_MR.AlignWith( C );

    // Start the algorithm
    ScaleTrapezoid( beta, LEFT, LOWER, 0, C );
    LockedPartitionRight( A, AL, AR, 0 );
    while( AR.Width() > 0 )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );

        //--------------------------------------------------------------------//
        A1_VR_STAR = A1_MC_STAR = A1;
        A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR, conjugate );
        LocalTrrk( LOWER, alpha, A1_MC_STAR, A1Trans_STAR_MR, T(1), C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #13
0
void test_zerosized() {
  // default constructors:
  Eigen::MatrixXd A;
  Eigen::VectorXd v;
  // explicit zero-sized:
  Eigen::ArrayXXd A0(0,0);
  Eigen::ArrayXd v0(0);

  // assigning empty objects to each other:
  A=A0;
  v=v0;
}
int hpx_main(boost::program_options::variables_map& vm)
{
    {
        orthotope<double>
            A0({3, 3})
          , A1({3, 3})
          , A2({3, 3})  
          , A3({3, 3})
          , A4({3, 3})
          , A5({4, 4})
            ;
    
        // QR {{1, 3, 6}, {3, 5, 7}, {6, 7, 4}}
        A0.row(0,  1,    3,    6   );
        A0.row(1,  3,    5,    7   );
        A0.row(2,  6,    7,    4   );
    
        // QR {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}}
        A1.row(0,  12,  -51,   4   );
        A1.row(1,  6,    167, -68  );
        A1.row(2, -4,    24,  -41  );
 
        // QR {{2, -2, 18}, {2, 1, 0}, {1, 2, 0}}
        A2.row(0,  2,   -2,    18  );
        A2.row(1,  2,    1,    0   );
        A2.row(2,  1,    2,    0   );
   
        // QR {{0, 1, 1}, {1, 1, 2}, {0, 0, 3}}
        A3.row(0,  0,    1,    1   );
        A3.row(1,  1,    1,    2   );
        A3.row(2,  0,    0,    3   );

        // QR {{1, 1, -1}, {1, 2, 1}, {1, 2, -1}}
        A4.row(0,  1,    1,   -1   );
        A4.row(1,  1,    2,    1   );
        A4.row(2,  1,    2,   -1   );
    
        // QR {{4, -2, 2, 8}, {-2, 6, 2, 4}, {2, 2, 10, -6}, {8, 4, -6, 12}}
        A5.row(0,  4,   -2,    2,    8   );
        A5.row(1, -2,    6,    2,    4   );
        A5.row(2,  2,    2,    10,  -6   );
        A5.row(3,  8,    4,   -6,    12  );
    
        householders(A0);
        householders(A1);
        householders(A2);
        householders(A3);
        householders(A4);
        householders(A5);
    }

    return hpx::finalize(); 
}
Example #15
0
TYPED_TEST(Intesection_TEST, intersection_ray_triangle)
{
	using Scalar = typename cgogn::geometry::vector_traits<TypeParam>::Scalar;
	TypeParam p0(Scalar(1), Scalar(1), Scalar(96.1));
	TypeParam p1(Scalar(5), Scalar(1), Scalar(92.3));
	TypeParam p2(Scalar(3), Scalar(5), Scalar(94.2));

	TypeParam A0(Scalar(3), Scalar(3), Scalar(0));
	TypeParam D0(Scalar(0.001), Scalar(0.001), Scalar(1.0));

	TypeParam A1(Scalar(3), Scalar(1), Scalar(0));
	TypeParam D1(Scalar(0), Scalar(0), Scalar(1.0));
	TypeParam A2(Scalar(5), Scalar(1), Scalar(0));
	TypeParam A3(Scalar(9), Scalar(5), Scalar(0));

	EXPECT_TRUE(cgogn::geometry::intersection_ray_triangle(A0,D0,p0,p1,p2));
	EXPECT_TRUE(cgogn::geometry::intersection_ray_triangle(A1,D1,p0,p1,p2));
	EXPECT_TRUE(cgogn::geometry::intersection_ray_triangle(A2,D1,p0,p1,p2));
	EXPECT_FALSE(cgogn::geometry::intersection_ray_triangle(A3,D0,p0,p1,p2));

}
Example #16
0
void collect_dist_matrix(boost::mpi::communicator& comm, bool here,
    const elem::DistMatrix<T, elem::STAR, ColDist> &DA,
    elem::Matrix<T> &A) {

    if (ColDist == elem::VR || ColDist == elem::VC) {
        // TODO this is probably the most laziest way to do it.
        //      Must be possible to do it much better (less communication).

        try {
            elem::Matrix<T> A0(DA.Height(), DA.Width(), DA.Height());
            const elem::Matrix<T> &A_local = DA.LockedMatrix();
            elem::Zero(A0);
            for(int j = 0; j < A_local.Width(); j++)
                for(int i = 0; i < A_local.Height(); i++)
                    A0.Set(i, DA.RowShift() + j * DA.RowStride(),
                        A_local.Get(i, j));

            boost::mpi::reduce (comm,
                A0.LockedBuffer(),
                A0.MemorySize(),
                A.Buffer(),
                std::plus<T>(),
                0);

        } catch (std::logic_error e) {
            SKYLARK_THROW_EXCEPTION (base::elemental_exception()
                << base::error_msg(e.what()) );
        } catch(boost::mpi::exception e) {
            SKYLARK_THROW_EXCEPTION (base::mpi_exception()
                << base::error_msg(e.what()) );
        }

    } else {
        SKYLARK_THROW_EXCEPTION ( base::unsupported_matrix_distribution() );
    }
}
Example #17
0
inline void 
GemmNNDot
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNDot");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNDot: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    if( A.Height() > B.Width() )
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g),
                      CB(g), C1(g), C10(g), C11(g), C12(g),
                             C2(g);

        // Temporary distributions
        DistMatrix<T,STAR,VC> A1_STAR_VC(g);
        DistMatrix<T,VC,STAR> B1_VC_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Star the algorithm
        Scale( beta, C );
        LockedPartitionDown
        ( A, AT,
             AB, 0 );
        PartitionDown
        ( C, CT,
             CB, 0 );
        while( AB.Height() > 0 )
        {
            LockedRepartitionDown
            ( AT,  A0,
             /**/ /**/
                   A1,
              AB,  A2 );

            RepartitionDown
            ( CT,  C0,
             /**/ /**/
                   C1,
              CB,  C2 );

            A1_STAR_VC = A1; 
            B1_VC_STAR.AlignWith( A1_STAR_VC );

            LockedPartitionRight( B, BL, BR, 0 );
            PartitionRight( C1, C1L, C1R, 0 );
            while( BR.Width() > 0 )
            {
                LockedRepartitionRight
                ( BL, /**/ BR,
                  B0, /**/ B1, B2 );

                RepartitionRight
                ( C1L, /**/ C1R,
                  C10, /**/ C11, C12 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                B1_VC_STAR = B1;
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionRight
                ( BL,     /**/ BR,
                  B0, B1, /**/ B2 );

                SlidePartitionRight
                ( C1L,      /**/ C1R,
                  C10, C11, /**/ C12 );
            }
            B1_VC_STAR.FreeAlignments();

            SlideLockedPartitionDown
            ( AT,  A0,
                   A1,
             /**/ /**/
              AB,  A2 );

            SlidePartitionDown
            ( CT,  C0,
                   C1,
             /**/ /**/
              CB,  C2 );
        }
    }
    else
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> 
            CL(g), CR(g),         C1T(g),  C01(g),
            C0(g), C1(g), C2(g),  C1B(g),  C11(g),
                                           C21(g);

        // Temporary distributions
        DistMatrix<T,STAR,VR> A1_STAR_VR(g);
        DistMatrix<T,VR,STAR> B1_VR_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Star the algorithm
        Scale( beta, C );
        LockedPartitionRight( B, BL, BR, 0 );
        PartitionRight( C, CL, CR, 0 );
        while( BR.Width() > 0 )
        {
            LockedRepartitionRight
            ( BL, /**/ BR,
              B0, /**/ B1, B2 );

            RepartitionRight
            ( CL, /**/ CR,
              C0, /**/ C1, C2 );

            B1_VR_STAR = B1;
            A1_STAR_VR.AlignWith( B1_VR_STAR );

            LockedPartitionDown
            ( A, AT,
                 AB, 0 );
            PartitionDown
            ( C1, C1T,
                  C1B, 0 );
            while( AB.Height() > 0 )
            {
                LockedRepartitionDown
                ( AT,  A0,
                 /**/ /**/
                       A1,
                  AB,  A2 );

                RepartitionDown
                ( C1T,  C01,
                 /***/ /***/
                        C11,
                  C1B,  C21 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                A1_STAR_VR = A1;
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionDown
                ( AT,  A0,
                       A1,
                 /**/ /**/
                  AB,  A2 );

                SlidePartitionDown
                ( C1T,  C01,
                        C11,
                 /***/ /***/
                  C1B,  C21 );
            }
            A1_STAR_VR.FreeAlignments();

            SlideLockedPartitionRight
            ( BL,     /**/ BR,
              B0, B1, /**/ B2 ); 

            SlidePartitionRight
            ( CL,     /**/ CR,
              C0, C1, /**/ C2 );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void
internal::ApplyPackedReflectorsLLVF
( Conjugation conjugation, int offset, 
  const DistMatrix<Complex<R>,MC,MR  >& H,
  const DistMatrix<Complex<R>,MD,STAR>& t,
        DistMatrix<Complex<R>,MC,MR  >& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLLVF");
    if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
        throw std::logic_error
        ("{H,t,A} must be distributed over the same grid");
    if( offset > 0 )
        throw std::logic_error("Transforms cannot extend above matrix");
    if( offset < -H.Height() )
        throw std::logic_error("Transforms cannot extend below matrix");
    if( H.Height() != A.Height() )
        throw std::logic_error
        ("Height of transforms must equal height of target matrix");
    if( t.Height() != H.DiagonalLength( offset ) )
        throw std::logic_error("t must be the same length as H's offset diag.");
    if( !t.AlignedWithDiagonal( H, offset ) )
        throw std::logic_error("t must be aligned with H's 'offset' diagonal");
#endif
    typedef Complex<R> C;
    const Grid& g = H.Grid();

    // Matrix views    
    DistMatrix<C,MC,MR>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<C,MC,MR>
        AT(g),  A0(g),
        AB(g),  A1(g),
                A2(g);
    DistMatrix<C,MD,STAR>
        tT(g),  t0(g),
        tB(g),  t1(g),
                t2(g);

    DistMatrix<C,VC,  STAR> HPan_VC_STAR(g);
    DistMatrix<C,MC,  STAR> HPan_MC_STAR(g);
    DistMatrix<C,STAR,STAR> t1_STAR_STAR(g);
    DistMatrix<C,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<C,STAR,MR  > Z_STAR_MR(g);
    DistMatrix<C,STAR,VR  > Z_STAR_VR(g);

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    LockedPartitionDown
    ( t, tT,
         tB, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        int HPanHeight = H11.Height() + H21.Height();
        int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) );
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        LockedRepartitionDown
        ( tT,  t0,
         /**/ /**/
               t1,
          tB,  t2, HPanWidth );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        HPan_MC_STAR.AlignWith( AB );
        Z_STAR_MR.AlignWith( AB );
        Z_STAR_VR.AlignWith( AB );
        Z_STAR_MR.ResizeTo( HPan.Width(), AB.Width() );
        SInv_STAR_STAR.ResizeTo( HPan.Width(), HPan.Width() );
        Zero( SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        HPan_VC_STAR = HPanCopy;
        Herk
        ( UPPER, ADJOINT, 
          (C)1, HPan_VC_STAR.LockedLocalMatrix(),
          (C)0, SInv_STAR_STAR.LocalMatrix() );     
        SInv_STAR_STAR.SumOverGrid();
        t1_STAR_STAR = t1;
        FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );

        HPan_MC_STAR = HPanCopy;
        internal::LocalGemm
        ( ADJOINT, NORMAL, (C)1, HPan_MC_STAR, AB, (C)0, Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );
        
        internal::LocalTrsm
        ( LEFT, UPPER, ADJOINT, NON_UNIT, (C)1, SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        internal::LocalGemm
        ( NORMAL, NORMAL, (C)-1, HPan_MC_STAR, Z_STAR_MR, (C)1, AB );
        //--------------------------------------------------------------------//
        HPan_MC_STAR.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlideLockedPartitionDown
        ( tT,  t0,
               t1,
         /**/ /**/
          tB,  t2 ); 

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void
internal::ApplyPackedReflectorsLLVF
( int offset, 
  const DistMatrix<R,MC,MR>& H,
        DistMatrix<R,MC,MR>& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLLVF");
    if( H.Grid() != A.Grid() )
        throw std::logic_error("{H,A} must be distributed over the same grid");
    if( offset > 0 )
        throw std::logic_error("Transforms cannot extend above matrix");
    if( offset < -H.Height() )
        throw std::logic_error("Transforms cannot extend below matrix");
    if( H.Height() != A.Height() )
        throw std::logic_error
        ("Height of transforms must equal height of target matrix");
#endif
    const Grid& g = H.Grid();

    // Matrix views    
    DistMatrix<R,MC,MR>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g), HPanCopy(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<R,MC,MR>
        AT(g),  A0(g),
        AB(g),  A1(g),
                A2(g);

    DistMatrix<R,VC,  STAR> HPan_VC_STAR(g);
    DistMatrix<R,MC,  STAR> HPan_MC_STAR(g);
    DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<R,STAR,MR  > Z_STAR_MR(g);
    DistMatrix<R,STAR,VR  > Z_STAR_VR(g);

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );
        
        int HPanHeight = H11.Height() + H21.Height();
        int HPanWidth = std::min( H11.Width(), std::max(HPanHeight+offset,0) );
        HPan.LockedView( H, H00.Height(), H00.Width(), HPanHeight, HPanWidth );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        HPan_MC_STAR.AlignWith( AB );
        Z_STAR_MR.AlignWith( AB );
        Z_STAR_VR.AlignWith( AB );
        Z_STAR_MR.ResizeTo( HPanWidth, AB.Width() );
        SInv_STAR_STAR.ResizeTo( HPanWidth, HPanWidth );
        Zero( SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( LEFT, LOWER, offset, HPanCopy );
        SetDiagonalToOne( LEFT, offset, HPanCopy );

        HPan_VC_STAR = HPanCopy;
        Syrk
        ( UPPER, TRANSPOSE, 
          (R)1, HPan_VC_STAR.LockedLocalMatrix(),
          (R)0, SInv_STAR_STAR.LocalMatrix() );     
        SInv_STAR_STAR.SumOverGrid();
        HalveMainDiagonal( SInv_STAR_STAR );

        HPan_MC_STAR = HPanCopy;
        internal::LocalGemm
        ( TRANSPOSE, NORMAL, 
          (R)1, HPan_MC_STAR, AB, (R)0, Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );
        
        internal::LocalTrsm
        ( LEFT, UPPER, TRANSPOSE, NON_UNIT, 
          (R)1, SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        internal::LocalGemm
        ( NORMAL, NORMAL, (R)-1, HPan_MC_STAR, Z_STAR_MR, (R)1, AB );
        //--------------------------------------------------------------------//
        HPan_MC_STAR.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #20
0
inline void
Her2kLN
( T alpha, const DistMatrix<T,MC,MR>& A,
           const DistMatrix<T,MC,MR>& B,
  T beta,        DistMatrix<T,MC,MR>& C )
{
#ifndef RELEASE
    PushCallStack("internal::Her2kLN");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() || A.Height() != C.Width() ||
        B.Height() != C.Height() || B.Height() != C.Width() ||
        A.Width() != B.Width() )
    {
        std::ostringstream msg;
        msg << "Nonconformal Her2kLN:\n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views 
    DistMatrix<T,MC,MR> AL(g), AR(g),
                        A0(g), A1(g), A2(g);
    DistMatrix<T,MC,MR> BL(g), BR(g),
                        B0(g), B1(g), B2(g);

    // Temporary distributions
    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,MC,  STAR> B1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> A1_VR_STAR(g);
    DistMatrix<T,VR,  STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > A1Adj_STAR_MR(g);
    DistMatrix<T,STAR,MR  > B1Adj_STAR_MR(g);

    A1_MC_STAR.AlignWith( C );
    B1_MC_STAR.AlignWith( C );
    A1_VR_STAR.AlignWith( C );
    B1_VR_STAR.AlignWith( C );
    A1Adj_STAR_MR.AlignWith( C );
    B1Adj_STAR_MR.AlignWith( C );

    // Start the algorithm
    ScaleTrapezoid( beta, LEFT, LOWER, 0, C );
    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    while( AR.Width() > 0 )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );

        LockedRepartitionRight
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );

        //--------------------------------------------------------------------//
        A1_VR_STAR = A1_MC_STAR = A1;
        A1Adj_STAR_MR.AdjointFrom( A1_VR_STAR );

        B1_VR_STAR = B1_MC_STAR = B1;
        B1Adj_STAR_MR.AdjointFrom( B1_VR_STAR );

        LocalTrr2k
        ( LOWER, 
          alpha, A1_MC_STAR, B1Adj_STAR_MR, 
                 B1_MC_STAR, A1Adj_STAR_MR,
          T(1),  C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #21
0
inline void
Syr2kUT
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C,
  bool conjugate=false )
{
#ifndef RELEASE
    CallStackEntry entry("internal::Syr2kUT");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Width() != C.Height() || 
        A.Width() != C.Width()  ||
        B.Width() != C.Height() ||
        B.Width() != C.Width()  ||
        A.Height() != B.Height()  )
    {
        std::ostringstream msg;
        msg << "Nonconformal Syr2kUT:\n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();
    const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE );

    // Matrix views
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    // Temporary distributions
    DistMatrix<T,MR,  STAR> A1Trans_MR_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,STAR,VR  > A1_STAR_VR(g);
    DistMatrix<T,STAR,VR  > B1_STAR_VR(g);
    DistMatrix<T,STAR,MC  > A1_STAR_MC(g);
    DistMatrix<T,STAR,MC  > B1_STAR_MC(g);

    A1Trans_MR_STAR.AlignWith( C );
    B1Trans_MR_STAR.AlignWith( C );
    A1_STAR_MC.AlignWith( C );
    B1_STAR_MC.AlignWith( C );

    // Start the algorithm
    ScaleTrapezoid( beta, LEFT, UPPER, 0, C );
    LockedPartitionDown
    ( A, AT, 
         AB, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    while( AB.Height() > 0 )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        //--------------------------------------------------------------------//
        A1Trans_MR_STAR.TransposeFrom( A1 );
        A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR );
        A1_STAR_MC = A1_STAR_VR;

        B1Trans_MR_STAR.TransposeFrom( B1 );
        B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR );
        B1_STAR_MC = B1_STAR_VR;

        LocalTrr2k
        ( UPPER, orientation, TRANSPOSE, orientation, TRANSPOSE, 
          alpha, A1_STAR_MC, B1Trans_MR_STAR,
                 B1_STAR_MC, A1Trans_MR_STAR,
          T(1),  C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );

        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
    }
}
Example #22
0
void Trr2kNTTN
( UpperOrLower uplo,
  Orientation orientationOfB, Orientation orientationOfC,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    CallStackEntry entry("internal::Trr2kNTTN");
    if( E.Height() != E.Width()  || A.Width()  != C.Height() ||
        A.Height() != E.Height() || C.Width()  != E.Height() ||
        B.Height() != E.Width()  || D.Width()  != E.Width()  ||
        A.Width()  != B.Width()  || C.Height() != D.Height() )
        LogicError("Nonconformal Trr2kNTTN");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);

    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);
    DistMatrix<T> DT(g),  D0(g),
                  DB(g),  D1(g),
                          D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > B1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC  > C1_STAR_MC(g);
    DistMatrix<T,MR,  STAR> D1Trans_MR_STAR(g);

    A1_MC_STAR.AlignWith( E );
    B1_VR_STAR.AlignWith( E );
    B1AdjOrTrans_STAR_MR.AlignWith( E );
    C1_STAR_MC.AlignWith( E );
    D1Trans_MR_STAR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    LockedPartitionDown
    ( C, CT,
         CB, 0 );
    LockedPartitionDown
    ( D, DT,
         DB, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionRight
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );
        LockedRepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );
        LockedRepartitionDown
        ( DT,  D0,
         /**/ /**/
               D1,
          DB,  D2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_STAR_MC = C1;
        B1_VR_STAR = B1;
        if( orientationOfB == ADJOINT )
            B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR );
        else
            B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR );
        D1Trans_MR_STAR.TransposeFrom( D1 );
        LocalTrr2k 
        ( uplo, orientationOfC, TRANSPOSE,
          alpha, A1_MC_STAR, B1AdjOrTrans_STAR_MR,
                 C1_STAR_MC, D1Trans_MR_STAR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );
        SlideLockedPartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
        SlideLockedPartitionDown
        ( DT,  D0,
               D1,
         /**/ /**/
          DB,  D2 );
    }
}
Example #23
0
inline void
ApplyPackedReflectorsLUVF
( int offset, 
  const DistMatrix<R>& H,
        DistMatrix<R>& A )
{
#ifndef RELEASE
    PushCallStack("internal::ApplyPackedReflectorsLUVF");
    if( H.Grid() != A.Grid() )
        throw std::logic_error("{H,A} must be distributed over the same grid");
    if( offset < 0 || offset > H.Height() )
        throw std::logic_error("Transforms out of bounds");
    if( H.Width() != A.Height() )
        throw std::logic_error
        ("Width of transforms must equal height of target matrix");
#endif
    const Grid& g = H.Grid();

    DistMatrix<R>
        HTL(g), HTR(g),  H00(g), H01(g), H02(g),  HPan(g),
        HBL(g), HBR(g),  H10(g), H11(g), H12(g),
                         H20(g), H21(g), H22(g);
    DistMatrix<R>
        AT(g),  A0(g),  ATop(g),
        AB(g),  A1(g),
                A2(g);

    DistMatrix<R> HPanCopy(g);
    DistMatrix<R,VC,  STAR> HPan_VC_STAR(g);
    DistMatrix<R,MC,  STAR> HPan_MC_STAR(g);
    DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g);
    DistMatrix<R,STAR,MR  > Z_STAR_MR(g);
    DistMatrix<R,STAR,VR  > Z_STAR_VR(g);

    LockedPartitionDownDiagonal
    ( H, HTL, HTR,
         HBL, HBR, 0 );
    PartitionDown
    ( A, AT,
         AB, 0 );
    while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
    {
        LockedRepartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, /**/ H01, H02,
         /*************/ /******************/
               /**/       H10, /**/ H11, H12,
          HBL, /**/ HBR,  H20, /**/ H21, H22 );

        const int HPanHeight = H01.Height() + H11.Height();
        const int HPanOffset = 
            std::min( H11.Width(), std::max(offset-H00.Width(),0) );
        const int HPanWidth = H11.Width()-HPanOffset;
        HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth );

        RepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        ATop.View2x1( A0, 
                      A1 );

        HPan_MC_STAR.AlignWith( ATop );
        Z_STAR_MR.AlignWith( ATop );
        Z_STAR_VR.AlignWith( ATop );
        Zeros( HPan.Width(), ATop.Width(), Z_STAR_MR );
        Zeros( HPan.Width(), HPan.Width(), SInv_STAR_STAR );
        //--------------------------------------------------------------------//
        HPanCopy = HPan;
        MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy );
        SetDiagonalToOne( RIGHT, offset, HPanCopy );
        HPan_VC_STAR = HPanCopy;
        Syrk
        ( LOWER, TRANSPOSE, 
          R(1), HPan_VC_STAR.LockedLocalMatrix(),
          R(0), SInv_STAR_STAR.LocalMatrix() ); 
        SInv_STAR_STAR.SumOverGrid();
        HalveMainDiagonal( SInv_STAR_STAR );

        HPan_MC_STAR = HPanCopy;
        LocalGemm
        ( TRANSPOSE, NORMAL, R(1), HPan_MC_STAR, ATop, R(0), Z_STAR_MR );
        Z_STAR_VR.SumScatterFrom( Z_STAR_MR );
        
        LocalTrsm
        ( LEFT, LOWER, NORMAL, NON_UNIT, 
          R(1), SInv_STAR_STAR, Z_STAR_VR );

        Z_STAR_MR = Z_STAR_VR;
        LocalGemm( NORMAL, NORMAL, R(-1), HPan_MC_STAR, Z_STAR_MR, R(1), ATop );
        //--------------------------------------------------------------------//
        HPan_MC_STAR.FreeAlignments();
        Z_STAR_MR.FreeAlignments();
        Z_STAR_VR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( HTL, /**/ HTR,  H00, H01, /**/ H02,
               /**/       H10, H11, /**/ H12,
         /*************/ /******************/
          HBL, /**/ HBR,  H20, H21, /**/ H22 );

        SlidePartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #24
0
double LoopToolsWrapper::PV_A0(const double mu2, const double m2) const
{
    setmudim(mu2);
    std::complex<double> A0val = A0(m2);
    return ( A0val.real() );
}
Example #25
0
void Trr2kNNNT
( UpperOrLower uplo,
  Orientation orientationOfD,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    PushCallStack("internal::Trr2kNNNT");
    if( E.Height() != E.Width()  || A.Width()  != C.Width()  ||
        A.Height() != E.Height() || C.Height() != E.Height() ||
        B.Width()  != E.Width()  || D.Height() != E.Width()  ||
        A.Width()  != B.Height() || C.Width()  != D.Width() )
        throw std::logic_error("Nonconformal Trr2kNNNT");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g);
    DistMatrix<T> DL(g), DR(g),
                  D0(g), D1(g), D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,MC,  STAR> C1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> D1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > D1AdjOrTrans_STAR_MR(g);

    A1_MC_STAR.AlignWith( E );
    B1Trans_MR_STAR.AlignWith( E );
    C1_MC_STAR.AlignWith( E );
    D1_VR_STAR.AlignWith( E );
    D1AdjOrTrans_STAR_MR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    LockedPartitionRight( C, CL, CR, 0 );
    LockedPartitionRight( D, DL, DR, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );
        LockedRepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );
        LockedRepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_MC_STAR = C1;
        B1Trans_MR_STAR.TransposeFrom( B1 );
        D1_VR_STAR = D1;
        if( orientationOfD == ADJOINT )
            D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR );
        else
            D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR );
        LocalTrr2k
        ( uplo, TRANSPOSE, 
          alpha, A1_MC_STAR, B1Trans_MR_STAR, 
                 C1_MC_STAR, D1AdjOrTrans_STAR_MR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( DL,     /**/ DR,
          D0, D1, /**/ D2 );
        SlideLockedPartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #26
0
inline void
GemmTTC
( Orientation orientationOfA, 
  Orientation orientationOfB,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTTC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL || orientationOfB == NORMAL )
        throw std::logic_error
        ("GemmTTC expects A and B to be (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Height() != C.Width()  ||
        A.Height() != B.Width()    )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTTC: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC  > A1_STAR_MC(g);
    DistMatrix<T,VR,  STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > B1AdjOrTrans_STAR_MR(g);

    A1_STAR_MC.AlignWith( C );
    B1_VR_STAR.AlignWith( C );
    B1AdjOrTrans_STAR_MR.AlignWith( C );
    
    // Start the algorithm    
    Scale( beta, C );
    LockedPartitionDown
    ( A, AT,
         AB, 0 ); 
    LockedPartitionRight( B, BL, BR, 0 );
    while( AB.Height() > 0 )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        LockedRepartitionRight
        ( BL, /**/     BR,
          B0, /**/ B1, B2 );

        //--------------------------------------------------------------------//
        A1_STAR_MC = A1; 
        B1_VR_STAR = B1;
        if( orientationOfB == ADJOINT )
            B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR );
        else
            B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR );

        // C[MC,MR] += alpha (A1[*,MC])^[T/H] (B1[MR,*])^[T/H]
        //           = alpha (A1^[T/H])[MC,*] (B1^[T/H])[*,MR]
        LocalGemm
        ( orientationOfA, NORMAL, 
          alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, T(1), C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #27
0
inline void
GemmTTB
( Orientation orientationOfA, 
  Orientation orientationOfB,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTTB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL || orientationOfB == NORMAL )
        throw std::logic_error
        ("GemmTTB expects A and B to be (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Height() != C.Width()  ||
        A.Height() != B.Width()    )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTTB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,VR,  STAR> A1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > A1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC  > D1_STAR_MC(g);
    DistMatrix<T,MR,  MC  > D1_MR_MC(g);
    DistMatrix<T> D1(g);

    A1_VR_STAR.AlignWith( B );
    A1AdjOrTrans_STAR_MR.AlignWith( B );
    D1_STAR_MC.AlignWith( B );

    // Start the algorithm 
    Scale( beta, C );
    LockedPartitionRight( A, AL, AR, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( AR.Width() > 0 )
    {
        LockedRepartitionRight
        ( AL, /**/     AR,
          A0, /**/ A1, A2 );
 
        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        D1.AlignWith( C1 );
        Zeros( C1.Height(), C1.Width(), D1_STAR_MC );
        //--------------------------------------------------------------------//
        A1_VR_STAR = A1;
        if( orientationOfA == ADJOINT )
            A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR );
        else
            A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR );
 
        // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H]
        //           = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC]
        LocalGemm
        ( NORMAL, orientationOfB, 
          alpha, A1AdjOrTrans_STAR_MR, B, T(0), D1_STAR_MC );

        // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows
        D1_MR_MC.SumScatterFrom( D1_STAR_MC );
        D1 = D1_MR_MC; 
        Axpy( T(1), D1, C1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #28
0
inline void 
GemmNNC
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNC: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);         
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    // Temporary distributions
    DistMatrix<T,MC,STAR> A1_MC_STAR(g);
    DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); 

    A1_MC_STAR.AlignWith( C );
    B1Trans_MR_STAR.AlignWith( C );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionRight( A, AL, AR, 0 ); 
    LockedPartitionDown
    ( B, BT, 
         BB, 0 ); 
    while( AR.Width() > 0 )
    {
        LockedRepartitionRight( AL, /**/ AR,
                                A0, /**/ A1, A2 );

        LockedRepartitionDown( BT,  B0,
                              /**/ /**/
                                    B1, 
                               BB,  B2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1; 
        B1Trans_MR_STAR.TransposeFrom( B1 );

        // C[MC,MR] += alpha A1[MC,*] (B1^T[MR,*])^T
        //           = alpha A1[MC,*] B1[*,MR]
        LocalGemm
        ( NORMAL, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, T(1), C );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight( AL,     /**/ AR,
                                   A0, A1, /**/ A2 );

        SlideLockedPartitionDown( BT,  B0,
                                       B1,
                                 /**/ /**/
                                  BB,  B2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example #29
0
    static void BlockHessenberg(
        MatrixView<T> A, VectorView<T> Ubeta)
    {
        // Much like the block version of Bidiagonalize, we try to maintain
        // the operation of several successive Householder matrices in
        // a block form, where the net Block Householder is I - YZYt.
        //
        // But as with the bidiagonlization algorithm (and unlike a simple
        // block QR decomposition), we update the matrix from both the left 
        // and the right, so we also need to keep track of the product
        // ZYtm in addition.
        //
        // The block update at the end of the block loop is
        // m' = (I-YZYt) m (I-YZtYt)
        //
        // The Y matrix is stored in the first K columns of m,
        // and the Hessenberg portion of these columns is updated as we go.
        // For the right-hand-side update, m -= mYZtYt, the m on the right
        // needs to be the full original matrix m, including the original
        // versions of these K columns.  Therefore, we can't wait until 
        // the end for this calculation.  
        //
        // Instead, we keep track of mYZt as we progress, so the final update
        // is:
        //
        // m' = (I-YZYt) (m - mYZt Y)
        //
        // We also need to do this same calculation for each column as we
        // progress through the block.
        //
        const ptrdiff_t N = A.rowsize();

#ifdef XDEBUG
        Matrix<T> A0(A);
#endif

        TMVAssert(A.rowsize() == A.colsize());
        TMVAssert(N > 0);
        TMVAssert(Ubeta.size() == N-1);
        TMVAssert(!Ubeta.isconj());
        TMVAssert(Ubeta.step()==1);

        ptrdiff_t ncolmax = MIN(HESS_BLOCKSIZE,N-1);
        Matrix<T,RowMajor> mYZt_full(N,ncolmax);
        UpperTriMatrix<T,NonUnitDiag|ColMajor> Z_full(ncolmax);

        T det(0); // Ignore Householder Determinant calculations
        T* Uj = Ubeta.ptr();
        for(ptrdiff_t j1=0;j1<N-1;) {
            ptrdiff_t j2 = MIN(N-1,j1+HESS_BLOCKSIZE);
            ptrdiff_t ncols = j2-j1;
            MatrixView<T> mYZt = mYZt_full.subMatrix(0,N-j1,0,ncols);
            UpperTriMatrixView<T> Z = Z_full.subTriMatrix(0,ncols);

            for(ptrdiff_t j=j1,jj=0;j<j2;++j,++jj,++Uj) { // jj = j-j1

                // Update current column of A
                //
                // m' = (I - YZYt) (m - mYZt Yt)
                // A(0:N,j)' = A(0:N,j) - mYZt(0:N,0:j) Y(j,0:j)t
                A.col(j,j1+1,N) -= mYZt.Cols(0,j) * A.row(j,0,j).Conjugate();
                //
                // A(0:N,j)'' = A(0:N,j) - Y Z Yt A(0:N,j)'
                // 
                // Let Y = (L)     where L is unit-diagonal, lower-triangular,
                //         (M)     and M is rectangular
                //
                LowerTriMatrixView<T> L = 
                    LowerTriMatrixViewOf(A.subMatrix(j1+1,j+1,j1,j),UnitDiag);
                MatrixView<T> M = A.subMatrix(j+1,N,j1,j);
                // Use the last column of Z as temporary storage for Yt A(0:N,j)'
                VectorView<T> YtAj = Z.col(jj,0,jj);
                YtAj = L.adjoint() * A.col(j,j1+1,j+1);
                YtAj += M.adjoint() * A.col(j,j+1,N);
                YtAj = Z.subTriMatrix(0,jj) * YtAj;
                A.col(j,j1+1,j+1) -= L * YtAj;
                A.col(j,j+1,N) -= M * YtAj;

                // Do the Householder reflection 
                VectorView<T> u = A.col(j,j+1,N);
                T bu = Householder_Reflect(u,det);
#ifdef TMVFLDEBUG
                TMVAssert(Uj >= Ubeta._first);
                TMVAssert(Uj < Ubeta._last);
#endif
                *Uj = bu;

                // Save the top of the u vector, which isn't actually part of u
                T& Atemp = *u.cptr();
                TMVAssert(IMAG(Atemp) == RealType(T)(0));
                RealType(T) Aorig = REAL(Atemp);
                Atemp = RealType(T)(1);

                // Update Z
                VectorView<T> Zj = Z.col(jj,0,jj);
                Zj = -bu * M.adjoint() * u;
                Zj = Z * Zj;
                Z(jj,jj) = -bu;

                // Update mYtZt:
                //
                // mYZt(0:N,j) = m(0:N,0:N) Y(0:N,0:j) Zt(0:j,j)
                //             = m(0:N,j+1:N) Y(j+1:N,j) Zt(j,j)
                //             = bu* m(0:N,j+1:N) u 
                //
                mYZt.col(jj) = CONJ(bu) * A.subMatrix(j1,N,j+1,N) * u;

                // Restore Aorig, which is actually part of the Hessenberg matrix.
                Atemp = Aorig;
            }

            // Update the rest of the matrix:
            // A(j2,j2-1) needs to be temporarily changed to 1 for use in Y
            T& Atemp = *(A.ptr() + j2*A.stepi() + (j2-1)*A.stepj());
            TMVAssert(IMAG(Atemp) == RealType(T)(0));
            RealType(T) Aorig = Atemp;
            Atemp = RealType(T)(1);

            // m' = (I-YZYt) (m - mYZt Y)
            MatrixView<T> m = A.subMatrix(j1,N,j2,N);
            ConstMatrixView<T> Y = A.subMatrix(j2+1,N,j1,j2);
            m -= mYZt * Y.adjoint();
            BlockHouseholder_LMult(Y,Z,m);

            // Restore A(j2,j2-1)
            Atemp = Aorig;
            j1 = j2;
        }

#ifdef XDEBUG
        Matrix<T> U(N,N,T(0));
        U.subMatrix(1,N,1,N) = A.subMatrix(1,N,0,N-1);
        U.upperTri().setZero();
        U(0,0) = T(1);
        Vector<T> Ubeta2(N);
        Ubeta2.subVector(1,N) = Ubeta;
        Ubeta2(0) = T(0);
        GetQFromQR(U.view(),Ubeta2);
        Matrix<T> H = A;
        if (N>2) LowerTriMatrixViewOf(H).offDiag(2).setZero();
        Matrix<T> AA = U*H*U.adjoint();
        if (Norm(A0-AA) > 0.001*Norm(A0)) {
            cerr<<"NonBlock Hessenberg: A = "<<Type(A)<<"  "<<A0<<endl;
            cerr<<"A = "<<A<<endl;
            cerr<<"Ubeta = "<<Ubeta<<endl;
            cerr<<"U = "<<U<<endl;
            cerr<<"H = "<<H<<endl;
            cerr<<"UHUt = "<<AA<<endl;
            Matrix<T,ColMajor> A2 = A0;
            Vector<T> Ub2(Ubeta.size());
            NonBlockHessenberg(A2.view(),Ub2.view());
            cerr<<"cf NonBlock: A -> "<<A2<<endl;
            cerr<<"Ubeta = "<<Ub2<<endl;
            abort();
        }
#endif
    }
Example #30
0
inline void 
GemmNNB
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC> A1_STAR_MC(g);
    DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g);

    A1_STAR_MC.AlignWith( B );
    D1Trans_MR_STAR.AlignWith( B );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDown
    ( A, AT,
         AB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( AB.Height() > 0 )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//
        A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR]

        // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ]
        LocalGemm
        ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR );

        C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
 
        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}