Esempio n. 1
0
void
LSLocateCircularInterface::setLevelSetPatchData(int D_idx,
                                                Pointer<HierarchyMathOps> hier_math_ops,
                                                double /*time*/,
                                                bool /*initial_time*/)
{
    // In this version of this class, the initial level set location is set to be
    // exact since we always know the radius of the ball

    Pointer<PatchHierarchy<NDIM> > patch_hierarchy = hier_math_ops->getPatchHierarchy();
    const int coarsest_ln = 0;
    const int finest_ln = patch_hierarchy->getFinestLevelNumber();

    // Set the initial condition for locating the interface
    const double& R = d_circle->R;
    const IBTK::Vector& X0 = d_circle->X0;

    for (int ln = coarsest_ln; ln <= finest_ln; ++ln)
    {
        Pointer<PatchLevel<NDIM> > level = patch_hierarchy->getPatchLevel(ln);
        for (PatchLevel<NDIM>::Iterator p(level); p; p++)
        {
            Pointer<Patch<NDIM> > patch = level->getPatch(p());
            const Box<NDIM>& patch_box = patch->getBox();
            Pointer<CellData<NDIM, double> > D_data = patch->getPatchData(D_idx);
            for (Box<NDIM>::Iterator it(patch_box); it; it++)
            {
                CellIndex<NDIM> ci(it());

                // Get physical coordinates
                IBTK::Vector coord = IBTK::Vector::Zero();
                Pointer<CartesianPatchGeometry<NDIM> > patch_geom = patch->getPatchGeometry();
                const double* patch_X_lower = patch_geom->getXLower();
                const hier::Index<NDIM>& patch_lower_idx = patch_box.lower();
                const double* const patch_dx = patch_geom->getDx();
                for (int d = 0; d < NDIM; ++d)
                {
                    coord[d] = patch_X_lower[d] + patch_dx[d] * (static_cast<double>(ci(d) - patch_lower_idx(d)) + 0.5);
                }

                const double distance = std::sqrt(std::pow((coord[0] - X0(0)), 2.0) + std::pow((coord[1] - X0(1)), 2.0)
#if (NDIM == 3)
                                                  + std::pow((coord[2] - X0(2)), 2.0)
#endif
                );

                (*D_data)(ci) = distance - R;
            }
        }
    }
    return;
} // setLevelSetPatchData
Esempio n. 2
0
template <class cCRNode> void  cTplCoxRoyAlgo<cCRNode>::SetStdCostRegul(double aCoeff,double aCste,int aVmin)
{
    for (int anX=X0(); anX<X1() ; anX++)
        for (int anY=Y0(); anY<Y1() ; anY++)
             for (int aZ = ZMin(anX,anY); aZ< ZMax(anX,anY) ; aZ++)
             {
                 cRoyPt    aP1 (anX,anY,aZ);
                 cCRNode & aS1 = NodeOfP(aP1);
                 int  aC1 = aS1.ResidualFlow(mDirZPlus);

                 for (int anEdg=0; anEdg<NbEdges() ; anEdg++)
                 {
                     if (aS1.EdgeIsValide(anEdg) && (!tabCRIsVertical[anEdg]))
                     {
                           cRoyPt aP2(aP1,anEdg);
                           cCRNode & aS2 = NodeOfP(aP2);
                           int aC2 = aS2.ResidualFlow(mDirZPlus);

                           double aCost = aCste + aCoeff*(aC1+aC2)/2.0;
                           if (Cnx8())
                               aCost *= tabCRIsArcV8[anEdg] ? 0.2928 : 0.4142 ;
                           int iCost = int(aCost+0.5);
                           if (iCost<aVmin)
                              iCost = aVmin;
                           aS1.SetResidualFlow(anEdg,iCost);
                     }
                 }
             }
}
Esempio n. 3
0
/** An sample for taylor expansion of logdet(X). */
void taylorSample() {

  std::string ans;
  char rowChar[5];
  int rowTmp = ROW;
  sprintf(rowChar, "%d", rowTmp);
  std::string row = rowChar;
  // Initialize the matrices.
  symbolic_matrix_type X("X", ROW, COL);
  symbolic_matrix_type X0("X0", ROW, COL);
  symbolic_matrix_type Delta("(X-X0)", ROW, COL);
    
  AMD::SymbolicScalarMatlab a2("1/2!");
  AMD::SymbolicScalarMatlab a3("1/3!"); 
  SymbolicSMFunc r2(a2,ROW,COL);
  SymbolicSMFunc r3(a3,ROW, COL);

  // Initialize MatrixMatrixFunction. 
  SymbolicMMFunc fX(X, false);
  SymbolicMMFunc fX0(X0, false);
  SymbolicMMFunc fDelta(Delta, true);

  // Compute Taylor series iteratively. 
  SymbolicSMFunc f0 =  logdet(fX0);
  SymbolicSMFunc f1 = trace(fDelta * transpose(*f0.derivativeFuncVal));
  SymbolicSMFunc f2 = trace(fDelta * transpose(*f1.derivativeFuncVal));
  SymbolicSMFunc f3 = trace(fDelta * transpose(*f2.derivativeFuncVal));
  // Taylor Expansion. 
  SymbolicSMFunc func = f0 + f1 + r2*f2 + r3*f3;

  std::cout<<"The first 4 terms of Taylor Expansion for logdet(X) around X0 is:";
  std::cout << std::endl;
  std::cout << func.functionVal.getString() << std::endl;

}
Esempio n. 4
0
AR_Process::AR_Process(void)
{
    // State Equation
    F.resize(1,1);
    F(0,0) = 0.8;

    f.resize(1);
    f(0) = 0.;

    G.resize(1,1);
    G.identity();

    Qw.resize(1);
    Qw(0,0)= 0.1;

    // Observation noise
    H.resize(1,1);
    H(0,0) = 1;

    h.resize(1);
    h(0) = 0.;

    Qv.resize(1);
    Qv(0,0)=1;

    // Init state
    X0.resize(1);
    X0(0) = 10.;

    R0.resize(1);
    R0.zero();
}
Esempio n. 5
0
inline void
TrmmLLNA
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T>& L,
                 DistMatrix<T>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmLLNA");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( L.Height() != L.Width() || L.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLLNA: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = L.Grid();

    DistMatrix<T>
        XL(g), XR(g),
        X0(g), X1(g), X2(g);

    DistMatrix<T,VR,  STAR> X1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > X1Trans_STAR_MR(g);
    DistMatrix<T,MC,  STAR> Z1_MC_STAR(g);

    X1_VR_STAR.AlignWith( L );
    X1Trans_STAR_MR.AlignWith( L );
    Z1_MC_STAR.AlignWith( L );

    PartitionRight( X, XL, XR, 0 );
    while( XL.Width() < X.Width() )
    {
        RepartitionRight
        ( XL, /**/ XR,
          X0, /**/ X1, X2 );

        Zeros( X1.Height(), X1.Width(), Z1_MC_STAR );
        //--------------------------------------------------------------------//
        X1_VR_STAR = X1;
        X1Trans_STAR_MR.TransposeFrom( X1_VR_STAR );
        LocalTrmmAccumulateLLN
        ( TRANSPOSE, diag, alpha, L, X1Trans_STAR_MR, Z1_MC_STAR );

        X1.SumScatterFrom( Z1_MC_STAR );
        //--------------------------------------------------------------------//

        SlidePartitionRight
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 6
0
std::pair<int,double> slice_sample_multi(double x0, vector<slice_function*>& g, double w, int m)
{
  int N = g.size();

  vector<double> X0(N,x0);

  return slice_sample_multi(X0,g,w,m);
}
Esempio n. 7
0
inline void
TrmmRUNA
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T>& U,
                 DistMatrix<T>& X )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrmmRUNA");
    if( U.Grid() != X.Grid() )
        throw std::logic_error("{U,X} must be distributed over the same grid");
#endif
    const Grid& g = U.Grid();

    DistMatrix<T>
        XT(g),  X0(g),
        XB(g),  X1(g),
                X2(g);

    DistMatrix<T,STAR,VC  > X1_STAR_VC(g);
    DistMatrix<T,STAR,MC  > X1_STAR_MC(g);
    DistMatrix<T,MR,  STAR> Z1Trans_MR_STAR(g);
    DistMatrix<T,MR,  MC  > Z1Trans_MR_MC(g);

    X1_STAR_VC.AlignWith( U );
    X1_STAR_MC.AlignWith( U );
    Z1Trans_MR_STAR.AlignWith( U );

    PartitionDown
    ( X, XT,
         XB, 0 );
    while( XT.Height() < X.Height() )
    {
        RepartitionDown
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );

        Z1Trans_MR_MC.AlignWith( X1 );
        //--------------------------------------------------------------------//
        X1_STAR_VC = X1;
        X1_STAR_MC = X1_STAR_VC;
        Zeros( Z1Trans_MR_STAR, X1.Width(), X1.Height() );
        LocalTrmmAccumulateRUN
        ( TRANSPOSE, diag, alpha, U, X1_STAR_MC, Z1Trans_MR_STAR );

        Z1Trans_MR_MC.SumScatterFrom( Z1Trans_MR_STAR );
        Transpose( Z1Trans_MR_MC.Matrix(), X1.Matrix() );
        //--------------------------------------------------------------------//
        Z1Trans_MR_MC.FreeAlignments();

        SlidePartitionDown
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );
    }
}
Esempio n. 8
0
Van_Der_Pol::Van_Der_Pol(void)
{
      lambda = 3.;

      Qw.resize(1);
      Qw(0,0)= 1.;
      Qv.resize(1);
      Qv(0,0)=0.1;
  
      X0.resize(2);
      X0(0) = 0.5;
      X0(1) = 0.5;
      R0.resize(2);
      R0.zero();
      R0(0,0)=0.;
      R0(1,1)=.1;
      Ts=.1;

}
Esempio n. 9
0
bool ACovarianceFunction::check_covariance_Kgrad_x(ACovarianceFunction& covar,mfloat_t relchange,mfloat_t threshold,bool check_diag)
{
	mfloat_t RV=0;
	//copy inputs for which we calculate gradients
	CovarInput X = covar.getX();
	CovarInput X0 = X;
	for (int ic=0;ic<X.cols();ic++)
	{
		//analytical gradient is per columns all in one go:
		MatrixXd Kgrad_x = covar.Kgrad_X(ic);
		MatrixXd Kgrad_x_diag = covar.Kdiag_grad_X(ic);
		for (int ir=0;ir<X.rows();ir++)
		{
			mfloat_t change = relchange*X0(ir,ic);
			change = std::max(change,1E-5);
			X(ir,ic) = X0(ir,ic) + change;
			covar.setX(X);
			MatrixXd Lplus = covar.K();
			X(ir,ic) = X0(ir,ic) - change;
			covar.setX(X);
			MatrixXd Lminus = covar.K();
			X(ir,ic) = X0(ir,ic);
			covar.setX(X);
			//numerical gradient
			MatrixXd diff_numerical = (Lplus-Lminus)/(2.*change);
			//build analytical gradient matrix
			MatrixXd diff_analytical = MatrixXd::Zero(X.rows(),X.rows());
			diff_analytical.row(ir) = Kgrad_x.row(ir);
			diff_analytical.col(ir) += Kgrad_x.row(ir);
			RV+= (diff_numerical-diff_analytical).squaredNorm();
			//difference
			if (check_diag)
			{
				double delta =(diff_numerical(ir,ir)-Kgrad_x_diag(ir));
				RV+= delta*delta;
			}
		} //end for ir
	}
	return (RV < threshold);
}
Esempio n. 10
0
Foam::tmp<Foam::pointField> Foam::RBD::rigidBodyMotion::transformPoints
(
    const label bodyID,
    const pointField& initialPoints
) const
{
    // Calculate the transform from the initial state in the global frame
    // to the current state in the global frame
    spatialTransform X(X0(bodyID).inv() & X00(bodyID));

    tmp<pointField> tpoints(new pointField(initialPoints.size()));
    pointField& points = tpoints.ref();

    forAll(points, i)
    {
        points[i] = X.transformPoint(initialPoints[i]);
    }
Esempio n. 11
0
inline void
TrsmLUNSmall
( UnitOrNonUnit diag,
  F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmLUNSmall");
    if( U.Grid() != X.Grid() )
        throw std::logic_error
        ("U and X must be distributed over the same grid");
    if( U.Height() != U.Width() || U.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrsmLUN: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
    if( U.ColAlignment() != X.ColAlignment() )
        throw std::logic_error("U and X are assumed to be aligned");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F,VC,STAR> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);
    DistMatrix<F,VC,STAR> XT(g),  X0(g),
                          XB(g),  X1(g),
                                  X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,   U00, U01, /**/ U02,
               /**/        U10, U11, /**/ U12,
         /*************/  /******************/
          UBL, /**/ UBR,   U20, U21, /**/ U22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ]
        X1_STAR_STAR = X1;   // X1[* ,* ] <- X1[VC,* ]
        
        // X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ]
        LocalTrsm
        ( LEFT, UPPER, NORMAL, diag,
          F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular );
        X1 = X1_STAR_STAR;

        // X0[VC,* ] -= U01[VC,* ] X1[* ,* ]
        LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 12
0
inline void
TrsmLUNLarge
( UnitOrNonUnit diag,
  F alpha, const DistMatrix<F>& U, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmLUNLarge");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);
    DistMatrix<F> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<F,MC,  STAR> U01_MC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,STAR,MR  > X1_STAR_MR(g);
    DistMatrix<F,STAR,VR  > X1_STAR_VR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,   U00, U01, /**/ U02,
               /**/        U10, U11, /**/ U12,
         /*************/  /******************/
          UBL, /**/ UBR,   U20, U21, /**/ U22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        U01_MC_STAR.AlignWith( X0 );
        X1_STAR_MR.AlignWith( X0 );
        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR]
        X1_STAR_VR    = X1;  // X1[* ,VR] <- X1[MC,MR]
        
        // X1[* ,VR] := U11^-1[* ,* ] X1[* ,VR]
        LocalTrsm
        ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_VR,
          checkIfSingular );

        X1_STAR_MR  = X1_STAR_VR; // X1[* ,MR]  <- X1[* ,VR]
        X1          = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR]
        U01_MC_STAR = U01;        // U01[MC,* ] <- U01[MC,MR]

        // X0[MC,MR] -= U01[MC,* ] X1[* ,MR]
        LocalGemm( NORMAL, NORMAL, F(-1), U01_MC_STAR, X1_STAR_MR, F(1), X0 );
        //--------------------------------------------------------------------//
        U01_MC_STAR.FreeAlignments();
        X1_STAR_MR.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 13
0
bool
ICBinaryArith_Int32::Compiler::generateStubCode(MacroAssembler& masm)
{
    // Guard that R0 is an integer and R1 is an integer.
    Label failure;
    masm.branchTestInt32(Assembler::NotEqual, R0, &failure);
    masm.branchTestInt32(Assembler::NotEqual, R1, &failure);

    // Add R0 and R1. Don't need to explicitly unbox, just use R2.
    Register Rscratch = R2_;
    ARMRegister Wscratch = ARMRegister(Rscratch, 32);
#ifdef MERGE
    // DIV and MOD need an extra non-volatile ValueOperand to hold R0.
    AllocatableGeneralRegisterSet savedRegs(availableGeneralRegs(2));
    savedRegs.set() = GeneralRegisterSet::Intersect(GeneralRegisterSet::NonVolatile(), savedRegs);
#endif
    // get some more ARM-y names for the registers
    ARMRegister W0(R0_, 32);
    ARMRegister X0(R0_, 64);
    ARMRegister W1(R1_, 32);
    ARMRegister X1(R1_, 64);
    ARMRegister WTemp(ExtractTemp0, 32);
    ARMRegister XTemp(ExtractTemp0, 64);
    Label maybeNegZero, revertRegister;
    switch(op_) {
      case JSOP_ADD:
        masm.Adds(WTemp, W0, Operand(W1));

        // Just jump to failure on overflow. R0 and R1 are preserved, so we can
        // just jump to the next stub.
        masm.j(Assembler::Overflow, &failure);

        // Box the result and return. We know R0 already contains the
        // integer tag, so we just need to move the payload into place.
        masm.movePayload(ExtractTemp0, R0_);
        break;

      case JSOP_SUB:
        masm.Subs(WTemp, W0, Operand(W1));
        masm.j(Assembler::Overflow, &failure);
        masm.movePayload(ExtractTemp0, R0_);
        break;

      case JSOP_MUL:
        masm.mul32(R0.valueReg(), R1.valueReg(), Rscratch, &failure, &maybeNegZero);
        masm.movePayload(Rscratch, R0_);
        break;

      case JSOP_DIV:
      case JSOP_MOD: {

        // Check for INT_MIN / -1, it results in a double.
        Label check2;
        masm.Cmp(W0, Operand(INT_MIN));
        masm.B(&check2, Assembler::NotEqual);
        masm.Cmp(W1, Operand(-1));
        masm.j(Assembler::Equal, &failure);
        masm.bind(&check2);
        Label no_fail;
        // Check for both division by zero and 0 / X with X < 0 (results in -0).
        masm.Cmp(W1, Operand(0));
        // If x > 0, then it can't be bad.
        masm.B(&no_fail, Assembler::GreaterThan);
        // if x == 0, then ignore any comparison, and force
        // it to fail, if x < 0 (the only other case)
        // then do the comparison, and fail if y == 0
        masm.Ccmp(W0, Operand(0), vixl::ZFlag, Assembler::NotEqual);
        masm.B(&failure, Assembler::Equal);
        masm.bind(&no_fail);
        masm.Sdiv(Wscratch, W0, W1);
        // Start calculating the remainder, x - (x / y) * y.
        masm.mul(WTemp, W1, Wscratch);
        if (op_ == JSOP_DIV) {
            // Result is a double if the remainder != 0, which happens
            // when (x/y)*y != x.
            masm.branch32(Assembler::NotEqual, R0.valueReg(), ExtractTemp0, &revertRegister);
            masm.movePayload(Rscratch, R0_);
        } else {
            // Calculate the actual mod. Set the condition code, so we can see if it is non-zero.
            masm.Subs(WTemp, W0, WTemp);

            // If X % Y == 0 and X < 0, the result is -0.
            masm.Ccmp(W0, Operand(0), vixl::NoFlag, Assembler::Equal);
            masm.branch(Assembler::LessThan, &revertRegister);
            masm.movePayload(ExtractTemp0, R0_);
        }
        break;
      }
        // ORR, EOR, AND can trivially be coerced int
        // working without affecting the tag of the dest..
      case JSOP_BITOR:
        masm.Orr(X0, X0, Operand(X1));
        break;
      case JSOP_BITXOR:
        masm.Eor(X0, X0, Operand(W1, vixl::UXTW));
        break;
      case JSOP_BITAND:
        masm.And(X0, X0, Operand(X1));
        break;
        // LSH, RSH and URSH can not.
      case JSOP_LSH:
        // ARM will happily try to shift by more than 0x1f.
        masm.Lsl(Wscratch, W0, W1);
        masm.movePayload(Rscratch, R0.valueReg());
        break;
      case JSOP_RSH:
        masm.Asr(Wscratch, W0, W1);
        masm.movePayload(Rscratch, R0.valueReg());
        break;
      case JSOP_URSH:
        masm.Lsr(Wscratch, W0, W1);
        if (allowDouble_) {
            Label toUint;
            // Testing for negative is equivalent to testing bit 31
            masm.Tbnz(Wscratch, 31, &toUint);
            // Move result and box for return.
            masm.movePayload(Rscratch, R0_);
            EmitReturnFromIC(masm);

            masm.bind(&toUint);
            masm.convertUInt32ToDouble(Rscratch, ScratchDoubleReg);
            masm.boxDouble(ScratchDoubleReg, R0, ScratchDoubleReg);
        } else {
            // Testing for negative is equivalent to testing bit 31
            masm.Tbnz(Wscratch, 31, &failure);
            // Move result for return.
            masm.movePayload(Rscratch, R0_);
        }
        break;
      default:
        MOZ_CRASH("Unhandled op for BinaryArith_Int32.");
    }

    EmitReturnFromIC(masm);

    switch (op_) {
      case JSOP_MUL:
        masm.bind(&maybeNegZero);

        // Result is -0 if exactly one of lhs or rhs is negative.
        masm.Cmn(W0, W1);
        masm.j(Assembler::Signed, &failure);

        // Result is +0, so use the zero register.
        masm.movePayload(rzr, R0_);
        EmitReturnFromIC(masm);
        break;
      case JSOP_DIV:
      case JSOP_MOD:
        masm.bind(&revertRegister);
        break;
      default:
        break;
    }

    // Failure case - jump to next stub.
    masm.bind(&failure);
    EmitStubGuardFailure(masm);

    return true;
}
Esempio n. 14
0
inline void
TrmmLLTA
( Orientation orientation, 
  UnitOrNonUnit diag,
  T alpha, 
  const DistMatrix<T>& L,
        DistMatrix<T>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmLLTA");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( orientation == NORMAL )
        throw std::logic_error
        ("TrmmLLTA expects a (Conjugate)Transpose option");
    if( L.Height() != L.Width() || L.Height() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLLTA: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<T> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);
    DistMatrix<T>
        XL(g), XR(g),
        X0(g), X1(g), X2(g);

    DistMatrix<T,MC,STAR> X1_MC_STAR(g);
    DistMatrix<T,MR,STAR> Z1_MR_STAR(g);
    DistMatrix<T,MR,MC  > Z1_MR_MC(g);

    X1_MC_STAR.AlignWith( L );
    Z1_MR_STAR.AlignWith( L );

    PartitionRight( X, XL, XR, 0 );
    while( XL.Width() < X.Width() )
    {
        RepartitionRight
        ( XL, /**/ XR,
          X0, /**/ X1, X2 );

        Zeros( X1.Height(), X1.Width(), Z1_MR_STAR );
        //--------------------------------------------------------------------//
        X1_MC_STAR = X1;
        LocalTrmmAccumulateLLT
        ( orientation, diag, alpha, L, X1_MC_STAR, Z1_MR_STAR );

        Z1_MR_MC.SumScatterFrom( Z1_MR_STAR );
        X1 = Z1_MR_MC;
        //--------------------------------------------------------------------//

        SlidePartitionRight
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 15
0
void benchmark_sort(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 16;

    typedef typename std::conditional<
        std::is_same<float, real>::value, cl_uint, cl_ulong
        >::type key_type;

    std::default_random_engine rng( std::rand() );
    std::uniform_int_distribution<key_type> rnd;

    std::vector<key_type> x0(N);
    std::vector<key_type> x1(N);

    std::generate(x0.begin(), x0.end(), [&]() { return rnd(rng); });

    vex::vector<key_type> X0(ctx, x0);
    vex::vector<key_type> X1(ctx, N);

    X1 = X0;
    vex::sort(X1);

    double tot_time = 0;
    for(size_t i = 0; i < M; i++) {
        X1 = X0;
        ctx.finish();
        prof.tic_cpu("VexCL");
        vex::sort(X1);
        ctx.finish();
        tot_time += prof.toc("VexCL");
    }

    std::cout
        << "Sort (" << vex::type_name<key_type>() << ")\n"
        << "    VexCL:         " << N * M / tot_time << " keys/sec\n";

#ifdef HAVE_BOOST_COMPUTE
    X1 = X0;
    vex::compute::sort(X1);

    tot_time = 0;
    for(size_t i = 0; i < M; i++) {
        X1 = X0;
        ctx.finish();
        prof.tic_cpu("Boost.Compute");
        vex::compute::sort(X1);
        ctx.finish();
        tot_time += prof.toc("Boost.Compute");
    }

    std::cout
        << "    Boost.Compute: " << N * M / tot_time << " keys/sec\n";
#endif

#ifdef HAVE_CLOGS
    X1 = X0;
    vex::clogs::sort(X1);

    tot_time = 0;
    for(size_t i = 0; i < M; i++) {
        X1 = X0;
        ctx.finish();
        prof.tic_cpu("CLOGS");
        vex::clogs::sort(X1);
        ctx.finish();
        tot_time += prof.toc("CLOGS");
    }

    std::cout
        << "    CLOGS:         " << N * M / tot_time << " keys/sec\n";
#endif

    if (options.bm_cpu) {
        tot_time = 0;
        for(size_t i = 0; i < M; i++) {
            std::copy(x0.begin(), x0.end(), x1.begin());
            prof.tic_cpu("STL");
            std::sort(x1.begin(), x1.end());
            tot_time += prof.toc("STL");
        }

        std::cout << "    STL:           " << N * M / tot_time << " keys/sec\n";
    }

    std::cout << std::endl;
}
Esempio n. 16
0
static void
ec_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
{
#if 0
  /* NOTE: This code works only for limb sizes of 32 bit.  */
  mpi_limb_t *wp, *sp;

  if (ctx->nist_nbits == 192)
    {
      mpi_mul (w, u, v);
      mpi_resize (w, 12);
      wp = w->d;

      sp = ctx->s[0]->d;
      sp[0*2+0] = wp[0*2+0];
      sp[0*2+1] = wp[0*2+1];
      sp[1*2+0] = wp[1*2+0];
      sp[1*2+1] = wp[1*2+1];
      sp[2*2+0] = wp[2*2+0];
      sp[2*2+1] = wp[2*2+1];

      sp = ctx->s[1]->d;
      sp[0*2+0] = wp[3*2+0];
      sp[0*2+1] = wp[3*2+1];
      sp[1*2+0] = wp[3*2+0];
      sp[1*2+1] = wp[3*2+1];
      sp[2*2+0] = 0;
      sp[2*2+1] = 0;

      sp = ctx->s[2]->d;
      sp[0*2+0] = 0;
      sp[0*2+1] = 0;
      sp[1*2+0] = wp[4*2+0];
      sp[1*2+1] = wp[4*2+1];
      sp[2*2+0] = wp[4*2+0];
      sp[2*2+1] = wp[4*2+1];

      sp = ctx->s[3]->d;
      sp[0*2+0] = wp[5*2+0];
      sp[0*2+1] = wp[5*2+1];
      sp[1*2+0] = wp[5*2+0];
      sp[1*2+1] = wp[5*2+1];
      sp[2*2+0] = wp[5*2+0];
      sp[2*2+1] = wp[5*2+1];

      ctx->s[0]->nlimbs = 6;
      ctx->s[1]->nlimbs = 6;
      ctx->s[2]->nlimbs = 6;
      ctx->s[3]->nlimbs = 6;

      mpi_add (ctx->c, ctx->s[0], ctx->s[1]);
      mpi_add (ctx->c, ctx->c, ctx->s[2]);
      mpi_add (ctx->c, ctx->c, ctx->s[3]);

      while ( mpi_cmp (ctx->c, ctx->p ) >= 0 )
        mpi_sub ( ctx->c, ctx->c, ctx->p );
      mpi_set (w, ctx->c);
    }
  else if (ctx->nist_nbits == 384)
    {
      int i;
      mpi_mul (w, u, v);
      mpi_resize (w, 24);
      wp = w->d;

#define NEXT(a) do { ctx->s[(a)]->nlimbs = 12; \
                     sp = ctx->s[(a)]->d; \
                     i = 0; } while (0)
#define X(a) do { sp[i++] = wp[(a)];} while (0)
#define X0(a) do { sp[i++] = 0; } while (0)
      NEXT(0);
      X(0);X(1);X(2);X(3);X(4);X(5);X(6);X(7);X(8);X(9);X(10);X(11);
      NEXT(1);
      X0();X0();X0();X0();X(21);X(22);X(23);X0();X0();X0();X0();X0();
      NEXT(2);
      X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);X(20);X(21);X(22);X(23);
      NEXT(3);
      X(21);X(22);X(23);X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);X(20);
      NEXT(4);
      X0();X(23);X0();X(20);X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);
      NEXT(5);
      X0();X0();X0();X0();X(20);X(21);X(22);X(23);X0();X0();X0();X0();
      NEXT(6);
      X(20);X0();X0();X(21);X(22);X(23);X0();X0();X0();X0();X0();X0();
      NEXT(7);
      X(23);X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);X(20);X(21);X(22);
      NEXT(8);
      X0();X(20);X(21);X(22);X(23);X0();X0();X0();X0();X0();X0();X0();
      NEXT(9);
      X0();X0();X0();X(23);X(23);X0();X0();X0();X0();X0();X0();X0();
#undef X0
#undef X
#undef NEXT
      mpi_add (ctx->c, ctx->s[0], ctx->s[1]);
      mpi_add (ctx->c, ctx->c, ctx->s[1]);
      mpi_add (ctx->c, ctx->c, ctx->s[2]);
      mpi_add (ctx->c, ctx->c, ctx->s[3]);
      mpi_add (ctx->c, ctx->c, ctx->s[4]);
      mpi_add (ctx->c, ctx->c, ctx->s[5]);
      mpi_add (ctx->c, ctx->c, ctx->s[6]);
      mpi_sub (ctx->c, ctx->c, ctx->s[7]);
      mpi_sub (ctx->c, ctx->c, ctx->s[8]);
      mpi_sub (ctx->c, ctx->c, ctx->s[9]);

      while ( mpi_cmp (ctx->c, ctx->p ) >= 0 )
        mpi_sub ( ctx->c, ctx->c, ctx->p );
      while ( ctx->c->sign )
        mpi_add ( ctx->c, ctx->c, ctx->p );
      mpi_set (w, ctx->c);
    }
  else
#endif /*0*/
    mpi_mulm (w, u, v, ctx->p);
}
Esempio n. 17
0
inline void
TrsmLLTLarge
( Orientation orientation, UnitOrNonUnit diag,
  F alpha, const DistMatrix<F>& L, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmLLTLarge");
    if( orientation == NORMAL )
        throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option");
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<F> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);
    DistMatrix<F> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,MC  > L10_STAR_MC(g);
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,STAR,MR  > X1_STAR_MR(g);
    DistMatrix<F,STAR,VR  > X1_STAR_VR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 ); 

        L10_STAR_MC.AlignWith( X0 );
        X1_STAR_MR.AlignWith( X0 );
        //--------------------------------------------------------------------//
        L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR]
        X1_STAR_VR    = X1;  // X1[* ,VR] <- X1[MC,MR]

        // X1[* ,VR] := L11^-[T/H][* ,* ] X1[* ,VR]
        LocalTrsm
        ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_STAR_VR,
          checkIfSingular );

        X1_STAR_MR  = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR]
        X1          = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR]
        L10_STAR_MC = L10;        // L10[* ,MC] <- L10[MC,MR]

        // X0[MC,MR] -= (L10[* ,MC])^(T/H) X1[* ,MR]
        //            = L10^[T/H][MC,* ] X1[* ,MR]
        LocalGemm
        ( orientation, NORMAL, F(-1), L10_STAR_MC, X1_STAR_MR, F(1), X0 );
        //--------------------------------------------------------------------//
        L10_STAR_MC.FreeAlignments();
        X1_STAR_MR.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
void OpticalFlow::baseCalculate(cv::Mat& Im1, cv::Mat& Im2, flowUV& UV, const OpticalFlowParams& params){
	int rows = Im1.rows;
	int cols = Im1.cols;

	FlowOperator flowOp(rows, cols);
	FArray X0(2 * rows * cols, false);	

	FArray dUdV(2 * rows * cols, true, 0);
	cv::Mat Ix1(rows, cols, OPTFLOW_TYPE);
	cv::Mat Iy1(rows, cols, OPTFLOW_TYPE); 
	cv::Mat Ix(rows, cols, OPTFLOW_TYPE);
	cv::Mat Iy(rows, cols, OPTFLOW_TYPE); 
	getDXsCV(Im1, Ix1, Iy1);
	for (int i = 0; i < params.getIters(); ++i){
		cv::Mat Ix2(rows, cols, OPTFLOW_TYPE);
		cv::Mat Iy2(rows, cols, OPTFLOW_TYPE); 
		cv::Mat It(rows, cols, OPTFLOW_TYPE); 

		cv::Mat im2Warpped(rows, cols, Im1.type());
		WarpImage(Im2, UV.getU(), UV.getV(), im2Warpped);	
		
		getDXsCV(im2Warpped, Ix2, Iy2);
		Ix = params.getWeightedDeriveFactor() * (Ix1 + Ix2);
		Iy = params.getWeightedDeriveFactor() * (Iy1 + Iy2);
		cv::subtract(im2Warpped, Im1, It);

		if (params.getDisplayDerivativs()){
			cv::imshow("Derivative Ix", Ix);
			cv::imshow("Derivative Iy", Iy);
			cv::waitKey(1);
		}
		
		cv::Mat Du(rows, cols, OPTFLOW_TYPE, cv::Scalar(0));
		cv::Mat Dv(rows, cols, OPTFLOW_TYPE, cv::Scalar(0));


		for (int j = 0; j < params.getLinearIters(); ++j){
#if OPTFLOW_VERBOSE
			cout << "solving Ax=b with SOR ";
			clock_t start = std::clock();	
#endif		
			flowOp.construct(UV, Du, Dv, Ix, Iy, It, params);
			
			memcpy(X0.ptr, UV.getU().data, rows * cols * sizeof(float));
			memcpy(X0.ptr + (rows * cols), UV.getV().data, rows * cols * sizeof(float));
			//UtilsDebug::printCRSSparseMat(flowOp.getA(), "aaaa.txt");
			if (params.getCheckResidualTolerance()){
				LinearSolver::sparseMatSor(flowOp.getA(), X0 ,dUdV, flowOp.getb(), params.getOverRelaxation(), params.getSorIters(), params.getResidualTolerance());
			}else{
				//LinearSolver::multigrid(10,10,flowOp.getA(),flowOp.getb(), params.getResidualTolerance(), dUdV, 20, 20, LinearSolver::vCycle);
				LinearSolver::sparseMatSorNoResidual(flowOp.getA(), X0 ,dUdV, flowOp.getb(), params.getOverRelaxation(), params.getSorIters());
			}
#if OPTFLOW_VERBOSE
		std::cout<<" --- "<< (std::clock() - start) / (double)CLOCKS_PER_SEC <<'\n';
#endif

#if OPTFLOW_DEBUG
			for(int i = 0; i < dUdV.size(); ++i){
				if (!(dUdV.ptr[i] == dUdV.ptr[i])){
					cout << "ERROR - NAN";
				}
			}
#endif

			UtilsMat::clamp(dUdV, -1, 1);

			memcpy(Du.data, dUdV.ptr, rows * cols * sizeof(float));
			memcpy(Dv.data, dUdV.ptr + (rows * cols), rows * cols * sizeof(float));
			
			flowUV UV0(UV);
			UV.getU() += Du;
			UV.getV() += Dv;

			cv::Mat tmpU, tmpV;
			UV.getU().copyTo(tmpU);
			UV.getV().copyTo(tmpV);

			WeightedMedianFilter::computeMedianFilter(UV.getU(), UV.getV(), Im1, Im2, params.getMedianFilterRadius());

			Du = UV.getU() - UV0.getU();
			Dv = UV.getV() - UV0.getV();

			UV0.getU().copyTo(UV.getU());
			UV0.getV().copyTo(UV.getV());

			UV0.getU() += Du;
			UV0.getV() += Dv;
			if (params.isDisplay())
				UtilsFlow::DrawFlow(UV0.getU(), UV0.getV(), "Flow");
		}
		UV.getU() += Du;
		UV.getV() += Dv;
	}
}
static
int f0_val(int base)
{
  int id = X0(base);
  return id2val(id);
}  /* f0_val */
Esempio n. 20
0
inline void
TrsmRUT
( Orientation orientation, UnitOrNonUnit diag,
  F alpha, const DistMatrix<F>& U, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmRUT");
    if( orientation == NORMAL )
        throw std::logic_error("TrsmRUT expects a (Conjugate)Transpose option");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    DistMatrix<F> XL(g), XR(g),
                  X0(g), X1(g), X2(g);

    // Temporary distributions
    DistMatrix<F,VR,  STAR> U01_VR_STAR(g);
    DistMatrix<F,STAR,MR  > U01AdjOrTrans_STAR_MR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,VC,  STAR> X1_VC_STAR(g);
    DistMatrix<F,STAR,MC  > X1Trans_STAR_MC(g);
    
    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionLeft( X, XL, XR, 0 );
    while( XL.Width() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );

        RepartitionLeft
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 );

        X1_VC_STAR.AlignWith( X0 );
        X1Trans_STAR_MC.AlignWith( X0 );
        U01_VR_STAR.AlignWith( X0 );
        U01AdjOrTrans_STAR_MR.AlignWith( X0 );
        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11;
        X1_VC_STAR = X1; 

        LocalTrsm
        ( RIGHT, UPPER, orientation, diag, 
          F(1), U11_STAR_STAR, X1_VC_STAR, checkIfSingular );

        X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR );
        X1.TransposeFrom( X1Trans_STAR_MC );
        U01_VR_STAR = U01;
        if( orientation == ADJOINT )
            U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR );
        else
            U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR );

        // X0[MC,MR] -= X1[MC,* ] (U01[MR,* ])^(T/H)
        //            = X1^T[* ,MC] (U01^(T/H))[* ,MR]
        LocalGemm
        ( TRANSPOSE, NORMAL, 
          F(-1), X1Trans_STAR_MC, U01AdjOrTrans_STAR_MR, F(1), X0 );
        //--------------------------------------------------------------------//
        X1_VC_STAR.FreeAlignments();
        X1Trans_STAR_MC.FreeAlignments();
        U01_VR_STAR.FreeAlignments();
        U01AdjOrTrans_STAR_MR.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12, 
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionLeft
        ( XL, /**/     XR,
          X0, /**/ X1, X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 21
0
inline void
TrmmLLNC
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T>& L,
                 DistMatrix<T>& X )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrmmLLNC");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( L.Height() != L.Width() || L.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLLNC: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<T> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);
    DistMatrix<T> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<T,MC,  STAR> L21_MC_STAR(g);
    DistMatrix<T,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<T,STAR,VR  > X1_STAR_VR(g);
    DistMatrix<T,MR,  STAR> X1Trans_MR_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        L21_MC_STAR.AlignWith( X2 );
        X1Trans_MR_STAR.AlignWith( X2 );
        X1_STAR_VR.AlignWith( X1 );
        //--------------------------------------------------------------------//
        L21_MC_STAR = L21;
        X1Trans_MR_STAR.TransposeFrom( X1 );
        LocalGemm
        ( NORMAL, TRANSPOSE, T(1), L21_MC_STAR, X1Trans_MR_STAR, T(1), X2 );

        L11_STAR_STAR = L11;
        X1_STAR_VR.TransposeFrom( X1Trans_MR_STAR );
        LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR );
        X1 = X1_STAR_VR;
        //--------------------------------------------------------------------//
        L21_MC_STAR.FreeAlignments();
        X1Trans_MR_STAR.FreeAlignments();
        X1_STAR_VR.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12, 
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
}
Esempio n. 22
0
inline void
TrmmRUNC
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T>& U,
                 DistMatrix<T>& X )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrmmRUNC");
    if( U.Grid() != X.Grid() )
        throw std::logic_error
        ("U and X must be distributed over the same grid");
    if( U.Height() != U.Width() || X.Width() != U.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmRUNC: \n"
            << "  U ~ " << U.Height() << " x " << U.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<T> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    DistMatrix<T> XL(g), XR(g),
                  X0(g), X1(g), X2(g);

    // Temporary distributions
    DistMatrix<T,MR,  STAR> U12Trans_MR_STAR(g);
    DistMatrix<T,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<T,VC,  STAR> X1_VC_STAR(g);
    DistMatrix<T,MC,  STAR> X1_MC_STAR(g);
    
    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionLeft( X, XL, XR, 0 );
    while( XL.Width() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12, 
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );

        RepartitionLeft
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 ); 

        X1_MC_STAR.AlignWith( X2 );
        U12Trans_MR_STAR.AlignWith( X2 );
        X1_VC_STAR.AlignWith( X1 );
        //--------------------------------------------------------------------//
        X1_MC_STAR = X1;
        U12Trans_MR_STAR.TransposeFrom( U12 );
        LocalGemm
        ( NORMAL, TRANSPOSE, T(1), X1_MC_STAR, U12Trans_MR_STAR, T(1), X2 );

        U11_STAR_STAR = U11;
        X1_VC_STAR = X1_MC_STAR;
        LocalTrmm
        ( RIGHT, UPPER, NORMAL, diag, T(1), U11_STAR_STAR, X1_VC_STAR );
        X1 = X1_VC_STAR;
        //--------------------------------------------------------------------//
        X1_MC_STAR.FreeAlignments();
        U12Trans_MR_STAR.FreeAlignments();
        X1_VC_STAR.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionLeft
        ( XL, /**/ XR,
          X0, /**/ X1, X2 );
    }
}
Esempio n. 23
0
inline void
TrsmLLTSmall
( Orientation orientation, UnitOrNonUnit diag,
  F alpha, const DistMatrix<F,STAR,VR>& L, DistMatrix<F,VR,STAR>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmLLTSmall");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( orientation == NORMAL )
        throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option");
    if( L.Height() != L.Width() || L.Height() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrsmLLT: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
    if( L.RowAlignment() != X.ColAlignment() )
        throw std::logic_error("L and X must be aligned");
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<F,STAR,VR> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);

    DistMatrix<F,VR,STAR> XT(g),  X0(g),
                          XB(g),  X1(g),
                                  X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 ); 

        //--------------------------------------------------------------------//
        L11_STAR_STAR = L11; // L11[* ,* ] <- L11[* ,VR]
        X1_STAR_STAR = X1;   // X1[* ,* ] <- X1[VR,* ]

        // X1[* ,* ] := L11^-[T/H][* ,* ] X1[* ,* ]
        LocalTrsm
        ( LEFT, LOWER, orientation, diag,
          F(1), L11_STAR_STAR, X1_STAR_STAR, checkIfSingular );

        X1 = X1_STAR_STAR;

        // X0[VR,* ] -= L10[* ,VR]^(T/H) X1[* ,* ]
        LocalGemm( orientation, NORMAL, F(-1), L10, X1_STAR_STAR, F(1), X0 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 24
0
inline void
TrmmRLNCOld
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T>& L,
                 DistMatrix<T>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmRLNCOld");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( L.Height() != L.Width() || X.Width() != L.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmRLNC: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<T> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);

    DistMatrix<T> XL(g), XR(g),
                  X0(g), X1(g), X2(g);

    // Temporary distributions
    DistMatrix<T,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<T,MR,  STAR> L21_MR_STAR(g);
    DistMatrix<T,VC,  STAR> X1_VC_STAR(g);
    DistMatrix<T,MC,  STAR> D1_MC_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionRight( X, XL, XR, 0 );
    while( XR.Width() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );
 
        RepartitionRight
        ( XL, /**/ XR,
          X0, /**/ X1, X2 );

        L21_MR_STAR.AlignWith( X2 );
        D1_MC_STAR.AlignWith( X1 );
        Zeros( X1.Height(), X1.Width(), D1_MC_STAR );
        //--------------------------------------------------------------------//
        X1_VC_STAR = X1;
        L11_STAR_STAR = L11;
        LocalTrmm
        ( RIGHT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_VC_STAR );
        X1 = X1_VC_STAR;
 
        L21_MR_STAR = L21;
        LocalGemm( NORMAL, NORMAL, T(1), X2, L21_MR_STAR, T(0), D1_MC_STAR );
        X1.SumScatterUpdate( T(1), D1_MC_STAR );
        //--------------------------------------------------------------------//
        L21_MR_STAR.FreeAlignments();
        D1_MC_STAR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlidePartitionRight
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 25
0
void StereogramWidget::paintEvent(QPaintEvent* event)
{
	m_radius = 0; //means that nothing has been drawn (yet ;)

	QLabel::paintEvent(event);
	QPainter painter(this);
	painter.setRenderHints(QPainter::Antialiasing | QPainter::TextAntialiasing, true);

	//pen
	QPen pen;
	pen.setStyle(Qt::SolidLine);
	pen.setBrush(QColor(Qt::black));

	int diameter = std::min(width(),height());
	int halfW = width()/2;
	int halfH = height()/2;
	QPoint center(halfW,halfH);

	int hsvThickness = 0;
	if (m_showHSVRing)
	{
		int newDiameter = static_cast<int>(ceil(0.9*static_cast<double>(diameter))); 
		hsvThickness = diameter - newDiameter;

		//TODO
		if (hsvThickness > 0)
		{
			QRect rectangle(center.x()-diameter/2+1,center.y()-diameter/2+1,diameter-2,diameter-2);
			int angle_span = static_cast<int>(m_angularStep_deg * 16.0); //see QPainter::drawPie
			QBrush brush;
			brush.setStyle(Qt::SolidPattern);
			painter.setPen(Qt::NoPen);

			//dip direction steps (dip dir. in [0,360])
			unsigned ddSteps = static_cast<unsigned>(ceil(360.0 / std::max(m_angularStep_deg,1.0)));
			for (unsigned j=0; j<ddSteps; ++j)
			{
				double dipDir_deg = static_cast<double>(j) * m_angularStep_deg;

				//set family color
				ccColor::Rgb col;
				FacetsClassifier::GenerateSubfamilyColor(col, 90.0, dipDir_deg + 0.5 * m_angularStep_deg, 0, 1);
				brush.setColor(QColor(	static_cast<int>(col.r),
										static_cast<int>(col.g),
										static_cast<int>(col.b),
										255));
				painter.setBrush(brush);

				int angle_start = static_cast<int>((360.0 - dipDir_deg - m_angularStep_deg + 90.0) * 16.0); //see QPainter::drawPie
				painter.drawPie(rectangle,angle_start,angle_span);
			}
		}

		diameter = newDiameter; 
	}

	//outer circle
	pen.setWidth(2);
	painter.setPen(pen);
	painter.setBrush(Qt::white);
	int radius = diameter/2 - 2;
	painter.drawEllipse(center,radius,radius);
	painter.setBrush(Qt::NoBrush);

	//keep track of the circle position
	m_radius = radius;
	m_center = center;

	//main axes
	painter.drawLine(center-QPoint(radius,0),center+QPoint(radius,0));
	painter.drawLine(center-QPoint(0,radius),center+QPoint(0,radius));

	//draw circles
	if (m_angularStep_deg > 0)
	{
		//dip steps (dip in [0,90])
		unsigned dSteps = static_cast<unsigned>(ceil(90.0 / m_angularStep_deg));
		//dip direction steps (dip dir. in [0,360])
		unsigned ddSteps = static_cast<unsigned>(ceil(360.0 / m_angularStep_deg));

		//draw inner circles
		pen.setWidth(1);
		pen.setColor(Qt::gray);
		painter.setPen(pen);
		for (unsigned i=1; i<dSteps; ++i)
		{
			double dip_deg = static_cast<double>(i) * m_angularStep_deg;
			if (dip_deg < 90.0)
			{
				int R = static_cast<int>(static_cast<double>(radius) * (dip_deg/90.0));
				if (R > 1)
					painter.drawEllipse(center,R-1,R-1);
			}
		}

		//draw rays (+ 'm_ticksFreq' times more ticks)
		int ticksFreq = std::max(m_ticksFreq,1);
		for (unsigned j=1; j<=ddSteps*ticksFreq; ++j)
		{
			double dipDir_deg = static_cast<double>(j) * m_angularStep_deg / static_cast<double>(ticksFreq);
			if (dipDir_deg < 360.0)
			{
				QPoint X(	 static_cast<int>(sin(dipDir_deg * CC_DEG_TO_RAD) * static_cast<double>(radius)),
					-static_cast<int>(cos(dipDir_deg * CC_DEG_TO_RAD) * static_cast<double>(radius)) );

				if ((j % ticksFreq) == 0) //long ticks
					painter.drawLine(center,center+X);
				else
					painter.drawLine(center+X*0.93,center+X);
			}
		}
	}

	//draw density map
	if (m_densityGrid && m_densityColorScale && m_densityGrid->grid && m_densityGrid->minMaxDensity[1] != 0)
	{
		assert(m_densityColorScale);
		assert(m_densityGrid->grid);

		QBrush brush;
		brush.setStyle(Qt::SolidPattern);
		painter.setPen(Qt::NoPen);
		QPolygon poly(4);

		const double* d = m_densityGrid->grid;
		for (unsigned j=0; j<m_densityGrid->ddSteps; ++j)
		{
			double dipDir0_rad = static_cast<double>(j) * m_densityGrid->step_deg * CC_DEG_TO_RAD;
			double dipDir1_rad = static_cast<double>(j+1) * m_densityGrid->step_deg * CC_DEG_TO_RAD;
			double cos_dipDir0 = cos(dipDir0_rad);
			double sin_dipDir0 = sin(dipDir0_rad);
			double cos_dipDir1 = cos(dipDir1_rad);
			double sin_dipDir1 = sin(dipDir1_rad);

			for (unsigned i=0; i<m_densityGrid->rSteps; ++i, ++d)
			{
				if (*d != 0)
				{
					double relPos = static_cast<double>(*d)/static_cast<double>(m_densityGrid->minMaxDensity[1]);
					const colorType* col = m_densityColorScale->getColorByRelativePos(relPos,m_densityColorScaleSteps);
					brush.setColor(QColor(	static_cast<int>(col[0]),
						static_cast<int>(col[1]),
						static_cast<int>(col[2]),
						255));
					painter.setBrush(brush);

					//stereographic projection
					//double dip0_rad = static_cast<double>(i) * m_densityGrid->step_deg * CC_DEG_TO_RAD;
					//double dip1_rad = static_cast<double>(i+1) * m_densityGrid->step_deg * CC_DEG_TO_RAD;
					//double R0 = static_cast<double>(radius) * cos(dip0_rad) / (1.0 + sin(dip0_rad));
					//double R1 = static_cast<double>(radius) * cos(dip1_rad) / (1.0 + sin(dip1_rad));
					double R0 = static_cast<double>(radius) * static_cast<double>(i) * m_densityGrid->step_R;
					double R1 = static_cast<double>(radius) * static_cast<double>(i+1) * m_densityGrid->step_R;

					poly.setPoint(0,center+QPoint(static_cast<int>(sin_dipDir0 * R0),-static_cast<int>(cos_dipDir0 * R0)));
					poly.setPoint(1,center+QPoint(static_cast<int>(sin_dipDir0 * R1),-static_cast<int>(cos_dipDir0 * R1)));
					poly.setPoint(2,center+QPoint(static_cast<int>(sin_dipDir1 * R1),-static_cast<int>(cos_dipDir1 * R1)));
					poly.setPoint(3,center+QPoint(static_cast<int>(sin_dipDir1 * R0),-static_cast<int>(cos_dipDir1 * R0)));

					painter.drawPolygon(poly);
				}
			}
		}
	}

	//draw main 'dip direction'
	if (m_meanDipDir_deg >= 0)
	{
		pen.setWidth(2);
		pen.setColor(Qt::red);
		painter.setPen(pen);
		//draw main direction
		QPoint X(	 static_cast<int>(sin(m_meanDipDir_deg * CC_DEG_TO_RAD) * static_cast<double>(radius)),
			-static_cast<int>(cos(m_meanDipDir_deg * CC_DEG_TO_RAD) * static_cast<double>(radius)) );
		pen.setStyle(Qt::DashLine);
		painter.setPen(pen);
		painter.drawLine(center,center+X);

		//draw orthogonal to main direction
		QPoint Y(	static_cast<int>(cos(m_meanDipDir_deg * CC_DEG_TO_RAD) * static_cast<double>(radius)),
			static_cast<int>(sin(m_meanDipDir_deg * CC_DEG_TO_RAD) * static_cast<double>(radius)) );
		pen.setStyle(Qt::SolidLine);
		painter.setPen(pen);
		painter.drawLine(center-Y,center+Y);
	}

	//draw filter window around last cliked point
	if (m_trackMouseClick)
	{
		pen.setWidth(2);
		pen.setColor(Qt::magenta);
		painter.setPen(pen);
		//QBrush brush;
		//brush.setStyle(Qt::Dense6Pattern);
		//brush.setColor(Qt::red);
		//painter.setBrush(brush);
		painter.setBrush(Qt::NoBrush);

		double R0 = static_cast<double>(radius) * (std::max(0.0,m_clickDip_deg-m_clickDipSpan_deg/2)/90.0);
		double R1 = static_cast<double>(radius) * (std::min(90.0,m_clickDip_deg+m_clickDipSpan_deg/2)/90.0);

		//draw radial limits
		{
			QPoint X0(	 static_cast<int>(sin((m_clickDipDir_deg-m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R0),
				-static_cast<int>(cos((m_clickDipDir_deg-m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R0) );
			QPoint X1(	 static_cast<int>(sin((m_clickDipDir_deg-m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R1),
				-static_cast<int>(cos((m_clickDipDir_deg-m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R1) );
			painter.drawLine(center+X0,center+X1);
		}
		{
			QPoint X0(	 static_cast<int>(sin((m_clickDipDir_deg+m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R0),
				-static_cast<int>(cos((m_clickDipDir_deg+m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R0) );
			QPoint X1(	 static_cast<int>(sin((m_clickDipDir_deg+m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R1),
				-static_cast<int>(cos((m_clickDipDir_deg+m_clickDipDirSpan_deg/2) * CC_DEG_TO_RAD) * R1) );
			painter.drawLine(center+X0,center+X1);
		}

		//draw concentric limits
		{
			int angle_start = static_cast<int>((360.0 - m_clickDipDir_deg - m_clickDipDirSpan_deg/2 + 90.0) * 16.0); //see QPainter::drawPie
			int angle_span = static_cast<int>(m_clickDipDirSpan_deg * 16.0); //see QPainter::drawPie

			QRectF rect0(static_cast<double>(center.x()) - R0,
				static_cast<double>(center.y()) - R0,
				2*R0, 2*R0);
			painter.drawArc(rect0,angle_start,angle_span);

			QRectF rect1(static_cast<double>(center.x()) - R1,
				static_cast<double>(center.y()) - R1,
				2*R1, 2*R1);
			painter.drawArc(rect1,angle_start,angle_span);
		}
	}
}
Esempio n. 26
0
inline void
TrsmRUN
( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    PushCallStack("internal::TrsmRUN");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);

    DistMatrix<F> XL(g), XR(g),
                  X0(g), X1(g), X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); 
    DistMatrix<F,STAR,MR  > U12_STAR_MR(g);
    DistMatrix<F,VC,  STAR> X1_VC_STAR(g);    
    DistMatrix<F,STAR,MC  > X1Trans_STAR_MC(g);
    
    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionDownDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionRight( X, XL, XR, 0 );
    while( XR.Width() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12, 
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        RepartitionRight
        ( XL, /**/     XR,
          X0, /**/ X1, X2 ); 

        X1_VC_STAR.AlignWith( X2 );
        X1Trans_STAR_MC.AlignWith( X2 );
        U12_STAR_MR.AlignWith( X2 );
        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11; 
        X1_VC_STAR = X1;

        LocalTrsm
        ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_VC_STAR,
          checkIfSingular );

        X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR );
        X1.TransposeFrom( X1Trans_STAR_MC );
        U12_STAR_MR = U12; 

        // X2[MC,MR] -= X1[MC,* ] U12[* ,MR]
        //            = X1^T[* ,MC] U12[* ,MR]
        LocalGemm
        ( TRANSPOSE, NORMAL, F(-1), X1Trans_STAR_MC, U12_STAR_MR, F(1), X2 );
        //--------------------------------------------------------------------//
        X1_VC_STAR.FreeAlignments();
        X1Trans_STAR_MC.FreeAlignments();
        U12_STAR_MR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( UTL, /**/ UTR,  U00, U01, /**/ U02,
               /**/       U10, U11, /**/ U12,
         /*************/ /******************/
          UBL, /**/ UBR,  U20, U21, /**/ U22 );

        SlidePartitionRight
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 27
0
void benchmark_scan(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 16;

    typedef typename std::conditional<
        std::is_same<float, real>::value, cl_uint, cl_ulong
        >::type key_type;

    std::default_random_engine rng( std::rand() );
    std::uniform_int_distribution<key_type> rnd;

    std::vector<key_type> x0(N);
    std::vector<key_type> x1(N);

    std::generate(x0.begin(), x0.end(), [&]() { return rnd(rng); });

    vex::vector<key_type> X0(ctx, x0);
    vex::vector<key_type> X1(ctx, N);

    vex::exclusive_scan(X0, X1);

    ctx.finish();
    prof.tic_cpu("VexCL");

    for(size_t i = 0; i < M; i++)
        vex::exclusive_scan(X0, X1);

    ctx.finish();
    double tot_time = prof.toc("VexCL");

    std::cout
        << "Scan (" << vex::type_name<key_type>() << ")\n"
        << "    VexCL:         " << N * M / tot_time << " keys/sec\n";

#ifdef HAVE_BOOST_COMPUTE
    vex::compute::exclusive_scan(X0, X1);

    ctx.finish();
    prof.tic_cpu("Boost.Compute");

    for(size_t i = 0; i < M; i++)
        vex::compute::exclusive_scan(X0, X1);

    ctx.finish();
    tot_time = prof.toc("Boost.Compute");

    std::cout
        << "    Boost.Compute: " << N * M / tot_time << " keys/sec\n";
#endif

#ifdef HAVE_CLOGS
    vex::clogs::exclusive_scan(X0, X1);

    ctx.finish();
    prof.tic_cpu("CLOGS");

    for(size_t i = 0; i < M; i++)
        vex::clogs::exclusive_scan(X0, X1);

    ctx.finish();
    tot_time = prof.toc("CLOGS");

    std::cout
        << "    CLOGS:         " << N * M / tot_time << " keys/sec\n";
#endif

    if (options.bm_cpu) {
        prof.tic_cpu("CPU");
        for(size_t i = 0; i < M; i++) {
            key_type sum = key_type();
            for(size_t j = 0; j < N; ++j) {
                key_type next = sum + x0[j];
                x1[j] = sum;
                sum = next;
            }
        }
        tot_time = prof.toc("CPU");

        std::cout << "    CPU:           " << N * M / tot_time << " keys/sec\n";
    }

    std::cout << std::endl;
}
Esempio n. 28
0
inline void
TrsmRLT
( Orientation orientation, UnitOrNonUnit diag,
  F alpha, const DistMatrix<F>& L, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrsmRLT");
    if( orientation == NORMAL )
        LogicError("TrsmRLT expects a (Conjugate)Transpose option");
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<F> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);

    DistMatrix<F> XL(g), XR(g),
                  X0(g), X1(g), X2(g);

    // Temporary distributions
    DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<F,VR,  STAR> L21_VR_STAR(g);
    DistMatrix<F,STAR,MR  > L21AdjOrTrans_STAR_MR(g);
    DistMatrix<F,VC,  STAR> X1_VC_STAR(g);
    DistMatrix<F,STAR,MC  > X1Trans_STAR_MC(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionRight( X, XL, XR, 0 );
    while( XR.Width() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        RepartitionRight
        ( XL, /**/     XR,
          X0, /**/ X1, X2 );

        X1_VC_STAR.AlignWith( X2 );
        X1Trans_STAR_MC.AlignWith( X2 );
        L21_VR_STAR.AlignWith( X2 );
        L21AdjOrTrans_STAR_MR.AlignWith( X2 );
        //--------------------------------------------------------------------//
        L11_STAR_STAR = L11; 
        X1_VC_STAR = X1;  
        
        LocalTrsm
        ( RIGHT, LOWER, orientation, diag, 
          F(1), L11_STAR_STAR, X1_VC_STAR, checkIfSingular );

        X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR );
        X1.TransposeFrom( X1Trans_STAR_MC );
        L21_VR_STAR = L21;
        if( orientation == ADJOINT )
            L21AdjOrTrans_STAR_MR.AdjointFrom( L21_VR_STAR );
        else
            L21AdjOrTrans_STAR_MR.TransposeFrom( L21_VR_STAR );

        // X2[MC,MR] -= X1[MC,*] (L21[MR,*])^(T/H)
        //            = X1^T[* ,MC] (L21^(T/H))[*,MR]
        LocalGemm
        ( TRANSPOSE, NORMAL, 
          F(-1), X1Trans_STAR_MC, L21AdjOrTrans_STAR_MR, F(1), X2 );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlidePartitionRight
        ( XL,     /**/ XR,
          X0, X1, /**/ X2 );
    }
}
Esempio n. 29
0
inline void
TrmmLLTCOld
( Orientation orientation, 
  UnitOrNonUnit diag,
  T alpha, 
  const DistMatrix<T>& L,
        DistMatrix<T>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmLLTCOld");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( orientation == NORMAL )
        throw std::logic_error("TrmmLLT expects a (Conjugate)Transpose option");
    if( L.Height() != L.Width() || L.Height() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLLTC: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<T> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);
    DistMatrix<T> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<T,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<T,MC,  STAR> L21_MC_STAR(g);
    DistMatrix<T,STAR,VR  > X1_STAR_VR(g);
    DistMatrix<T,MR,  STAR> D1AdjOrTrans_MR_STAR(g);
    DistMatrix<T,MR,  MC  > D1AdjOrTrans_MR_MC(g);
    DistMatrix<T,MC,  MR  > D1(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionDown
    ( X, XT,
         XB, 0 );
    while( XB.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        RepartitionDown
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 ); 

        L21_MC_STAR.AlignWith( X2 );
        D1AdjOrTrans_MR_STAR.AlignWith( X1 );
        D1AdjOrTrans_MR_MC.AlignWith( X1 );
        D1.AlignWith( X1 );
        Zeros( X1.Width(), X1.Height(), D1AdjOrTrans_MR_STAR );
        Zeros( X1.Height(), X1.Width(), D1 );
        //--------------------------------------------------------------------//
        X1_STAR_VR = X1;
        L11_STAR_STAR = L11;
        LocalTrmm
        ( LEFT, LOWER, orientation, diag, T(1), L11_STAR_STAR, X1_STAR_VR );
        X1 = X1_STAR_VR;
 
        L21_MC_STAR = L21;
        LocalGemm
        ( orientation, NORMAL, 
          T(1), X2, L21_MC_STAR, T(0), D1AdjOrTrans_MR_STAR );
        D1AdjOrTrans_MR_MC.SumScatterFrom( D1AdjOrTrans_MR_STAR );
        if( orientation == TRANSPOSE )
            Transpose( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() );
        else
            Adjoint( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() );
        Axpy( T(1), D1, X1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();
        D1AdjOrTrans_MR_MC.FreeAlignments();
        D1AdjOrTrans_MR_STAR.FreeAlignments();
        L21_MC_STAR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlidePartitionDown
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Esempio n. 30
0
inline void
TrsmLUNMedium
( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X,
  bool checkIfSingular )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrsmLUNMedium");
#endif
    const Grid& g = U.Grid();

    // Matrix views
    DistMatrix<F> 
        UTL(g), UTR(g),  U00(g), U01(g), U02(g),
        UBL(g), UBR(g),  U10(g), U11(g), U12(g),
                         U20(g), U21(g), U22(g);
    DistMatrix<F> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<F,MC,  STAR> U01_MC_STAR(g);
    DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
    DistMatrix<F,MR,  STAR> X1Trans_MR_STAR(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( U, UTL, UTR,
         UBL, UBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( UTL, /**/ UTR,   U00, U01, /**/ U02,
               /**/        U10, U11, /**/ U12,
         /*************/  /******************/
          UBL, /**/ UBR,   U20, U21, /**/ U22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        U01_MC_STAR.AlignWith( X0 );
        X1Trans_MR_STAR.AlignWith( X0 );
        //--------------------------------------------------------------------//
        U11_STAR_STAR = U11;                 // U11[* ,* ] <- U11[MC,MR]
        X1Trans_MR_STAR.TransposeFrom( X1 ); // X1[* ,MR] <- X1[MC,MR]
        
        // X1[* ,MR] := U11^-1[* ,* ] X1[* ,MR]
        //
        // X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ]
        LocalTrsm
        ( RIGHT, UPPER, TRANSPOSE, diag, 
          F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular );
        X1.TransposeFrom( X1Trans_MR_STAR );

        U01_MC_STAR = U01;  // U01[MC,* ] <- U01[MC,MR]

        // X0[MC,MR] -= U01[MC,* ] X1[* ,MR]
        LocalGemm
        ( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 );
        //--------------------------------------------------------------------//
        U01_MC_STAR.FreeAlignments();
        X1Trans_MR_STAR.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( UTL, /**/ UTR,  U00, /**/ U01, U02,
         /*************/ /******************/
               /**/       U10, /**/ U11, U12,
          UBL, /**/ UBR,  U20, /**/ U21, U22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
}