Exemple #1
0
Fichier : M.cpp Projet : GG31/ibm
 virtual void compute(const vd::Time& t)
 {
     f2 = DSAT() * C2();
     f1 = C1() * p1;
     grad(C3) = (f2);
     grad(C1) = 0 - (f1);
     grad(DSAT) = 0;
     grad(C2) = (f1) - (f2);
 }
	bool collision(const CircleHitBox &cercleBox1, const CircleHitBox &cercleBox2)
	{
		sf::Vector2f C1(cercleBox1.p), C2(cercleBox2.p);
		float d2 = (C1.x-C2.x)*(C1.x-C2.x) + (C1.y-C2.y)*(C1.y-C2.y);
		if (d2 > (cercleBox1.rayon + cercleBox2.rayon)*(cercleBox1.rayon + cercleBox2.rayon))
			return false;
		else
			return true;
	}
Exemple #3
0
double SpinAdapted::CreCreDesDes::redMatrixElement(Csf c1, vector<Csf>& ladder, const SpinBlock* b)
{
  assert( build_pattern == "(((CC)(D))(D))" );
  double element = 0.0;
  int I = get_orbs()[0]; 
  int J = get_orbs()[1];
  int K = get_orbs()[2];
  int L = get_orbs()[3];

  // Must take into account how the 4-index is built from a combination of the 2-index ops
  std::vector<SpinQuantum> quantum_ladder = get_quantum_ladder().at("(((CC)(D))(D))");
  assert( quantum_ladder.size() == 3 );

  SpinQuantum deltaQuantum12 = quantum_ladder.at(0);
  SpinQuantum deltaQuantum123 = quantum_ladder.at(1);
  SpinQuantum deltaQuantum1234 = quantum_ladder.at(2);
//FIXME components != 0
  deltaQuantum[0] = deltaQuantum1234;

  // Spin quantum data for CC
  IrrepSpace sym12 = deltaQuantum12.get_symm();
  int irrep12 = deltaQuantum12.get_symm().getirrep();
  int spin12 = deltaQuantum12.get_s().getirrep();
  // Spin quantum data for (CC)D
  IrrepSpace sym123 = deltaQuantum123.get_symm();
  int irrep123 = deltaQuantum123.get_symm().getirrep();
  int spin123= deltaQuantum123.get_s().getirrep();
  // Spin quantum data for total operator
  IrrepSpace sym1234 = deltaQuantum1234.get_symm();
  int irrep1234 = deltaQuantum1234.get_symm().getirrep();
  int spin1234 = deltaQuantum1234.get_s().getirrep();

  TensorOp C1(I, 1); 
  TensorOp C2(J, 1); 
  TensorOp D3(K,-1); 
  TensorOp D4(L,-1); 

  TensorOp CC = C1.product(C2, spin12, irrep12);
  TensorOp CCD = CC.product(D3, spin123, irrep123);
  TensorOp CCDD = CCD.product(D4, spin1234, irrep1234);

//FIXME loop over deltaQuantum components
  int j = 0;
  for (int i=0; i<ladder.size(); i++)
  {
    int index = 0; double cleb=0.0;
    if (nonZeroTensorComponent(c1, deltaQuantum[j], ladder[i], index, cleb)) {
      std::vector<double> MatElements = calcMatrixElements(c1, CCDD, ladder[i]) ;
      element = MatElements[index]/cleb;
      break;
    }
    else
      continue;
  }
  return element;
}
Exemple #4
0
double SpinAdapted::StackCreCreCreCre::redMatrixElement(Csf c1, vector<Csf>& ladder, const StackSpinBlock* b)
{
  assert( build_pattern == "(((CC)(C))(C))" );
  double element = 0.0;
  int I = get_orbs()[0]; 
  int J = get_orbs()[1];
  int K = get_orbs()[2];
  int L = get_orbs()[3];
  int Slaterlength = c1.det_rep.begin()->first.size();
  vector<bool> backupSlater1(Slaterlength,0), backupSlater2(Slaterlength,0);

  // Must take into account how the 4-index is built from a combination of the 2-index ops
  std::vector<SpinQuantum> quantum_ladder = get_quantum_ladder().at("(((CC)(C))(C))");
  assert( quantum_ladder.size() == 3 );

  SpinQuantum deltaQuantum12 = quantum_ladder.at(0);
  SpinQuantum deltaQuantum123 = quantum_ladder.at(1);
  SpinQuantum deltaQuantum1234 = quantum_ladder.at(2);
  deltaQuantum[0] = deltaQuantum1234;

  // Spin quantum data for CC
  IrrepSpace sym12 = deltaQuantum12.get_symm();
  int irrep12 = deltaQuantum12.get_symm().getirrep();
  int spin12 = deltaQuantum12.get_s().getirrep();
  // Spin quantum data for (CC)C
  IrrepSpace sym123 = deltaQuantum123.get_symm();
  int irrep123 = deltaQuantum123.get_symm().getirrep();
  int spin123= deltaQuantum123.get_s().getirrep();
  // Spin quantum data for total operator
  IrrepSpace sym1234 = deltaQuantum1234.get_symm();
  int irrep1234 = deltaQuantum1234.get_symm().getirrep();
  int spin1234 = deltaQuantum1234.get_s().getirrep();

  TensorOp C1(I, 1); 
  TensorOp C2(J, 1); 
  TensorOp C3(K, 1); 
  TensorOp C4(L, 1); 

  TensorOp CC = C1.product(C2, spin12, irrep12);
  TensorOp CCC = CC.product(C3, spin123, irrep123);
  TensorOp CCCC = CCC.product(C4, spin1234, irrep1234);

  for (int i=0; i<ladder.size(); i++)
  {
    int index = 0; double cleb=0.0;
    if (nonZeroTensorComponent(c1, deltaQuantum[0], ladder[i], index, cleb)) {
      std::vector<double> MatElements = calcMatrixElements(c1, CCCC, ladder[i], backupSlater1, backupSlater2) ;
      element = MatElements[index]/cleb;
      break;
    }
    else
      continue;
  }
  return element;
}
void RatioNSDistanceTransform::processRow(const BinaryPixelType *imageRow) {
    int col;
#define N1_SETMINUS_N2_COUNT  1
#define N2_SETMINUS_N1_COUNT  5
#define N1_CAP_N2_COUNT 3
    static vect n1[N1_SETMINUS_N2_COUNT]   = {{-1, 1}};
    static vect n2[N2_SETMINUS_N1_COUNT]   = {{1, 0}, {2, 0}, {2, 1}, {1, 2}, {2, 2}};
    static vect n12[N1_CAP_N2_COUNT] = {{0, 1}, {1, 1}, {0, 2}};

    for (col = 0; col < _cols; col++) {
	if (imageRow[col] == 0)
	    dtLines[0][col + 2] = 0;
	else {
	    GrayscalePixelType val;
	    GrayscalePixelType dt;
	    int k;

	    val = GRAYSCALE_MAX;
	    for (k = 0; k < N1_SETMINUS_N2_COUNT; k++) {
		assert(n1[k].y >= 0);
		assert(n1[k].y <= 2);
		assert(col + 2 - n1[k].x >= 0);
		assert(col + 2 - n1[k].x < _cols + 3);
		val = std::min(val, dtLines[n1[k].y][col + 2 - n1[k].x]);
	    }
	    assert(C1(d.num, d.den, (int) val) == d.mbf1i(d.mbf1(val)+1)+1);
	    dt = std::min((int)_dMax, d.mbf1i(d.mbf1(val)+1)+1);

	    val = GRAYSCALE_MAX;
	    for (k = 0; k < N2_SETMINUS_N1_COUNT; k++) {
		assert(n2[k].y >= 0);
		assert(n2[k].y <= 2);
		assert(col + 2 - n2[k].x >= 0);
		assert(col + 2 - n2[k].x < _cols + 3);
		val = std::min(val, dtLines[n2[k].y][col + 2 - n2[k].x]);
	    }
	    assert(C2(d.num, d.den, (int) val) == d.mbf2i(d.mbf2(val)+1)+1);
	    dt = std::min((int) dt, d.mbf2i(d.mbf2(val)+1)+1);

	    val = GRAYSCALE_MAX;
	    for (k = 0; k < N1_CAP_N2_COUNT; k++) {
		assert(n12[k].y >= 0);
		assert(n12[k].y <= 2);
		assert(col + 2 - n12[k].x >= 0);
		assert(col + 2 - n12[k].x < _cols + 3);
		val = std::min(val, dtLines[n12[k].y][col + 2 - n12[k].x]);
	    }
	    dt = std::min((int) dt, val + 1);

	    dtLines[0][col + 2] = dt;
	}
    }
    _consumer->processRow(dtLines[0]+2);
    rotate();
}
Molecule C2H4()
{
    int nAtoms = 6;

    Eigen::Vector3d C1(0.0000000000,  0.0000000000,  1.2578920000);
    Eigen::Vector3d H1(0.0000000000,  1.7454620000,  2.3427160000);
    Eigen::Vector3d H2(0.0000000000, -1.7454620000,  2.3427160000);
    Eigen::Vector3d C2(0.0000000000,  0.0000000000, -1.2578920000);
    Eigen::Vector3d H3(0.0000000000,  1.7454620000, -2.3427160000);
    Eigen::Vector3d H4(0.0000000000, -1.7454620000, -2.3427160000);

    Eigen::MatrixXd geom(3, nAtoms);
    geom.col(0) = C1.transpose();
    geom.col(1) = H1.transpose();
    geom.col(2) = H2.transpose();
    geom.col(3) = C2.transpose();
    geom.col(4) = H3.transpose();
    geom.col(5) = H4.transpose();
    Eigen::VectorXd charges(6), masses(6);
    charges << 6.0, 1.0, 1.0, 6.0, 1.0, 1.0;
    masses  << 12.00, 1.0078250, 1.0078250, 12.0, 1.0078250, 1.0078250;

    double radiusC = (1.70 * 1.20) / convertBohrToAngstrom;
    double radiusH = (1.20 * 1.20) / convertBohrToAngstrom;
    std::vector<Atom> atoms;
    atoms.push_back( Atom("Carbon",   "C", charges(0), masses(0), radiusC, C1, 1.0) );
    atoms.push_back( Atom("Hydrogen", "H", charges(1), masses(1), radiusH, H1, 1.0) );
    atoms.push_back( Atom("Hydrogen", "H", charges(2), masses(2), radiusH, H2, 1.0) );
    atoms.push_back( Atom("Carbon",   "C", charges(3), masses(3), radiusC, C2, 1.0) );
    atoms.push_back( Atom("Hydrogen", "H", charges(4), masses(4), radiusH, H3, 1.0) );
    atoms.push_back( Atom("Hydrogen", "H", charges(5), masses(5), radiusH, H4, 1.0) );

    std::vector<Sphere> spheres;
    Sphere sph1(C1, radiusC);
    Sphere sph2(H1, radiusH);
    Sphere sph3(H2, radiusH);
    Sphere sph4(C2, radiusC);
    Sphere sph5(H3, radiusH);
    Sphere sph6(H4, radiusH);
    spheres.push_back(sph1);
    spheres.push_back(sph2);
    spheres.push_back(sph3);
    spheres.push_back(sph4);
    spheres.push_back(sph5);
    spheres.push_back(sph6);

    // D2h as generated by Oxy, Oxz, Oyz
    Symmetry pGroup = buildGroup(3, 4, 2, 1);

    return Molecule(nAtoms, charges, masses, geom, atoms, spheres, pGroup);
};
// Update the coefficients associated with the patch field
void Foam::smoluchowskiJumpTFvPatchScalarField::updateCoeffs()
{
    if (updated())
    {
        return;
    }

    const fvPatchScalarField& pmu =
        patch().lookupPatchField<volScalarField, scalar>("mu");
    const fvPatchScalarField& prho =
        patch().lookupPatchField<volScalarField, scalar>("rho");
    const fvPatchField<scalar>& ppsi =
        patch().lookupPatchField<volScalarField, scalar>("psi");
    const fvPatchVectorField& pU =
        patch().lookupPatchField<volVectorField, vector>("U");

    // Prandtl number reading consistent with rhoCentralFoam
    const dictionary& thermophysicalProperties =
        db().lookupObject<IOdictionary>("thermophysicalProperties");

    dimensionedScalar Pr
    (
        dimensionedScalar::lookupOrDefault
        (
            "Pr",
            thermophysicalProperties,
            1.0
        )
    );

    Field<scalar> C2
    (
        pmu/prho
        *sqrt(ppsi*constant::mathematical::piByTwo)
        *2.0*gamma_/Pr.value()/(gamma_ + 1.0)
        *(2.0 - accommodationCoeff_)/accommodationCoeff_
    );

    Field<scalar> aCoeff(prho.snGrad() - prho/C2);
    Field<scalar> KEbyRho(0.5*magSqr(pU));

    valueFraction() = (1.0/(1.0 + patch().deltaCoeffs()*C2));
    refValue() = Twall_;
    refGrad() = 0.0;

    mixedFvPatchScalarField::updateCoeffs();
}
Exemple #8
0
int main(int argc, char **argv) {
  int m = 8000;
  int k = 3600;
  int n = 3600;
  int numsteps = 1;

  Matrix<double> A = RandomMatrix<double>(m, k);
  Matrix<double> B = RandomMatrix<double>(k, n);
  Matrix<double> C1(m, n), C2(m, n);
  
  Time([&] { MatMul(A, B, C1); }, "Classical gemm");
  Time([&] { grey433_29_234::FastMatmul(A, B, C2, numsteps); }, "Fast (4, 3, 3)");
  
  // Test for correctness.
  std::cout << "Maximum relative difference: " << MaxRelativeDiff(C1, C2) << std::endl;
  
  return 0;
}
Exemple #9
0
/*!
 * @param rxn  Reaction index of the current reaction. This is used
 *             as an index into vectors which have length n_total_rxn.
 * @param k    This is a vector of integer values specifying the
 *             species indices. The length of this vector species
 *             the number of different species in the description.
 *             The value of the entries are the species indices.
 *             These are used as indexes into vectors which have
 *             length n_total_species.
 *  @param order This is a vector of the same length as vector k.
 *         The order is used for the routine power(), which produces
 *         a power law expression involving the species vector.
 *  @param stoich  This is used to handle fractional stoichiometric coefficients
 *                 on the product side of irreversible reactions.
 */
void StoichManagerN::add(size_t rxn, const std::vector<size_t>& k, const vector_fp& order,
                         const vector_fp& stoich) {
    //printf ("add called\n");
    if (order.size() != k.size()) {
        throw CanteraError("StoichManagerN::add()", "size of order and species arrays differ");
    }
    if (stoich.size() != k.size()) {
        throw CanteraError("StoichManagerN::add()", "size of stoich and species arrays differ");
    }
    bool frac = false;
    for (size_t n = 0; n < stoich.size(); n++) {
        if (fmod(stoich[n], 1.0) || fmod(order[n], 1.0)) {
            frac = true;
            break;
        }
    }
    if (frac || k.size() > 3) {
        m_cn_list.push_back(C_AnyN(rxn, k, order, stoich));
    } else {
        // Try to express the reaction with unity stoichiometric
        // coefficients (by repeating species when necessary) so that the
        // simpler 'multiply' function can be used to compute the rate
        // instead of 'power'.
        std::vector<size_t> kRep;
        for (size_t n = 0; n < k.size(); n++) {
            for (size_t i = 0; i < stoich[n]; i++)
                kRep.push_back(k[n]);
        }

        switch (kRep.size()) {
        case 1:
            m_c1_list.push_back(C1(rxn, kRep[0]));
            break;
        case 2:
            m_c2_list.push_back(C2(rxn, kRep[0], kRep[1]));
            break;
        case 3:
            m_c3_list.push_back(C3(rxn, kRep[0], kRep[1], kRep[2]));
            break;
        default:
            m_cn_list.push_back(C_AnyN(rxn, k, order, stoich));
        }
    }
}
Exemple #10
0
int main(int argc, char **argv) {
  int m = 2000;
  int k = 1200;
  int n = 2000;
  int numsteps = 1;

  Matrix<double> A = RandomMatrix<double>(m, k);
  Matrix<double> B = RandomMatrix<double>(k, n);
  Matrix<double> C1(m, n), C2(m, n);

  Time([&] { MatMul(A, B, C1); }, "Classical gemm");
  Time([&] { classical222_8_24::FastMatmul(A, B, C2, numsteps); },
       "Classical recursive (2, 2, 2)");

  // Test for correctness.
  std::cout << "Maximum relative difference: " << MaxRelativeDiff(C1, C2) << std::endl;

  return 0;
}
int main()
{
    Array<int,2> A(2,3), B(2,3);

    A = 0, 3, 5, 
        1, 6, 9;
    B = 2, 5, 1,
        9, 3, 4;

    Array<int,2> C(A+2*B);
    Array<int,2> C2(2,3);
    C2 =  4, 13,  7,
         19, 12, 17;
    BZTEST(count(C2 == C) == 6); 

    beginCheckAssert();
    Array<int,2> D(i*10+j);
    endCheckAssert();
}
//======================================================================
tensor EightNode_Brick_u_p::getDampingTensorS( )
{
    int C2_dim[] = {Num_Nodes, Num_Nodes};
    tensor C2(2,C2_dim,0.0);
    tensor Cc2(2,C2_dim,0.0);

    double r  = 0.0;
    double rw = 0.0;
    double s  = 0.0;
    double sw = 0.0;
    double t  = 0.0;
    double tw = 0.0;
    double weight = 0.0;
    double det_of_Jacobian = 1.0;

    tensor Jacobian;
    tensor h;
    double Qqinv = (alpha-nf)/ks + nf/kf;

    int GP_c_r, GP_c_s, GP_c_t;

    for( GP_c_r = 0 ; GP_c_r < Num_IntegrationPts; GP_c_r++ ) {
      r = pts[GP_c_r];
      rw = wts[GP_c_r];
      for( GP_c_s = 0 ; GP_c_s < Num_IntegrationPts; GP_c_s++ ) {
        s = pts[GP_c_s];
        sw = wts[GP_c_s];
        for( GP_c_t = 0 ; GP_c_t < Num_IntegrationPts; GP_c_t++ ) {
          t = pts[GP_c_t];
          tw = wts[GP_c_t];
  	      h = shapeFunction(r,s,t);
	      Jacobian = this->Jacobian_3D(r,s,t);
	      det_of_Jacobian  = Jacobian.determinant();
	      weight = rw * sw * tw * det_of_Jacobian;
	      Cc2 = h("a")*h("k");
	      C2 += Cc2*(weight*Qqinv);
	   }
	}
    }

    return C2;
}
Exemple #13
0
Z H2(xpn){
  A z;
  ND2 I1;
  {I ar=a->r,an=a->n;XW;
   I bn=b0(a->p,an),wl=wr?*wd:1;
   aw=0;
   Q(bn<0,9)
   Q(ar>1,7)
   if(!wr) wl=wr=1;
   if(wn==1) aw=2;
   else Q(wl!=bn,8)
   if(wr==1&&wt!=Et){
     W(gv(wt,an))
     C2((I(*)())(!wt?(I(*)())x0:wt==Ft?(I(*)())x1:(I(*)())x2))
   }
   v=tr(wr-1,wd+1);
   u=wn;
   W(ga(t=wt,wr,an*v,wd))*z->d=an;
   C2(x3)
}}
Exemple #14
0
Z H2(cmp){
  A z;ND2 I1;
  {I ar=a->r,an=a->n;XW;
   I bn=b0(a->p,an),wl=wr?*wd:1;
   Q(bn==-1,9)
   aw=0;
   u=an==1;
   if((u)&&bn==1&&wr) R ic_or_copy(w);
   Q(ar>1,7)
   if(!wr) wl=wr=1;
   if(u) bn*=wl;
   else 
     if(wn==1) aw=2;
     else Q(wl!=an,8)
   if(wr==1&&wt!=Et&&bn>=0){
     W(gv(wt,bn))
     C2((!wt?(I(*)())c0:wt==Ft?(I(*)())c1:(I(*)())c2))
   }
   if(bn<0) bn=-bn;
   v=tr(wr-1,wd+1);
   W(ga(t=wt,wr,bn*v,wd))*z->d=bn;
   C2(c3)
}}
int main(int argc, char **argv) {
  int m = 90;
  int k = 90;
  int n = 90;
  int numsteps = 1;

  Matrix<double> A = RandomMatrix<double>(m, k);
  Matrix<double> B = RandomMatrix<double>(k, n);
  Matrix<double> C1(m, n), C2(m, n);
  MatMul(A, B, C1);

  for (int numsteps = 1; numsteps <= 2; ++numsteps) {
	double lambda = DBL_EPSILON;
	std::cout << numsteps << std::endl;
	while (lambda < 1) {
	  smirnov333_20_182_approx::FastMatmul(A, B, C2, numsteps, lambda);
	  std::cout << lambda << ", " << MaxRelativeDiff(C1, C2) << "; ";
	  lambda *= 2;
	}
	std::cout << std::endl;
  }

  return 0;
}
Exemple #16
0
cString cPlainKeyVia::PrintKeyNr(void)
{
  char tmp[12];
  const char *kn=tmp;
  switch(keynr) {
    case MBC3('T','P','S'):
      kn="TPS"; break;
    case MBC3('M','K',0): case MBC3('M','K',1): case MBC3('M','K',2):
    case MBC3('M','K',3): case MBC3('M','K',4): case MBC3('M','K',5):
    case MBC3('M','K',6): case MBC3('M','K',7): case MBC3('M','K',8):
    case MBC3('M','K',9):
      snprintf(tmp,sizeof(tmp),"TPSMK%d",C3(keynr)); break;
    default:
      {
      char c2=C2(keynr);
      if(c2=='D' || c2=='P' || c2=='X' || c2=='C' || c2=='E' || c2=='T')
        snprintf(tmp,sizeof(tmp),"%c%01X",c2,keynr & 0x0f);
      else
        snprintf(tmp,sizeof(tmp),"%02X",keynr);
      break;
      }
    }
  return kn;
}
Exemple #17
0
void Trr2kNNNT
( UpperOrLower uplo,
  Orientation orientationOfD,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    PushCallStack("internal::Trr2kNNNT");
    if( E.Height() != E.Width()  || A.Width()  != C.Width()  ||
        A.Height() != E.Height() || C.Height() != E.Height() ||
        B.Width()  != E.Width()  || D.Height() != E.Width()  ||
        A.Width()  != B.Height() || C.Width()  != D.Width() )
        throw std::logic_error("Nonconformal Trr2kNNNT");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);

    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g);
    DistMatrix<T> DL(g), DR(g),
                  D0(g), D1(g), D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,MC,  STAR> C1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> D1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > D1AdjOrTrans_STAR_MR(g);

    A1_MC_STAR.AlignWith( E );
    B1Trans_MR_STAR.AlignWith( E );
    C1_MC_STAR.AlignWith( E );
    D1_VR_STAR.AlignWith( E );
    D1AdjOrTrans_STAR_MR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    LockedPartitionRight( C, CL, CR, 0 );
    LockedPartitionRight( D, DL, DR, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );
        LockedRepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );
        LockedRepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_MC_STAR = C1;
        B1Trans_MR_STAR.TransposeFrom( B1 );
        D1_VR_STAR = D1;
        if( orientationOfD == ADJOINT )
            D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR );
        else
            D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR );
        LocalTrr2k
        ( uplo, TRANSPOSE, 
          alpha, A1_MC_STAR, B1Trans_MR_STAR, 
                 C1_MC_STAR, D1AdjOrTrans_STAR_MR,
          beta,  E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( DL,     /**/ DR,
          D0, D1, /**/ D2 );
        SlideLockedPartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #18
0
/* Subroutine */ int zlatzm_(char *side, integer *m, integer *n, 
	doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex *
	c1, doublecomplex *c2, integer *ldc, doublecomplex *work)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    ZLATZM applies a Householder matrix generated by ZTZRQF to a matrix. 
  

    Let P = I - tau*u*u',   u = ( 1 ),   
                                ( v )   
    where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if   
    SIDE = 'R'.   

    If SIDE equals 'L', let   
           C = [ C1 ] 1   
               [ C2 ] m-1   
                 n   
    Then C is overwritten by P*C.   

    If SIDE equals 'R', let   
           C = [ C1, C2 ] m   
                  1  n-1   
    Then C is overwritten by C*P.   

    Arguments   
    =========   

    SIDE    (input) CHARACTER*1   
            = 'L': form P * C   
            = 'R': form C * P   

    M       (input) INTEGER   
            The number of rows of the matrix C.   

    N       (input) INTEGER   
            The number of columns of the matrix C.   

    V       (input) COMPLEX*16 array, dimension   
                    (1 + (M-1)*abs(INCV)) if SIDE = 'L'   
                    (1 + (N-1)*abs(INCV)) if SIDE = 'R'   
            The vector v in the representation of P. V is not used   
            if TAU = 0.   

    INCV    (input) INTEGER   
            The increment between elements of v. INCV <> 0   

    TAU     (input) COMPLEX*16   
            The value tau in the representation of P.   

    C1      (input/output) COMPLEX*16 array, dimension   
                           (LDC,N) if SIDE = 'L'   
                           (M,1)   if SIDE = 'R'   
            On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1   
            if SIDE = 'R'.   

            On exit, the first row of P*C if SIDE = 'L', or the first   
            column of C*P if SIDE = 'R'.   

    C2      (input/output) COMPLEX*16 array, dimension   
                           (LDC, N)   if SIDE = 'L'   
                           (LDC, N-1) if SIDE = 'R'   
            On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the   
            m x (n - 1) matrix C2 if SIDE = 'R'.   

            On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P 
  
            if SIDE = 'R'.   

    LDC     (input) INTEGER   
            The leading dimension of the arrays C1 and C2.   
            LDC >= max(1,M).   

    WORK    (workspace) COMPLEX*16 array, dimension   
                        (N) if SIDE = 'L'   
                        (M) if SIDE = 'R'   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static doublecomplex c_b1 = {1.,0.};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer c1_dim1, c1_offset, c2_dim1, c2_offset, i__1;
    doublecomplex z__1;
    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *), 
	    zgeru_(integer *, integer *, doublecomplex *, doublecomplex *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, integer *)
	    , zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, 
	    integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, 
	    integer *, doublecomplex *, integer *), zlacgv_(integer *, 
	    doublecomplex *, integer *);



#define V(I) v[(I)-1]
#define WORK(I) work[(I)-1]

#define C2(I,J) c2[(I)-1 + ((J)-1)* ( *ldc)]
#define C1(I,J) c1[(I)-1 + ((J)-1)* ( *ldc)]

    if (min(*m,*n) == 0 || tau->r == 0. && tau->i == 0.) {
	return 0;
    }

    if (lsame_(side, "L")) {

/*        w :=  conjg( C1 + v' * C2 ) */

	zcopy_(n, &C1(1,1), ldc, &WORK(1), &c__1);
	zlacgv_(n, &WORK(1), &c__1);
	i__1 = *m - 1;
	zgemv_("Conjugate transpose", &i__1, n, &c_b1, &C2(1,1), ldc, &
		V(1), incv, &c_b1, &WORK(1), &c__1);

/*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w'   
          [ C2 ]    [ C2 ]        [ v ] */

	zlacgv_(n, &WORK(1), &c__1);
	z__1.r = -tau->r, z__1.i = -tau->i;
	zaxpy_(n, &z__1, &WORK(1), &c__1, &C1(1,1), ldc);
	i__1 = *m - 1;
	z__1.r = -tau->r, z__1.i = -tau->i;
	zgeru_(&i__1, n, &z__1, &V(1), incv, &WORK(1), &c__1, &C2(1,1), 
		ldc);

    } else if (lsame_(side, "R")) {

/*        w := C1 + C2 * v */

	zcopy_(m, &C1(1,1), &c__1, &WORK(1), &c__1);
	i__1 = *n - 1;
	zgemv_("No transpose", m, &i__1, &c_b1, &C2(1,1), ldc, &V(1), 
		incv, &c_b1, &WORK(1), &c__1);

/*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] */

	z__1.r = -tau->r, z__1.i = -tau->i;
	zaxpy_(m, &z__1, &WORK(1), &c__1, &C1(1,1), &c__1);
	i__1 = *n - 1;
	z__1.r = -tau->r, z__1.i = -tau->i;
	zgerc_(m, &i__1, &z__1, &WORK(1), &c__1, &V(1), incv, &C2(1,1), 
		ldc);
    }

    return 0;

/*     End of ZLATZM */

} /* zlatzm_ */
int main(int, char**)
{
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_1;
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_2;

#if SYNTHETIC_INPUT
    Halide::Buffer<uint8_t> im1(10, 10);
    Halide::Buffer<uint8_t> im2(10, 10);

    for (int i = 0; i < 10; i++)
	    for (int j = 0; j < 10; j++)
	    {
		    im1(i, j) = (uint8_t) i*i+j*j;
		    im2(i, j) = (uint8_t) i*i+j*j;
	    }
#else
    Halide::Buffer<uint8_t> im1 = Halide::Tools::load_image("./utils/images/rgb.png");
    Halide::Buffer<uint8_t> im2 = Halide::Tools::load_image("./utils/images/rgb.png");
#endif

    Halide::Buffer<float> Ix_m(im1.width(), im1.height());
    Halide::Buffer<float> Iy_m(im1.width(), im1.height());
    Halide::Buffer<float> It_m(im1.width(), im1.height());
    Halide::Buffer<int> C1(_NC);
    Halide::Buffer<int> C2(_NC);
    Halide::Buffer<int> SIZES(2);
    Halide::Buffer<int> u(_NC);
    Halide::Buffer<int> v(_NC);
    Halide::Buffer<float> A(2, 4*w*w);
    Halide::Buffer<float> tA(4*w*w, 2);
    Halide::Buffer<double> pinvA(4*w*w, 2);
    Halide::Buffer<double> det(1);
    Halide::Buffer<float> tAA(2, 2);
    Halide::Buffer<double> X(2, 2);

    SIZES(0) = im1.height();
    SIZES(1) = im1.width();
    C1(0) = 500; C2(0) = 400;
    C1(1) = 800; C2(1) = 900;
    C1(2) = 200; C2(2) = 400;
    C1(3) = 400; C2(3) = 200;
    C1(4) = 400; C2(4) = 500;
    C1(5) = 800; C2(5) = 200;
    C1(6) = 200; C2(6) = 900;
    C1(7) = 900; C2(7) = 200;

    det(0) = 0;
    init_buffer(Ix_m, (float) 0);
    init_buffer(Iy_m, (float) 0);
    init_buffer(It_m, (float) 0);
    init_buffer(A, (float) 0);
    init_buffer(tA, (float) 0);
    init_buffer(pinvA, (double) 0);
    init_buffer(tAA, (float) 0);
    init_buffer(X, (double) 0);

    // Warm up
    optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(),
			  Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(),
			  C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer());

    // Tiramisu
    for (int i=0; i<NB_TESTS; i++)
    {
        auto start1 = std::chrono::high_resolution_clock::now();
        optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(),
			  Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(),
			  C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer());
        auto end1 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double,std::milli> duration1 = end1 - start1;
        duration_vector_1.push_back(duration1);
    }

    std::cout << "Time: " << median(duration_vector_1) << std::endl;

#if SYNTHETIC_INPUT
    print_buffer(im1);
    print_buffer(im2);

    print_buffer(Ix_m);
    print_buffer(Iy_m);
    print_buffer(It_m);

    print_buffer(A);
    print_buffer(tA);
    print_buffer(tAA);
    print_buffer(det);
    print_buffer(X);
    print_buffer(pinvA);
#endif

    std::cout << "Output" << std::endl;

    print_buffer(u);
    print_buffer(v);

    return 0;
}
Exemple #20
0
inline void
GemmTTA
( Orientation orientationOfA, 
  Orientation orientationOfB,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTTA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL || orientationOfB == NORMAL )
        throw std::logic_error
        ("GemmTTA expects A and B to be (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Height() != C.Width()  ||
        A.Height() != B.Width()    )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTTA: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> BT(g),  B0(g),
                  BB(g),  B1(g),
                          B2(g);
    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC  > B1_STAR_MC(g);
    DistMatrix<T,MR,  STAR> D1_MR_STAR(g);
    DistMatrix<T,MR,  MC  > D1_MR_MC(g);
    DistMatrix<T> D1(g);

    B1_STAR_MC.AlignWith( A ); 
    D1_MR_STAR.AlignWith( A );  

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( BB.Height() > 0 )
    {
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        RepartitionRight
        ( CL, /**/     CR,
          C0, /**/ C1, C2 );

        D1.AlignWith( C1 );  
        Zeros( C1.Height(), C1.Width(), D1_MR_STAR );
        //--------------------------------------------------------------------//
        B1_STAR_MC = B1; // B1[*,MC] <- B1[MC,MR]

        // D1[MR,*] := alpha (A[MC,MR])^T (B1[*,MC])^T
        //           = alpha (A^T)[MR,MC] (B1^T)[MC,*]
        LocalGemm
        ( orientationOfA, orientationOfB, 
          alpha, A, B1_STAR_MC, T(0), D1_MR_STAR );

        // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols
        D1_MR_MC.SumScatterFrom( D1_MR_STAR );
        D1 = D1_MR_MC; 
        Axpy( T(1), D1, C1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();
        
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 ); 
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #21
0
int main(int argc, char *argv[]) {

  int i, returnierr=0;

#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  // Uncomment to debug in parallel int tmp; if (Comm.MyPID()==0) cin >> tmp; Comm.Barrier();

  bool verbose = false;
  bool veryVerbose = false;

  // Check if we should print results to standard out
  if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true;

  // Check if we should print lots of results to standard out
  if (argc>2) if (argv[2][0]=='-' && argv[2][1]=='v') veryVerbose = true;

  if (verbose && Comm.MyPID()==0)
    std::cout << Epetra_Version() << std::endl << std::endl;

  if (!verbose) Comm.SetTracebackMode(0); // This should shut down any error traceback reporting

  if (verbose) std::cout << Comm << std::endl << std::flush;

  bool verbose1 = verbose;
  if (verbose) verbose = (Comm.MyPID()==0);

  bool veryVerbose1 = veryVerbose;
  if (veryVerbose) veryVerbose = (Comm.MyPID()==0);

  int NumMyElements = 100;
  if (veryVerbose1) NumMyElements = 10;
  NumMyElements += Comm.MyPID();
  int MaxNumMyElements = NumMyElements+Comm.NumProc()-1;
  int * ElementSizeList = new int[NumMyElements];
  long long * MyGlobalElements = new long long[NumMyElements];

  for (i = 0; i<NumMyElements; i++) {
    MyGlobalElements[i] = (Comm.MyPID()*MaxNumMyElements+i)*2;
    ElementSizeList[i] = i%6 + 2; // elementsizes go from 2 to 7
  }

  Epetra_BlockMap Map(-1LL, NumMyElements, MyGlobalElements, ElementSizeList,
		      0, Comm);

  delete [] ElementSizeList;
  delete [] MyGlobalElements;

  Epetra_MapColoring C0(Map);

  int * elementColors = new int[NumMyElements];

  int maxcolor = 24;
  int * colorCount = new int[maxcolor];
  int ** colorLIDs = new int*[maxcolor];
  for (i=0; i<maxcolor; i++) colorCount[i] = 0;
  for (i=0; i<maxcolor; i++) colorLIDs[i] = 0;

  int defaultColor = C0.DefaultColor();
  for (i=0; i<Map.NumMyElements(); i++) {
    assert(C0[i]==defaultColor);
    assert(C0(Map.GID64(i))==defaultColor);
    if (i%2==0) C0[i] = i%6+5+i%14; // cycle through 5...23 on even elements
    else C0(Map.GID64(i)) = i%5+1; // cycle through 1...5 on odd elements
    elementColors[i] = C0[i]; // Record color of ith element for use below
    colorCount[C0[i]]++; // Count how many of each color for checking below
  }
  
  if (veryVerbose)
    std::cout << "Original Map Coloring using element-by-element definitions" << std::endl;
  if (veryVerbose1)
    std::cout <<  C0 << std::endl;

  int numColors = 0;
  for (i=0; i<maxcolor; i++) 
    if (colorCount[i]>0) {
      numColors++;
      colorLIDs[i] = new int[colorCount[i]];
    }
  for (i=0; i<maxcolor; i++) colorCount[i] = 0;
  for (i=0; i<Map.NumMyElements(); i++) colorLIDs[C0[i]][colorCount[C0[i]]++] = i;

  

  int newDefaultColor = -1;
  Epetra_MapColoring C1(Map, elementColors, newDefaultColor);
  if (veryVerbose)
    std::cout << "Same Map Coloring using one-time construction" << std::endl;
  if (veryVerbose1)
    std::cout <<  C1 << std::endl;
  assert(C1.DefaultColor()==newDefaultColor);
  for (i=0; i<Map.NumMyElements(); i++) assert(C1[i]==C0[i]);

  Epetra_MapColoring C2(C1);
  if (veryVerbose)
    std::cout << "Same Map Coloring using copy constructor" << std::endl;
  if (veryVerbose1)
    std::cout <<  C1 << std::endl;
  for (i=0; i<Map.NumMyElements(); i++) assert(C2[i]==C0[i]);
  assert(C2.DefaultColor()==newDefaultColor);

  assert(numColors==C2.NumColors());

  for (i=0; i<maxcolor; i++) {
    int curNumElementsWithColor = C2.NumElementsWithColor(i);
    assert(colorCount[i]==curNumElementsWithColor);
    int * curColorLIDList = C2.ColorLIDList(i);
    if (curNumElementsWithColor==0) {
      assert(curColorLIDList==0);
    }
    else
      for (int j=0; j<curNumElementsWithColor; j++) assert(curColorLIDList[j]==colorLIDs[i][j]);
  }
  int curColor = 1;
  Epetra_Map * Map1 = C2.GenerateMap(curColor);
  Epetra_BlockMap * Map2 = C2.GenerateBlockMap(curColor);

  assert(Map1->NumMyElements()==colorCount[curColor]);
  assert(Map2->NumMyElements()==colorCount[curColor]);

  for (i=0; i<Map1->NumMyElements(); i++) {
    assert(Map1->GID64(i)==Map.GID64(colorLIDs[curColor][i]));
    assert(Map2->GID64(i)==Map.GID64(colorLIDs[curColor][i]));
    assert(Map2->ElementSize(i)==Map.ElementSize(colorLIDs[curColor][i]));
  }

  // Now test data redistribution capabilities


  Epetra_Map ContiguousMap(-1LL, Map.NumMyElements(), Map.IndexBase64(), Comm);
  // This vector contains the element sizes for the original map.
  Epetra_IntVector elementSizes(Copy, ContiguousMap, Map.ElementSizeList());
  Epetra_LongLongVector elementIDs(Copy, ContiguousMap, Map.MyGlobalElements64());
  Epetra_IntVector elementColorValues(Copy, ContiguousMap, C2.ElementColors());


  long long NumMyElements0 = 0;
  if (Comm.MyPID()==0) NumMyElements0 = Map.NumGlobalElements64();
  Epetra_Map CMap0(-1LL, NumMyElements0, Map.IndexBase64(), Comm);
  Epetra_Import importer(CMap0, ContiguousMap);
  Epetra_IntVector elementSizes0(CMap0);
  Epetra_LongLongVector elementIDs0(CMap0);
  Epetra_IntVector elementColorValues0(CMap0);
  elementSizes0.Import(elementSizes, importer, Insert);
  elementIDs0.Import(elementIDs, importer, Insert);
  elementColorValues0.Import(elementColorValues, importer, Insert);

  Epetra_BlockMap MapOnPE0(-1LL,NumMyElements0, elementIDs0.Values(), 
			   elementSizes0.Values(), Map.IndexBase64(), Comm);

  Epetra_Import importer1(MapOnPE0, Map);
  Epetra_MapColoring ColoringOnPE0(MapOnPE0);
  ColoringOnPE0.Import(C2, importer1, Insert);

  for (i=0; i<MapOnPE0.NumMyElements(); i++)
    assert(ColoringOnPE0[i]==elementColorValues0[i]);

  if (veryVerbose)
    std::cout << "Same Map Coloring on PE 0 only" << std::endl;
  if (veryVerbose1)
    std::cout <<  ColoringOnPE0 << std::endl;
  Epetra_MapColoring C3(Map);
  C3.Export(ColoringOnPE0, importer1, Insert);
  for (i=0; i<Map.NumMyElements(); i++) assert(C3[i]==C2[i]);
  if (veryVerbose)
    std::cout << "Same Map Coloring after Import/Export exercise" << std::endl;
  if (veryVerbose1)
    std::cout <<  ColoringOnPE0 << std::endl;
   
  
  if (verbose) std::cout << "Checked OK\n\n" << std::endl;

  if (verbose1) {
    if (verbose) std::cout << "Test ostream << operator" << std::endl << std::flush;
    std::cout << C0 << std::endl;
  }
	

  delete [] elementColors;
  for (i=0; i<maxcolor; i++) if (colorLIDs[i]!=0) delete [] colorLIDs[i];
  delete [] colorLIDs;
  delete [] colorCount;

  delete Map1;
  delete Map2;


#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  return returnierr;
}
Exemple #22
0
inline void
SymmLLA
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::SymmLLA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
#endif
    const Grid& g = A.Grid();

    DistMatrix<T> 
        BL(g), BR(g),
        B0(g), B1(g), B2(g);

    DistMatrix<T>
        CL(g), CR(g),
        C0(g), C1(g), C2(g);

    DistMatrix<T,MC,STAR> B1_MC_STAR(g);
    DistMatrix<T,VR,STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g);
    DistMatrix<T> Z1(g);
    DistMatrix<T,MC,STAR> Z1_MC_STAR(g);
    DistMatrix<T,MR,STAR> Z1_MR_STAR(g);
    DistMatrix<T,MR,MC  > Z1_MR_MC(g);

    B1_MC_STAR.AlignWith( A );
    B1_VR_STAR.AlignWith( A );
    B1Trans_STAR_MR.AlignWith( A );
    Z1_MC_STAR.AlignWith( A );
    Z1_MR_STAR.AlignWith( A );

    Scale( beta, C );
    LockedPartitionRight
    ( B, BL, BR, 0 );
    PartitionRight
    ( C, CL, CR, 0 );
    while( CL.Width() < C.Width() )
    {
        LockedRepartitionRight 
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );

        RepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        Z1.AlignWith( C1 );
        Zeros( C1.Height(), C1.Width(), Z1_MC_STAR );
        Zeros( C1.Height(), C1.Width(), Z1_MR_STAR );
        //--------------------------------------------------------------------//
        B1_MC_STAR = B1;
        B1_VR_STAR = B1_MC_STAR;
        B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR );
        LocalSymmetricAccumulateLL
        ( TRANSPOSE, 
          alpha, A, B1_MC_STAR, B1Trans_STAR_MR, Z1_MC_STAR, Z1_MR_STAR );

        Z1_MR_MC.SumScatterFrom( Z1_MR_STAR );
        Z1 = Z1_MR_MC;
        Z1.SumScatterUpdate( T(1), Z1_MC_STAR );
        Axpy( T(1), Z1, C1 );
        //--------------------------------------------------------------------//
        Z1.FreeAlignments();

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #23
0
inline void
GemmNNA
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNA");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNA: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);
    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g);

    // Temporary distributions
    DistMatrix<T,VR,STAR> B1_VR_STAR(g);
    DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g);
    DistMatrix<T,MC,STAR> D1_MC_STAR(g);

    B1_VR_STAR.AlignWith( A );
    B1Trans_STAR_MR.AlignWith( A );
    D1_MC_STAR.AlignWith( A );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionRight( B, BL, BR, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( BR.Width() > 0 )
    {
        LockedRepartitionRight
        ( BL, /**/     BR,
          B0, /**/ B1, B2 );

        RepartitionRight
        ( CL, /**/     CR,
          C0, /**/ C1, C2 );

        Zeros( C1.Height(), C1.Width(), D1_MC_STAR );
        //--------------------------------------------------------------------//
        B1_VR_STAR = B1;
        B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR );

        // D1[MC,*] := alpha A[MC,MR] B1[MR,*]
        LocalGemm
        ( NORMAL, TRANSPOSE, alpha, A, B1Trans_STAR_MR, T(0), D1_MC_STAR );

        // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows
        C1.SumScatterUpdate( T(1), D1_MC_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
bool CollisionManager::isIntersectsPolygonPolygon(ICollisionHull* polygon1, ICollisionHull* polygon2)
{
    PoligonCollisionHull* poligonCH1 = dynamic_cast<PoligonCollisionHull*>(polygon1);
    PoligonCollisionHull* poligonCH2 = dynamic_cast<PoligonCollisionHull*>(polygon2);

    PointList points1 = poligonCH1->getPoints();
    PointList points2 = poligonCH2->getPoints();

    std::vector<float> A1(points1.size());
    std::vector<float> B1(points1.size());
    std::vector<float> C1(points1.size());

    std::vector<float> A2(points2.size());
    std::vector<float> B2(points2.size());
    std::vector<float> C2(points2.size());

    int i0, i1;
    float D;
    Vector3 P0, P1;
    for(int i = 0; i < points1.size(); i++)
    {
        i0 = i;
        i1 = (i == (points1.size() - 1)) ? 0 : i + 1;

        P0 = points1[i0];
        P1 = points1[i1];

        A1[i] = P0._y - P1._y;
        B1[i] = P1._x - P0._x;
        C1[i] = (P0._x * P1._y) - (P1._x * P0._y);
    }

    for(int i = 0; i < points2.size(); i++)
    {
        i0 = i;
        i1 = (i == (points2.size() - 1)) ? 0 : i + 1;

        P0 = points2[i0];
        P1 = points2[i1];

        A2[i] = P0._y - P1._y;
        B2[i] = P1._x - P0._x;
        C2[i] = (P0._x * P1._y) - (P1._x * P0._y);
    }

    //cheking 1 against 2
    for(int i = 0; i < points1.size(); i++)
    {
        for(int j = 0; j < points2.size(); j++)
        {
            P0 = points1[i];
            D = (P0._x * A2[j]) + (P0._y * B2[j]) + C2[j];
            if(D > 0)
            {
                return true;
            }
        }
    }//cheking 1 against 2

    //cheking 2 against 1
    for(int i = 0; i < points2.size(); i++)
    {
        for(int j = 0; j < points1.size(); j++)
        {
            P0 = points2[i];
            D = (P0._x * A1[j]) + (P0._y * B1[j]) + C1[j];
            if(D > 0)
            {
                return true;
            }
        }
    }//cheking 2 against 1

    return false;
}
Exemple #25
0
inline void 
GemmNNDot
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNDot");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNDot: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    if( A.Height() > B.Width() )
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g),
                      CB(g), C1(g), C10(g), C11(g), C12(g),
                             C2(g);

        // Temporary distributions
        DistMatrix<T,STAR,VC> A1_STAR_VC(g);
        DistMatrix<T,VC,STAR> B1_VC_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Star the algorithm
        Scale( beta, C );
        LockedPartitionDown
        ( A, AT,
             AB, 0 );
        PartitionDown
        ( C, CT,
             CB, 0 );
        while( AB.Height() > 0 )
        {
            LockedRepartitionDown
            ( AT,  A0,
             /**/ /**/
                   A1,
              AB,  A2 );

            RepartitionDown
            ( CT,  C0,
             /**/ /**/
                   C1,
              CB,  C2 );

            A1_STAR_VC = A1; 
            B1_VC_STAR.AlignWith( A1_STAR_VC );

            LockedPartitionRight( B, BL, BR, 0 );
            PartitionRight( C1, C1L, C1R, 0 );
            while( BR.Width() > 0 )
            {
                LockedRepartitionRight
                ( BL, /**/ BR,
                  B0, /**/ B1, B2 );

                RepartitionRight
                ( C1L, /**/ C1R,
                  C10, /**/ C11, C12 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                B1_VC_STAR = B1;
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionRight
                ( BL,     /**/ BR,
                  B0, B1, /**/ B2 );

                SlidePartitionRight
                ( C1L,      /**/ C1R,
                  C10, C11, /**/ C12 );
            }
            B1_VC_STAR.FreeAlignments();

            SlideLockedPartitionDown
            ( AT,  A0,
                   A1,
             /**/ /**/
              AB,  A2 );

            SlidePartitionDown
            ( CT,  C0,
                   C1,
             /**/ /**/
              CB,  C2 );
        }
    }
    else
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);         
        DistMatrix<T> BL(g),  B0(g),
                      BR(g),  B1(g),
                              B2(g);
        DistMatrix<T> 
            CL(g), CR(g),         C1T(g),  C01(g),
            C0(g), C1(g), C2(g),  C1B(g),  C11(g),
                                           C21(g);

        // Temporary distributions
        DistMatrix<T,STAR,VR> A1_STAR_VR(g);
        DistMatrix<T,VR,STAR> B1_VR_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Star the algorithm
        Scale( beta, C );
        LockedPartitionRight( B, BL, BR, 0 );
        PartitionRight( C, CL, CR, 0 );
        while( BR.Width() > 0 )
        {
            LockedRepartitionRight
            ( BL, /**/ BR,
              B0, /**/ B1, B2 );

            RepartitionRight
            ( CL, /**/ CR,
              C0, /**/ C1, C2 );

            B1_VR_STAR = B1;
            A1_STAR_VR.AlignWith( B1_VR_STAR );

            LockedPartitionDown
            ( A, AT,
                 AB, 0 );
            PartitionDown
            ( C1, C1T,
                  C1B, 0 );
            while( AB.Height() > 0 )
            {
                LockedRepartitionDown
                ( AT,  A0,
                 /**/ /**/
                       A1,
                  AB,  A2 );

                RepartitionDown
                ( C1T,  C01,
                 /***/ /***/
                        C11,
                  C1B,  C21 );

                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                //------------------------------------------------------------//
                A1_STAR_VR = A1;
                LocalGemm
                ( NORMAL, NORMAL, 
                  alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionDown
                ( AT,  A0,
                       A1,
                 /**/ /**/
                  AB,  A2 );

                SlidePartitionDown
                ( C1T,  C01,
                        C11,
                 /***/ /***/
                  C1B,  C21 );
            }
            A1_STAR_VR.FreeAlignments();

            SlideLockedPartitionRight
            ( BL,     /**/ BR,
              B0, B1, /**/ B2 ); 

            SlidePartitionRight
            ( CL,     /**/ CR,
              C0, C1, /**/ C2 );
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #26
0
inline void
GemmTTB
( Orientation orientationOfA, 
  Orientation orientationOfB,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmTTB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( orientationOfA == NORMAL || orientationOfB == NORMAL )
        throw std::logic_error
        ("GemmTTB expects A and B to be (Conjugate)Transposed");
    if( A.Width()  != C.Height() ||
        B.Height() != C.Width()  ||
        A.Height() != B.Width()    )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmTTB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AL(g), AR(g),
                  A0(g), A1(g), A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,VR,  STAR> A1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > A1AdjOrTrans_STAR_MR(g);
    DistMatrix<T,STAR,MC  > D1_STAR_MC(g);
    DistMatrix<T,MR,  MC  > D1_MR_MC(g);
    DistMatrix<T> D1(g);

    A1_VR_STAR.AlignWith( B );
    A1AdjOrTrans_STAR_MR.AlignWith( B );
    D1_STAR_MC.AlignWith( B );

    // Start the algorithm 
    Scale( beta, C );
    LockedPartitionRight( A, AL, AR, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( AR.Width() > 0 )
    {
        LockedRepartitionRight
        ( AL, /**/     AR,
          A0, /**/ A1, A2 );
 
        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        D1.AlignWith( C1 );
        Zeros( C1.Height(), C1.Width(), D1_STAR_MC );
        //--------------------------------------------------------------------//
        A1_VR_STAR = A1;
        if( orientationOfA == ADJOINT )
            A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR );
        else
            A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR );
 
        // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H]
        //           = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC]
        LocalGemm
        ( NORMAL, orientationOfB, 
          alpha, A1AdjOrTrans_STAR_MR, B, T(0), D1_STAR_MC );

        // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows
        D1_MR_MC.SumScatterFrom( D1_STAR_MC );
        D1 = D1_MR_MC; 
        Axpy( T(1), D1, C1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();

        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #27
0
void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
	{
	int	r;
	const u8 *p=inp;
	union	{ u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;

#ifdef GO_FOR_MMX
	GO_FOR_MMX(ctx,inp,n);
#endif
							do {
#ifdef OPENSSL_SMALL_FOOTPRINT
	u64	L[8];
	int	i;

	for (i=0;i<64;i++)	S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
	for (r=0;r<ROUNDS;r++)
		{
		for (i=0;i<8;i++)
			{
			L[i]  = i ? 0 : RC[r];
			L[i] ^=	C0(K,i)       ^ C1(K,(i-1)&7) ^
				C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
				C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
				C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
			}
		memcpy (K.q,L,64);
		for (i=0;i<8;i++)
			{
			L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
				C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
				C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
				C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
			}
		memcpy (S.q,L,64);
		}
	for (i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
#else
	u64	L0,L1,L2,L3,L4,L5,L6,L7;

#ifdef __STRICT_ALIGNMENT
	if ((size_t)p & 7)
		{
		memcpy (S.c,p,64);
		S.q[0] ^= (K.q[0] = H->q[0]);
		S.q[1] ^= (K.q[1] = H->q[1]);
		S.q[2] ^= (K.q[2] = H->q[2]);
		S.q[3] ^= (K.q[3] = H->q[3]);
		S.q[4] ^= (K.q[4] = H->q[4]);
		S.q[5] ^= (K.q[5] = H->q[5]);
		S.q[6] ^= (K.q[6] = H->q[6]);
		S.q[7] ^= (K.q[7] = H->q[7]);
		}
	else
#endif
		{
		const u64 *pa = (const u64*)p;
		S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
		S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
		S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
		S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
		S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
		S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
		S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
		S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
		}

	for(r=0;r<ROUNDS;r++)
		{
#ifdef SMALL_REGISTER_BANK
		L0 =	C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
			C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
		L1 =	C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
			C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
		L2 =	C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
			C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
		L3 =	C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
			C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
		L4 =	C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
			C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
		L5 =	C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
			C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
		L6 =	C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
			C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
		L7 =	C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
			C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		L0 ^=	C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
			C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
		L1 ^=	C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
			C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
		L2 ^=	C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
			C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
		L3 ^=	C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
			C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
		L4 ^=	C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
			C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
		L5 ^=	C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
			C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
		L6 ^=	C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
			C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
		L7 ^=	C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
			C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#else
		L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
		L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
		L0 ^= RC[r];

		L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
		L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);

		L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
		L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);

		L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
		L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);

		L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
		L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);

		L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
		L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);

		L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
		L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);

		L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
		L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
		L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);

		L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
		L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);

		L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
		L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);

		L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
		L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);

		L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
		L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);

		L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
		L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);

		L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
		L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);

		L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
		L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#endif
		}

#ifdef __STRICT_ALIGNMENT
	if ((size_t)p & 7)
		{
		int i;
		for(i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
		}
	else
#endif
		{
		const u64 *pa=(const u64 *)p;
		H->q[0] ^= S.q[0] ^ pa[0];
		H->q[1] ^= S.q[1] ^ pa[1];
		H->q[2] ^= S.q[2] ^ pa[2];
		H->q[3] ^= S.q[3] ^ pa[3];
		H->q[4] ^= S.q[4] ^ pa[4];
		H->q[5] ^= S.q[5] ^ pa[5];
		H->q[6] ^= S.q[6] ^ pa[6];
		H->q[7] ^= S.q[7] ^ pa[7];
		}
#endif
							p += 64;
							} while(--n);
	}
Exemple #28
0
inline void 
GemmNNB
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::GemmNNB");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal GemmNNB: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  B ~ " << B.Height() << " x " << B.Width() << "\n"
            << "  C ~ " << C.Height() << " x " << C.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> AT(g),  A0(g),
                  AB(g),  A1(g),
                          A2(g);
    DistMatrix<T> CT(g),  C0(g),
                  CB(g),  C1(g),
                          C2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC> A1_STAR_MC(g);
    DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g);

    A1_STAR_MC.AlignWith( B );
    D1Trans_MR_STAR.AlignWith( B );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDown
    ( A, AT,
         AB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( AB.Height() > 0 )
    {
        LockedRepartitionDown
        ( AT,  A0,
         /**/ /**/
               A1,
          AB,  A2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//
        A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR]

        // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ]
        LocalGemm
        ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR );

        C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( AT,  A0,
               A1,
         /**/ /**/
          AB,  A2 );
 
        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}                     
Exemple #29
0
inline void
SymmLLC
( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::SymmLLC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error
        ("{A,B,C} must be distributed over the same grid");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T> 
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),  AColPan(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  ARowPan(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<T> 
        BT(g),  B0(g),
        BB(g),  B1(g),
                B2(g);
    DistMatrix<T> 
        CT(g),  C0(g),  CAbove(g),
        CB(g),  C1(g),  CBelow(g),
                C2(g);

    // Temporary distributions
    DistMatrix<T,MC,  STAR> AColPan_MC_STAR(g);
    DistMatrix<T,STAR,MC  > ARowPan_STAR_MC(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);

    B1Trans_MR_STAR.AlignWith( C );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    PartitionDown
    ( C, CT,
         CB, 0 );
    while( CB.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );

        RepartitionDown
        ( CT,  C0,
         /**/ /**/
               C1,
          CB,  C2 );

        LockedView1x2( ARowPan, A10, A11 );
        LockedView2x1
        ( AColPan, A11,
                   A21 );

        View2x1
        ( CAbove, C0,
                  C1 );
        View2x1
        ( CBelow, C1,
                  C2 );

        AColPan_MC_STAR.AlignWith( CBelow );
        ARowPan_STAR_MC.AlignWith( CAbove );
        //--------------------------------------------------------------------//
        AColPan_MC_STAR = AColPan;
        ARowPan_STAR_MC = ARowPan;
        MakeTrapezoidal( LEFT,  LOWER,  0, AColPan_MC_STAR );
        MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC );

        B1Trans_MR_STAR.TransposeFrom( B1 );

        LocalGemm
        ( NORMAL, TRANSPOSE, 
          alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow );

        LocalGemm
        ( TRANSPOSE, TRANSPOSE, 
          alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove );
        //--------------------------------------------------------------------//
        AColPan_MC_STAR.FreeAlignments();
        ARowPan_STAR_MC.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );

        SlidePartitionDown
        ( CT,  C0,
               C1,
         /**/ /**/
          CB,  C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #30
0
     SelMask(b,2,w) | \
     SelMask(b,3,w) | \
     SelMask(b,4,w) | \
     SelMask(b,5,w) | \
     SelMask(b,6,w) | \
     SelMask(b,7,w))

#if FB_UNIT == 16
#define fbStipple16Bits 0
#define fbStipple8Bits 0
static const pixman_bits_t fbStipple4Bits[16] = {
    C4(  0,4), C4(  1,4), C4(  2,4), C4(  3,4), C4(  4,4), C4(  5,4),
    C4(  6,4), C4(  7,4), C4(  8,4), C4(  9,4), C4( 10,4), C4( 11,4),
    C4( 12,4), C4( 13,4), C4( 14,4), C4( 15,4),};
static const pixman_bits_t fbStipple2Bits[4] = {
    C2(  0,8), C2(  1,8), C2(  2,8), C2(  3,8),
};
static const pixman_bits_t fbStipple1Bits[2] = {
    C1(  0,16), C1(  1,16),
};
#endif
#if FB_UNIT == 32
#define fbStipple16Bits 0
static const pixman_bits_t fbStipple8Bits[256] = {
    C8(  0,4), C8(  1,4), C8(  2,4), C8(  3,4), C8(  4,4), C8(  5,4),
    C8(  6,4), C8(  7,4), C8(  8,4), C8(  9,4), C8( 10,4), C8( 11,4),
    C8( 12,4), C8( 13,4), C8( 14,4), C8( 15,4), C8( 16,4), C8( 17,4),
    C8( 18,4), C8( 19,4), C8( 20,4), C8( 21,4), C8( 22,4), C8( 23,4),
    C8( 24,4), C8( 25,4), C8( 26,4), C8( 27,4), C8( 28,4), C8( 29,4),
    C8( 30,4), C8( 31,4), C8( 32,4), C8( 33,4), C8( 34,4), C8( 35,4),
    C8( 36,4), C8( 37,4), C8( 38,4), C8( 39,4), C8( 40,4), C8( 41,4),