/// Evaluates the two intermediate products f1 and f2 and writes the
/// gradients of C1, C2, C3 and DSAT from them.
///
/// @param t  Current time; not read by this implementation.
virtual void compute(const vd::Time& t)
{
    // Intermediate products shared by the gradients below.
    f2 = DSAT() * C2();
    f1 = C1() * p1;

    grad(C3) = f2;          // C3 receives f2
    grad(C1) = 0 - f1;      // C1 loses f1
    grad(DSAT) = 0;         // DSAT is held constant
    grad(C2) = f1 - f2;     // C2 receives f1 and loses f2
}
bool collision(const CircleHitBox &cercleBox1, const CircleHitBox &cercleBox2) { sf::Vector2f C1(cercleBox1.p), C2(cercleBox2.p); float d2 = (C1.x-C2.x)*(C1.x-C2.x) + (C1.y-C2.y)*(C1.y-C2.y); if (d2 > (cercleBox1.rayon + cercleBox2.rayon)*(cercleBox1.rayon + cercleBox2.rayon)) return false; else return true; }
double SpinAdapted::CreCreDesDes::redMatrixElement(Csf c1, vector<Csf>& ladder, const SpinBlock* b) { assert( build_pattern == "(((CC)(D))(D))" ); double element = 0.0; int I = get_orbs()[0]; int J = get_orbs()[1]; int K = get_orbs()[2]; int L = get_orbs()[3]; // Must take into account how the 4-index is built from a combination of the 2-index ops std::vector<SpinQuantum> quantum_ladder = get_quantum_ladder().at("(((CC)(D))(D))"); assert( quantum_ladder.size() == 3 ); SpinQuantum deltaQuantum12 = quantum_ladder.at(0); SpinQuantum deltaQuantum123 = quantum_ladder.at(1); SpinQuantum deltaQuantum1234 = quantum_ladder.at(2); //FIXME components != 0 deltaQuantum[0] = deltaQuantum1234; // Spin quantum data for CC IrrepSpace sym12 = deltaQuantum12.get_symm(); int irrep12 = deltaQuantum12.get_symm().getirrep(); int spin12 = deltaQuantum12.get_s().getirrep(); // Spin quantum data for (CC)D IrrepSpace sym123 = deltaQuantum123.get_symm(); int irrep123 = deltaQuantum123.get_symm().getirrep(); int spin123= deltaQuantum123.get_s().getirrep(); // Spin quantum data for total operator IrrepSpace sym1234 = deltaQuantum1234.get_symm(); int irrep1234 = deltaQuantum1234.get_symm().getirrep(); int spin1234 = deltaQuantum1234.get_s().getirrep(); TensorOp C1(I, 1); TensorOp C2(J, 1); TensorOp D3(K,-1); TensorOp D4(L,-1); TensorOp CC = C1.product(C2, spin12, irrep12); TensorOp CCD = CC.product(D3, spin123, irrep123); TensorOp CCDD = CCD.product(D4, spin1234, irrep1234); //FIXME loop over deltaQuantum components int j = 0; for (int i=0; i<ladder.size(); i++) { int index = 0; double cleb=0.0; if (nonZeroTensorComponent(c1, deltaQuantum[j], ladder[i], index, cleb)) { std::vector<double> MatElements = calcMatrixElements(c1, CCDD, ladder[i]) ; element = MatElements[index]/cleb; break; } else continue; } return element; }
double SpinAdapted::StackCreCreCreCre::redMatrixElement(Csf c1, vector<Csf>& ladder, const StackSpinBlock* b) { assert( build_pattern == "(((CC)(C))(C))" ); double element = 0.0; int I = get_orbs()[0]; int J = get_orbs()[1]; int K = get_orbs()[2]; int L = get_orbs()[3]; int Slaterlength = c1.det_rep.begin()->first.size(); vector<bool> backupSlater1(Slaterlength,0), backupSlater2(Slaterlength,0); // Must take into account how the 4-index is built from a combination of the 2-index ops std::vector<SpinQuantum> quantum_ladder = get_quantum_ladder().at("(((CC)(C))(C))"); assert( quantum_ladder.size() == 3 ); SpinQuantum deltaQuantum12 = quantum_ladder.at(0); SpinQuantum deltaQuantum123 = quantum_ladder.at(1); SpinQuantum deltaQuantum1234 = quantum_ladder.at(2); deltaQuantum[0] = deltaQuantum1234; // Spin quantum data for CC IrrepSpace sym12 = deltaQuantum12.get_symm(); int irrep12 = deltaQuantum12.get_symm().getirrep(); int spin12 = deltaQuantum12.get_s().getirrep(); // Spin quantum data for (CC)C IrrepSpace sym123 = deltaQuantum123.get_symm(); int irrep123 = deltaQuantum123.get_symm().getirrep(); int spin123= deltaQuantum123.get_s().getirrep(); // Spin quantum data for total operator IrrepSpace sym1234 = deltaQuantum1234.get_symm(); int irrep1234 = deltaQuantum1234.get_symm().getirrep(); int spin1234 = deltaQuantum1234.get_s().getirrep(); TensorOp C1(I, 1); TensorOp C2(J, 1); TensorOp C3(K, 1); TensorOp C4(L, 1); TensorOp CC = C1.product(C2, spin12, irrep12); TensorOp CCC = CC.product(C3, spin123, irrep123); TensorOp CCCC = CCC.product(C4, spin1234, irrep1234); for (int i=0; i<ladder.size(); i++) { int index = 0; double cleb=0.0; if (nonZeroTensorComponent(c1, deltaQuantum[0], ladder[i], index, cleb)) { std::vector<double> MatElements = calcMatrixElements(c1, CCCC, ladder[i], backupSlater1, backupSlater2) ; element = MatElements[index]/cleb; break; } else continue; } return element; }
void RatioNSDistanceTransform::processRow(const BinaryPixelType *imageRow) { int col; #define N1_SETMINUS_N2_COUNT 1 #define N2_SETMINUS_N1_COUNT 5 #define N1_CAP_N2_COUNT 3 static vect n1[N1_SETMINUS_N2_COUNT] = {{-1, 1}}; static vect n2[N2_SETMINUS_N1_COUNT] = {{1, 0}, {2, 0}, {2, 1}, {1, 2}, {2, 2}}; static vect n12[N1_CAP_N2_COUNT] = {{0, 1}, {1, 1}, {0, 2}}; for (col = 0; col < _cols; col++) { if (imageRow[col] == 0) dtLines[0][col + 2] = 0; else { GrayscalePixelType val; GrayscalePixelType dt; int k; val = GRAYSCALE_MAX; for (k = 0; k < N1_SETMINUS_N2_COUNT; k++) { assert(n1[k].y >= 0); assert(n1[k].y <= 2); assert(col + 2 - n1[k].x >= 0); assert(col + 2 - n1[k].x < _cols + 3); val = std::min(val, dtLines[n1[k].y][col + 2 - n1[k].x]); } assert(C1(d.num, d.den, (int) val) == d.mbf1i(d.mbf1(val)+1)+1); dt = std::min((int)_dMax, d.mbf1i(d.mbf1(val)+1)+1); val = GRAYSCALE_MAX; for (k = 0; k < N2_SETMINUS_N1_COUNT; k++) { assert(n2[k].y >= 0); assert(n2[k].y <= 2); assert(col + 2 - n2[k].x >= 0); assert(col + 2 - n2[k].x < _cols + 3); val = std::min(val, dtLines[n2[k].y][col + 2 - n2[k].x]); } assert(C2(d.num, d.den, (int) val) == d.mbf2i(d.mbf2(val)+1)+1); dt = std::min((int) dt, d.mbf2i(d.mbf2(val)+1)+1); val = GRAYSCALE_MAX; for (k = 0; k < N1_CAP_N2_COUNT; k++) { assert(n12[k].y >= 0); assert(n12[k].y <= 2); assert(col + 2 - n12[k].x >= 0); assert(col + 2 - n12[k].x < _cols + 3); val = std::min(val, dtLines[n12[k].y][col + 2 - n12[k].x]); } dt = std::min((int) dt, val + 1); dtLines[0][col + 2] = dt; } } _consumer->processRow(dtLines[0]+2); rotate(); }
/**
 * Builds the ethylene (C2H4) test molecule.
 *
 * The six nuclei are listed in the order C, H, H, C, H, H. Each atom gets a
 * sphere centred on it whose radius is 1.20 times the tabulated value
 * (1.70 for carbon, 1.20 for hydrogen) divided by convertBohrToAngstrom.
 *
 * @return Molecule with geometry, charges, masses, atoms, spheres and the
 *         point group produced by buildGroup(3, 4, 2, 1).
 */
Molecule C2H4()
{
    int nAtoms = 6;

    // Nuclear positions.
    Eigen::Vector3d C1(0.0000000000,  0.0000000000,  1.2578920000);
    Eigen::Vector3d H1(0.0000000000,  1.7454620000,  2.3427160000);
    Eigen::Vector3d H2(0.0000000000, -1.7454620000,  2.3427160000);
    Eigen::Vector3d C2(0.0000000000,  0.0000000000, -1.2578920000);
    Eigen::Vector3d H3(0.0000000000,  1.7454620000, -2.3427160000);
    Eigen::Vector3d H4(0.0000000000, -1.7454620000, -2.3427160000);

    // One column per atom.
    Eigen::MatrixXd geom(3, nAtoms);
    geom.col(0) = C1.transpose();
    geom.col(1) = H1.transpose();
    geom.col(2) = H2.transpose();
    geom.col(3) = C2.transpose();
    geom.col(4) = H3.transpose();
    geom.col(5) = H4.transpose();

    Eigen::VectorXd charges(nAtoms);
    Eigen::VectorXd masses(nAtoms);
    charges << 6.0, 1.0, 1.0, 6.0, 1.0, 1.0;
    masses << 12.00, 1.0078250, 1.0078250, 12.0, 1.0078250, 1.0078250;

    double radiusC = (1.70 * 1.20) / convertBohrToAngstrom;
    double radiusH = (1.20 * 1.20) / convertBohrToAngstrom;

    std::vector<Atom> atoms;
    atoms.push_back(Atom("Carbon",   "C", charges(0), masses(0), radiusC, C1, 1.0));
    atoms.push_back(Atom("Hydrogen", "H", charges(1), masses(1), radiusH, H1, 1.0));
    atoms.push_back(Atom("Hydrogen", "H", charges(2), masses(2), radiusH, H2, 1.0));
    atoms.push_back(Atom("Carbon",   "C", charges(3), masses(3), radiusC, C2, 1.0));
    atoms.push_back(Atom("Hydrogen", "H", charges(4), masses(4), radiusH, H3, 1.0));
    atoms.push_back(Atom("Hydrogen", "H", charges(5), masses(5), radiusH, H4, 1.0));

    // One sphere per atom, same order as the atoms.
    std::vector<Sphere> spheres;
    spheres.push_back(Sphere(C1, radiusC));
    spheres.push_back(Sphere(H1, radiusH));
    spheres.push_back(Sphere(H2, radiusH));
    spheres.push_back(Sphere(C2, radiusC));
    spheres.push_back(Sphere(H3, radiusH));
    spheres.push_back(Sphere(H4, radiusH));

    // D2h as generated by Oxy, Oxz, Oyz
    Symmetry pGroup = buildGroup(3, 4, 2, 1);

    return Molecule(nAtoms, charges, masses, geom, atoms, spheres, pGroup);
}
// Update the coefficients associated with the patch field void Foam::smoluchowskiJumpTFvPatchScalarField::updateCoeffs() { if (updated()) { return; } const fvPatchScalarField& pmu = patch().lookupPatchField<volScalarField, scalar>("mu"); const fvPatchScalarField& prho = patch().lookupPatchField<volScalarField, scalar>("rho"); const fvPatchField<scalar>& ppsi = patch().lookupPatchField<volScalarField, scalar>("psi"); const fvPatchVectorField& pU = patch().lookupPatchField<volVectorField, vector>("U"); // Prandtl number reading consistent with rhoCentralFoam const dictionary& thermophysicalProperties = db().lookupObject<IOdictionary>("thermophysicalProperties"); dimensionedScalar Pr ( dimensionedScalar::lookupOrDefault ( "Pr", thermophysicalProperties, 1.0 ) ); Field<scalar> C2 ( pmu/prho *sqrt(ppsi*constant::mathematical::piByTwo) *2.0*gamma_/Pr.value()/(gamma_ + 1.0) *(2.0 - accommodationCoeff_)/accommodationCoeff_ ); Field<scalar> aCoeff(prho.snGrad() - prho/C2); Field<scalar> KEbyRho(0.5*magSqr(pU)); valueFraction() = (1.0/(1.0 + patch().deltaCoeffs()*C2)); refValue() = Twall_; refGrad() = 0.0; mixedFvPatchScalarField::updateCoeffs(); }
int main(int argc, char **argv) { int m = 8000; int k = 3600; int n = 3600; int numsteps = 1; Matrix<double> A = RandomMatrix<double>(m, k); Matrix<double> B = RandomMatrix<double>(k, n); Matrix<double> C1(m, n), C2(m, n); Time([&] { MatMul(A, B, C1); }, "Classical gemm"); Time([&] { grey433_29_234::FastMatmul(A, B, C2, numsteps); }, "Fast (4, 3, 3)"); // Test for correctness. std::cout << "Maximum relative difference: " << MaxRelativeDiff(C1, C2) << std::endl; return 0; }
/*! * @param rxn Reaction index of the current reaction. This is used * as an index into vectors which have length n_total_rxn. * @param k This is a vector of integer values specifying the * species indices. The length of this vector species * the number of different species in the description. * The value of the entries are the species indices. * These are used as indexes into vectors which have * length n_total_species. * @param order This is a vector of the same length as vector k. * The order is used for the routine power(), which produces * a power law expression involving the species vector. * @param stoich This is used to handle fractional stoichiometric coefficients * on the product side of irreversible reactions. */ void StoichManagerN::add(size_t rxn, const std::vector<size_t>& k, const vector_fp& order, const vector_fp& stoich) { //printf ("add called\n"); if (order.size() != k.size()) { throw CanteraError("StoichManagerN::add()", "size of order and species arrays differ"); } if (stoich.size() != k.size()) { throw CanteraError("StoichManagerN::add()", "size of stoich and species arrays differ"); } bool frac = false; for (size_t n = 0; n < stoich.size(); n++) { if (fmod(stoich[n], 1.0) || fmod(order[n], 1.0)) { frac = true; break; } } if (frac || k.size() > 3) { m_cn_list.push_back(C_AnyN(rxn, k, order, stoich)); } else { // Try to express the reaction with unity stoichiometric // coefficients (by repeating species when necessary) so that the // simpler 'multiply' function can be used to compute the rate // instead of 'power'. std::vector<size_t> kRep; for (size_t n = 0; n < k.size(); n++) { for (size_t i = 0; i < stoich[n]; i++) kRep.push_back(k[n]); } switch (kRep.size()) { case 1: m_c1_list.push_back(C1(rxn, kRep[0])); break; case 2: m_c2_list.push_back(C2(rxn, kRep[0], kRep[1])); break; case 3: m_c3_list.push_back(C3(rxn, kRep[0], kRep[1], kRep[2])); break; default: m_cn_list.push_back(C_AnyN(rxn, k, order, stoich)); } } }
int main(int argc, char **argv) { int m = 2000; int k = 1200; int n = 2000; int numsteps = 1; Matrix<double> A = RandomMatrix<double>(m, k); Matrix<double> B = RandomMatrix<double>(k, n); Matrix<double> C1(m, n), C2(m, n); Time([&] { MatMul(A, B, C1); }, "Classical gemm"); Time([&] { classical222_8_24::FastMatmul(A, B, C2, numsteps); }, "Classical recursive (2, 2, 2)"); // Test for correctness. std::cout << "Maximum relative difference: " << MaxRelativeDiff(C1, C2) << std::endl; return 0; }
int main() { Array<int,2> A(2,3), B(2,3); A = 0, 3, 5, 1, 6, 9; B = 2, 5, 1, 9, 3, 4; Array<int,2> C(A+2*B); Array<int,2> C2(2,3); C2 = 4, 13, 7, 19, 12, 17; BZTEST(count(C2 == C) == 6); beginCheckAssert(); Array<int,2> D(i*10+j); endCheckAssert(); }
//====================================================================== tensor EightNode_Brick_u_p::getDampingTensorS( ) { int C2_dim[] = {Num_Nodes, Num_Nodes}; tensor C2(2,C2_dim,0.0); tensor Cc2(2,C2_dim,0.0); double r = 0.0; double rw = 0.0; double s = 0.0; double sw = 0.0; double t = 0.0; double tw = 0.0; double weight = 0.0; double det_of_Jacobian = 1.0; tensor Jacobian; tensor h; double Qqinv = (alpha-nf)/ks + nf/kf; int GP_c_r, GP_c_s, GP_c_t; for( GP_c_r = 0 ; GP_c_r < Num_IntegrationPts; GP_c_r++ ) { r = pts[GP_c_r]; rw = wts[GP_c_r]; for( GP_c_s = 0 ; GP_c_s < Num_IntegrationPts; GP_c_s++ ) { s = pts[GP_c_s]; sw = wts[GP_c_s]; for( GP_c_t = 0 ; GP_c_t < Num_IntegrationPts; GP_c_t++ ) { t = pts[GP_c_t]; tw = wts[GP_c_t]; h = shapeFunction(r,s,t); Jacobian = this->Jacobian_3D(r,s,t); det_of_Jacobian = Jacobian.determinant(); weight = rw * sw * tw * det_of_Jacobian; Cc2 = h("a")*h("k"); C2 += Cc2*(weight*Qqinv); } } } return C2; }
/*
 * xpn -- dyadic primitive defined through the H2 macro.
 * NOTE(review): the operands appear to be `a` (left) and `w` (right) and
 * the w* names (wr, wd, wn, wt) appear to come from the XW macro -- confirm
 * against the macro definitions, which are not visible here.
 *
 * What the body visibly does:
 *   - reads a->r and a->n into ar/an and computes bn = b0(a->p, an);
 *   - wl is *wd when wr is non-zero, otherwise 1;
 *   - Q(bn<0,9), Q(ar>1,7) and Q(wl!=bn,8) pair a condition with a numeric
 *     code (presumably an error exit -- confirm the Q macro);
 *   - when wr is 0, wl and wr are both forced to 1;
 *   - aw is 0 by default and 2 when wn==1; the wl!=bn check is only made
 *     when wn!=1;
 *   - when wr==1 and wt!=Et: the result is obtained with gv(wt,an) and C2 is
 *     invoked with x0 (wt==0), x1 (wt==Ft) or x2 (otherwise);
 *   - otherwise: v = tr(wr-1, wd+1), u = wn, the result is obtained with
 *     ga(wt, wr, an*v, wd), its first dimension is set to an, and C2 is
 *     invoked with x3.
 */
Z H2(xpn){ A z; ND2 I1; {I ar=a->r,an=a->n;XW; I bn=b0(a->p,an),wl=wr?*wd:1; aw=0; Q(bn<0,9) Q(ar>1,7) if(!wr) wl=wr=1; if(wn==1) aw=2; else Q(wl!=bn,8) if(wr==1&&wt!=Et){ W(gv(wt,an)) C2((I(*)())(!wt?(I(*)())x0:wt==Ft?(I(*)())x1:(I(*)())x2)) } v=tr(wr-1,wd+1); u=wn; W(ga(t=wt,wr,an*v,wd))*z->d=an; C2(x3) }}
/*
 * cmp -- dyadic primitive defined through the H2 macro.
 * NOTE(review): the operands appear to be `a` (left) and `w` (right) and
 * the w* names (wr, wd, wn, wt) appear to come from the XW macro -- confirm
 * against the macro definitions, which are not visible here.
 *
 * What the body visibly does:
 *   - reads a->r and a->n into ar/an and computes bn = b0(a->p, an);
 *   - wl is *wd when wr is non-zero, otherwise 1;
 *   - Q(bn==-1,9), Q(ar>1,7) and Q(wl!=an,8) pair a condition with a numeric
 *     code (presumably an error exit -- confirm the Q macro);
 *   - u is set to (an==1); when u, bn==1 and wr are all true the function
 *     returns ic_or_copy(w) immediately;
 *   - when wr is 0, wl and wr are both forced to 1;
 *   - when u is true bn is multiplied by wl; otherwise aw becomes 2 if
 *     wn==1, else the wl!=an check is made;
 *   - when wr==1, wt!=Et and bn>=0: the result is obtained with gv(wt,bn)
 *     and C2 is invoked with c0 (wt==0), c1 (wt==Ft) or c2 (otherwise);
 *   - otherwise: a negative bn is negated, v = tr(wr-1, wd+1), the result is
 *     obtained with ga(wt, wr, bn*v, wd), its first dimension is set to bn,
 *     and C2 is invoked with c3.
 */
Z H2(cmp){ A z;ND2 I1; {I ar=a->r,an=a->n;XW; I bn=b0(a->p,an),wl=wr?*wd:1; Q(bn==-1,9) aw=0; u=an==1; if((u)&&bn==1&&wr) R ic_or_copy(w); Q(ar>1,7) if(!wr) wl=wr=1; if(u) bn*=wl; else if(wn==1) aw=2; else Q(wl!=an,8) if(wr==1&&wt!=Et&&bn>=0){ W(gv(wt,bn)) C2((!wt?(I(*)())c0:wt==Ft?(I(*)())c1:(I(*)())c2)) } if(bn<0) bn=-bn; v=tr(wr-1,wd+1); W(ga(t=wt,wr,bn*v,wd))*z->d=bn; C2(c3) }}
int main(int argc, char **argv) { int m = 90; int k = 90; int n = 90; int numsteps = 1; Matrix<double> A = RandomMatrix<double>(m, k); Matrix<double> B = RandomMatrix<double>(k, n); Matrix<double> C1(m, n), C2(m, n); MatMul(A, B, C1); for (int numsteps = 1; numsteps <= 2; ++numsteps) { double lambda = DBL_EPSILON; std::cout << numsteps << std::endl; while (lambda < 1) { smirnov333_20_182_approx::FastMatmul(A, B, C2, numsteps, lambda); std::cout << lambda << ", " << MaxRelativeDiff(C1, C2) << "; "; lambda *= 2; } std::cout << std::endl; } return 0; }
// Formats the key number as text.
//   - MBC3('T','P','S')        -> "TPS"
//   - MBC3('M','K',0..9)       -> "TPSMK<d>" with <d> = C3(keynr)
//   - C2(keynr) in D,P,X,C,E,T -> that letter followed by the low nibble
//                                 of keynr in upper-case hex
//   - anything else            -> keynr as at least two upper-case hex digits
cString cPlainKeyVia::PrintKeyNr(void)
{
  char buf[12];
  const char *text = buf;

  switch (keynr) {
    case MBC3('T','P','S'):
      text = "TPS";
      break;

    case MBC3('M','K',0):
    case MBC3('M','K',1):
    case MBC3('M','K',2):
    case MBC3('M','K',3):
    case MBC3('M','K',4):
    case MBC3('M','K',5):
    case MBC3('M','K',6):
    case MBC3('M','K',7):
    case MBC3('M','K',8):
    case MBC3('M','K',9):
      snprintf(buf, sizeof(buf), "TPSMK%d", C3(keynr));
      break;

    default: {
      char c2 = C2(keynr);
      switch (c2) {
        case 'D':
        case 'P':
        case 'X':
        case 'C':
        case 'E':
        case 'T':
          // Known key-type letter: print it with the key index nibble.
          snprintf(buf, sizeof(buf), "%c%01X", c2, keynr & 0x0f);
          break;
        default:
          snprintf(buf, sizeof(buf), "%02X", keynr);
          break;
      }
      break;
    }
  }

  return text;
}
void Trr2kNNNT ( UpperOrLower uplo, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNNT"); if( E.Height() != E.Width() || A.Width() != C.Width() || A.Height() != E.Height() || C.Height() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Width() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNNT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,MC, STAR> C1_MC_STAR(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_MC_STAR.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionRight( C, CL, CR, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_MC_STAR = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_MC_STAR, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// 
SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
/* Subroutine */ int zlatzm_(char *side, integer *m, integer *n, 
	doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex *
	c1, doublecomplex *c2, integer *ldc, doublecomplex *work)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   

    Purpose   
    =======   

    ZLATZM applies a Householder matrix generated by ZTZRQF to a matrix.   

    Let P = I - tau*u*u',   u = ( 1 ),   
                                ( v )   
    where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if   
    SIDE = 'R'.   

    If SIDE equals 'L', let   
           C = [ C1 ] 1   
               [ C2 ] m-1   
                 n   
    Then C is overwritten by P*C.   

    If SIDE equals 'R', let   
           C = [ C1, C2 ] m   
                  1  n-1   
    Then C is overwritten by C*P.   

    Arguments   
    =========   

    SIDE    (input) CHARACTER*1   
            = 'L': form P * C   
            = 'R': form C * P   

    M       (input) INTEGER   
            The number of rows of the matrix C.   

    N       (input) INTEGER   
            The number of columns of the matrix C.   

    V       (input) COMPLEX*16 array, dimension   
                    (1 + (M-1)*abs(INCV)) if SIDE = 'L'   
                    (1 + (N-1)*abs(INCV)) if SIDE = 'R'   
            The vector v in the representation of P. V is not used   
            if TAU = 0.   

    INCV    (input) INTEGER   
            The increment between elements of v. INCV <> 0   

    TAU     (input) COMPLEX*16   
            The value tau in the representation of P.   

    C1      (input/output) COMPLEX*16 array, dimension   
                           (LDC,N) if SIDE = 'L'   
                           (M,1)   if SIDE = 'R'   
            On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1   
            if SIDE = 'R'.   

            On exit, the first row of P*C if SIDE = 'L', or the first   
            column of C*P if SIDE = 'R'.   

    C2      (input/output) COMPLEX*16 array, dimension   
                           (LDC, N)   if SIDE = 'L'   
                           (LDC, N-1) if SIDE = 'R'   
            On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the   
            m x (n - 1) matrix C2 if SIDE = 'R'.   

            On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:n   
            of C*P if SIDE = 'R'.   

    LDC     (input) INTEGER   
            The leading dimension of the arrays C1 and C2.   
            LDC >= max(1,M).   

    WORK    (workspace) COMPLEX*16 array, dimension   
                        (N) if SIDE = 'L'   
                        (M) if SIDE = 'R'   

    Returns 0 in every case; an unrecognised SIDE leaves C untouched.   

    =====================================================================   

       Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static doublecomplex c_b1 = {1.,0.};
    static integer c__1 = 1;
    
    /* System generated locals (c1_dim1, c1_offset, c2_dim1 and c2_offset
       are declared but never referenced in this translation) */
    integer c1_dim1, c1_offset, c2_dim1, c2_offset, i__1;
    doublecomplex z__1;
    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *), 
	    zgeru_(integer *, integer *, doublecomplex *, doublecomplex *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, integer *)
	    , zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, 
	    integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, 
	    integer *, doublecomplex *, integer *), zlacgv_(integer *, 
	    doublecomplex *, integer *);


/* 1-based accessors mirroring the Fortran indexing. C1 and C2 are addressed
   column-major with leading dimension *ldc. */
#define V(I) v[(I)-1]
#define WORK(I) work[(I)-1]

#define C2(I,J) c2[(I)-1 + ((J)-1)* ( *ldc)]
#define C1(I,J) c1[(I)-1 + ((J)-1)* ( *ldc)]

    /* Nothing to do for an empty matrix, or when tau = 0 (P is then the
       identity). */
    if (min(*m,*n) == 0 || tau->r == 0. && tau->i == 0.) {
	return 0;
    }

    if (lsame_(side, "L")) {

/*        w := conjg( C1 + v' * C2 ) */

	/* C1 is a row here, so it is read with stride ldc. */
	zcopy_(n, &C1(1,1), ldc, &WORK(1), &c__1);
	zlacgv_(n, &WORK(1), &c__1);
	i__1 = *m - 1;
	zgemv_("Conjugate transpose", &i__1, n, &c_b1, &C2(1,1), ldc, &
		V(1), incv, &c_b1, &WORK(1), &c__1);

/*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w'   
          [ C2 ]    [ C2 ]        [ v ] */

	/* Undo the conjugation so WORK holds w' for the updates below. */
	zlacgv_(n, &WORK(1), &c__1);
	z__1.r = -tau->r, z__1.i = -tau->i;
	zaxpy_(n, &z__1, &WORK(1), &c__1, &C1(1,1), ldc);
	i__1 = *m - 1;
	z__1.r = -tau->r, z__1.i = -tau->i;
	zgeru_(&i__1, n, &z__1, &V(1), incv, &WORK(1), &c__1, &C2(1,1), 
		ldc);

    } else if (lsame_(side, "R")) {

/*        w := C1 + C2 * v */

	/* C1 is a column here, so it is read with unit stride. */
	zcopy_(m, &C1(1,1), &c__1, &WORK(1), &c__1);
	i__1 = *n - 1;
	zgemv_("No transpose", m, &i__1, &c_b1, &C2(1,1), ldc, &V(1), 
		incv, &c_b1, &WORK(1), &c__1);

/*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] */

	z__1.r = -tau->r, z__1.i = -tau->i;
	zaxpy_(m, &z__1, &WORK(1), &c__1, &C1(1,1), &c__1);
	i__1 = *n - 1;
	z__1.r = -tau->r, z__1.i = -tau->i;
	zgerc_(m, &i__1, &z__1, &WORK(1), &c__1, &V(1), incv, &C2(1,1), 
		ldc);
    }

    return 0;

/*     End of ZLATZM */

} /* zlatzm_ */
int main(int, char**) { std::vector<std::chrono::duration<double,std::milli>> duration_vector_1; std::vector<std::chrono::duration<double,std::milli>> duration_vector_2; #if SYNTHETIC_INPUT Halide::Buffer<uint8_t> im1(10, 10); Halide::Buffer<uint8_t> im2(10, 10); for (int i = 0; i < 10; i++) for (int j = 0; j < 10; j++) { im1(i, j) = (uint8_t) i*i+j*j; im2(i, j) = (uint8_t) i*i+j*j; } #else Halide::Buffer<uint8_t> im1 = Halide::Tools::load_image("./utils/images/rgb.png"); Halide::Buffer<uint8_t> im2 = Halide::Tools::load_image("./utils/images/rgb.png"); #endif Halide::Buffer<float> Ix_m(im1.width(), im1.height()); Halide::Buffer<float> Iy_m(im1.width(), im1.height()); Halide::Buffer<float> It_m(im1.width(), im1.height()); Halide::Buffer<int> C1(_NC); Halide::Buffer<int> C2(_NC); Halide::Buffer<int> SIZES(2); Halide::Buffer<int> u(_NC); Halide::Buffer<int> v(_NC); Halide::Buffer<float> A(2, 4*w*w); Halide::Buffer<float> tA(4*w*w, 2); Halide::Buffer<double> pinvA(4*w*w, 2); Halide::Buffer<double> det(1); Halide::Buffer<float> tAA(2, 2); Halide::Buffer<double> X(2, 2); SIZES(0) = im1.height(); SIZES(1) = im1.width(); C1(0) = 500; C2(0) = 400; C1(1) = 800; C2(1) = 900; C1(2) = 200; C2(2) = 400; C1(3) = 400; C2(3) = 200; C1(4) = 400; C2(4) = 500; C1(5) = 800; C2(5) = 200; C1(6) = 200; C2(6) = 900; C1(7) = 900; C2(7) = 200; det(0) = 0; init_buffer(Ix_m, (float) 0); init_buffer(Iy_m, (float) 0); init_buffer(It_m, (float) 0); init_buffer(A, (float) 0); init_buffer(tA, (float) 0); init_buffer(pinvA, (double) 0); init_buffer(tAA, (float) 0); init_buffer(X, (double) 0); // Warm up optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(), Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(), C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer()); // Tiramisu for (int i=0; i<NB_TESTS; i++) { auto start1 = 
std::chrono::high_resolution_clock::now(); optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(), Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(), C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer()); auto end1 = std::chrono::high_resolution_clock::now(); std::chrono::duration<double,std::milli> duration1 = end1 - start1; duration_vector_1.push_back(duration1); } std::cout << "Time: " << median(duration_vector_1) << std::endl; #if SYNTHETIC_INPUT print_buffer(im1); print_buffer(im2); print_buffer(Ix_m); print_buffer(Iy_m); print_buffer(It_m); print_buffer(A); print_buffer(tA); print_buffer(tAA); print_buffer(det); print_buffer(X); print_buffer(pinvA); #endif std::cout << "Output" << std::endl; print_buffer(u); print_buffer(v); return 0; }
inline void GemmTTA ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTA expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MR, STAR> D1_MR_STAR(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); B1_STAR_MC.AlignWith( A ); D1_MR_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionRight( C, CL, CR, 0 ); while( BB.Height() > 0 ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_MR_STAR ); //--------------------------------------------------------------------// B1_STAR_MC = B1; // B1[*,MC] <- B1[MC,MR] // D1[MR,*] := alpha (A[MC,MR])^T (B1[*,MC])^T // = alpha (A^T)[MR,MC] (B1^T)[MC,*] LocalGemm ( orientationOfA, orientationOfB, alpha, A, B1_STAR_MC, T(0), D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); 
//--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
int main(int argc, char *argv[]) { int i, returnierr=0; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif // Uncomment to debug in parallel int tmp; if (Comm.MyPID()==0) cin >> tmp; Comm.Barrier(); bool verbose = false; bool veryVerbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; // Check if we should print lots of results to standard out if (argc>2) if (argv[2][0]=='-' && argv[2][1]=='v') veryVerbose = true; if (verbose && Comm.MyPID()==0) std::cout << Epetra_Version() << std::endl << std::endl; if (!verbose) Comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose) std::cout << Comm << std::endl << std::flush; bool verbose1 = verbose; if (verbose) verbose = (Comm.MyPID()==0); bool veryVerbose1 = veryVerbose; if (veryVerbose) veryVerbose = (Comm.MyPID()==0); int NumMyElements = 100; if (veryVerbose1) NumMyElements = 10; NumMyElements += Comm.MyPID(); int MaxNumMyElements = NumMyElements+Comm.NumProc()-1; int * ElementSizeList = new int[NumMyElements]; long long * MyGlobalElements = new long long[NumMyElements]; for (i = 0; i<NumMyElements; i++) { MyGlobalElements[i] = (Comm.MyPID()*MaxNumMyElements+i)*2; ElementSizeList[i] = i%6 + 2; // elementsizes go from 2 to 7 } Epetra_BlockMap Map(-1LL, NumMyElements, MyGlobalElements, ElementSizeList, 0, Comm); delete [] ElementSizeList; delete [] MyGlobalElements; Epetra_MapColoring C0(Map); int * elementColors = new int[NumMyElements]; int maxcolor = 24; int * colorCount = new int[maxcolor]; int ** colorLIDs = new int*[maxcolor]; for (i=0; i<maxcolor; i++) colorCount[i] = 0; for (i=0; i<maxcolor; i++) colorLIDs[i] = 0; int defaultColor = C0.DefaultColor(); for (i=0; i<Map.NumMyElements(); i++) { assert(C0[i]==defaultColor); assert(C0(Map.GID64(i))==defaultColor); if (i%2==0) C0[i] = i%6+5+i%14; // cycle through 5...23 on 
even elements else C0(Map.GID64(i)) = i%5+1; // cycle through 1...5 on odd elements elementColors[i] = C0[i]; // Record color of ith element for use below colorCount[C0[i]]++; // Count how many of each color for checking below } if (veryVerbose) std::cout << "Original Map Coloring using element-by-element definitions" << std::endl; if (veryVerbose1) std::cout << C0 << std::endl; int numColors = 0; for (i=0; i<maxcolor; i++) if (colorCount[i]>0) { numColors++; colorLIDs[i] = new int[colorCount[i]]; } for (i=0; i<maxcolor; i++) colorCount[i] = 0; for (i=0; i<Map.NumMyElements(); i++) colorLIDs[C0[i]][colorCount[C0[i]]++] = i; int newDefaultColor = -1; Epetra_MapColoring C1(Map, elementColors, newDefaultColor); if (veryVerbose) std::cout << "Same Map Coloring using one-time construction" << std::endl; if (veryVerbose1) std::cout << C1 << std::endl; assert(C1.DefaultColor()==newDefaultColor); for (i=0; i<Map.NumMyElements(); i++) assert(C1[i]==C0[i]); Epetra_MapColoring C2(C1); if (veryVerbose) std::cout << "Same Map Coloring using copy constructor" << std::endl; if (veryVerbose1) std::cout << C1 << std::endl; for (i=0; i<Map.NumMyElements(); i++) assert(C2[i]==C0[i]); assert(C2.DefaultColor()==newDefaultColor); assert(numColors==C2.NumColors()); for (i=0; i<maxcolor; i++) { int curNumElementsWithColor = C2.NumElementsWithColor(i); assert(colorCount[i]==curNumElementsWithColor); int * curColorLIDList = C2.ColorLIDList(i); if (curNumElementsWithColor==0) { assert(curColorLIDList==0); } else for (int j=0; j<curNumElementsWithColor; j++) assert(curColorLIDList[j]==colorLIDs[i][j]); } int curColor = 1; Epetra_Map * Map1 = C2.GenerateMap(curColor); Epetra_BlockMap * Map2 = C2.GenerateBlockMap(curColor); assert(Map1->NumMyElements()==colorCount[curColor]); assert(Map2->NumMyElements()==colorCount[curColor]); for (i=0; i<Map1->NumMyElements(); i++) { assert(Map1->GID64(i)==Map.GID64(colorLIDs[curColor][i])); assert(Map2->GID64(i)==Map.GID64(colorLIDs[curColor][i])); 
assert(Map2->ElementSize(i)==Map.ElementSize(colorLIDs[curColor][i])); } // Now test data redistribution capabilities Epetra_Map ContiguousMap(-1LL, Map.NumMyElements(), Map.IndexBase64(), Comm); // This vector contains the element sizes for the original map. Epetra_IntVector elementSizes(Copy, ContiguousMap, Map.ElementSizeList()); Epetra_LongLongVector elementIDs(Copy, ContiguousMap, Map.MyGlobalElements64()); Epetra_IntVector elementColorValues(Copy, ContiguousMap, C2.ElementColors()); long long NumMyElements0 = 0; if (Comm.MyPID()==0) NumMyElements0 = Map.NumGlobalElements64(); Epetra_Map CMap0(-1LL, NumMyElements0, Map.IndexBase64(), Comm); Epetra_Import importer(CMap0, ContiguousMap); Epetra_IntVector elementSizes0(CMap0); Epetra_LongLongVector elementIDs0(CMap0); Epetra_IntVector elementColorValues0(CMap0); elementSizes0.Import(elementSizes, importer, Insert); elementIDs0.Import(elementIDs, importer, Insert); elementColorValues0.Import(elementColorValues, importer, Insert); Epetra_BlockMap MapOnPE0(-1LL,NumMyElements0, elementIDs0.Values(), elementSizes0.Values(), Map.IndexBase64(), Comm); Epetra_Import importer1(MapOnPE0, Map); Epetra_MapColoring ColoringOnPE0(MapOnPE0); ColoringOnPE0.Import(C2, importer1, Insert); for (i=0; i<MapOnPE0.NumMyElements(); i++) assert(ColoringOnPE0[i]==elementColorValues0[i]); if (veryVerbose) std::cout << "Same Map Coloring on PE 0 only" << std::endl; if (veryVerbose1) std::cout << ColoringOnPE0 << std::endl; Epetra_MapColoring C3(Map); C3.Export(ColoringOnPE0, importer1, Insert); for (i=0; i<Map.NumMyElements(); i++) assert(C3[i]==C2[i]); if (veryVerbose) std::cout << "Same Map Coloring after Import/Export exercise" << std::endl; if (veryVerbose1) std::cout << ColoringOnPE0 << std::endl; if (verbose) std::cout << "Checked OK\n\n" << std::endl; if (verbose1) { if (verbose) std::cout << "Test ostream << operator" << std::endl << std::flush; std::cout << C0 << std::endl; } delete [] elementColors; for (i=0; i<maxcolor; i++) if 
(colorLIDs[i]!=0) delete [] colorLIDs[i]; delete [] colorLIDs; delete [] colorCount; delete Map1; delete Map2; #ifdef EPETRA_MPI MPI_Finalize(); #endif return returnierr; }
inline void SymmLLA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T> Z1(g); DistMatrix<T,MC,STAR> Z1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); B1_MC_STAR.AlignWith( A ); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); Z1_MC_STAR.AlignWith( A ); Z1_MR_STAR.AlignWith( A ); Scale( beta, C ); LockedPartitionRight ( B, BL, BR, 0 ); PartitionRight ( C, CL, CR, 0 ); while( CL.Width() < C.Width() ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Z1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), Z1_MC_STAR ); Zeros( C1.Height(), C1.Width(), Z1_MR_STAR ); //--------------------------------------------------------------------// B1_MC_STAR = B1; B1_VR_STAR = B1_MC_STAR; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); LocalSymmetricAccumulateLL ( TRANSPOSE, alpha, A, B1_MC_STAR, B1Trans_STAR_MR, Z1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); Z1 = Z1_MR_MC; Z1.SumScatterUpdate( T(1), Z1_MC_STAR ); Axpy( T(1), Z1, C1 ); //--------------------------------------------------------------------// Z1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T,MC,STAR> D1_MC_STAR(g); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); D1_MC_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Zeros( C1.Height(), C1.Width(), D1_MC_STAR ); //--------------------------------------------------------------------// B1_VR_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); // D1[MC,*] := alpha A[MC,MR] B1[MR,*] LocalGemm ( NORMAL, TRANSPOSE, alpha, A, B1Trans_STAR_MR, T(0), D1_MC_STAR ); // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows C1.SumScatterUpdate( T(1), D1_MC_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
bool CollisionManager::isIntersectsPolygonPolygon(ICollisionHull* polygon1, ICollisionHull* polygon2) { PoligonCollisionHull* poligonCH1 = dynamic_cast<PoligonCollisionHull*>(polygon1); PoligonCollisionHull* poligonCH2 = dynamic_cast<PoligonCollisionHull*>(polygon2); PointList points1 = poligonCH1->getPoints(); PointList points2 = poligonCH2->getPoints(); std::vector<float> A1(points1.size()); std::vector<float> B1(points1.size()); std::vector<float> C1(points1.size()); std::vector<float> A2(points2.size()); std::vector<float> B2(points2.size()); std::vector<float> C2(points2.size()); int i0, i1; float D; Vector3 P0, P1; for(int i = 0; i < points1.size(); i++) { i0 = i; i1 = (i == (points1.size() - 1)) ? 0 : i + 1; P0 = points1[i0]; P1 = points1[i1]; A1[i] = P0._y - P1._y; B1[i] = P1._x - P0._x; C1[i] = (P0._x * P1._y) - (P1._x * P0._y); } for(int i = 0; i < points2.size(); i++) { i0 = i; i1 = (i == (points2.size() - 1)) ? 0 : i + 1; P0 = points2[i0]; P1 = points2[i1]; A2[i] = P0._y - P1._y; B2[i] = P1._x - P0._x; C2[i] = (P0._x * P1._y) - (P1._x * P0._y); } //cheking 1 against 2 for(int i = 0; i < points1.size(); i++) { for(int j = 0; j < points2.size(); j++) { P0 = points1[i]; D = (P0._x * A2[j]) + (P0._y * B2[j]) + C2[j]; if(D > 0) { return true; } } }//cheking 1 against 2 //cheking 2 against 1 for(int i = 0; i < points2.size(); i++) { for(int j = 0; j < points1.size(); j++) { P0 = points2[i]; D = (P0._x * A1[j]) + (P0._y * B1[j]) + C1[j]; if(D > 0) { return true; } } }//cheking 2 against 1 return false; }
inline void GemmNNDot ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNDot"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNDot: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); if( A.Height() > B.Width() ) { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g), CB(g), C1(g), C10(g), C11(g), C12(g), C2(g); // Temporary distributions DistMatrix<T,STAR,VC> A1_STAR_VC(g); DistMatrix<T,VC,STAR> B1_VC_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); A1_STAR_VC = A1; B1_VC_STAR.AlignWith( A1_STAR_VC ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C1, C1L, C1R, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( C1L, /**/ C1R, C10, /**/ C11, C12 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// B1_VC_STAR = B1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( C1L, /**/ C1R, C10, C11, 
/**/ C12 ); } B1_VC_STAR.FreeAlignments(); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } } else { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C1T(g), C01(g), C0(g), C1(g), C2(g), C1B(g), C11(g), C21(g); // Temporary distributions DistMatrix<T,STAR,VR> A1_STAR_VR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_VR_STAR = B1; A1_STAR_VR.AlignWith( B1_VR_STAR ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C1, C1T, C1B, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( C1T, C01, /***/ /***/ C11, C1B, C21 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// A1_STAR_VR = A1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( C1T, C01, C11, /***/ /***/ C1B, C21 ); } A1_STAR_VR.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmTTB ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTB expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > D1_STAR_MC(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); A1_VR_STAR.AlignWith( B ); A1AdjOrTrans_STAR_MR.AlignWith( B ); D1_STAR_MC.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_STAR_MC ); //--------------------------------------------------------------------// A1_VR_STAR = A1; if( orientationOfA == ADJOINT ) A1AdjOrTrans_STAR_MR.AdjointFrom( A1_VR_STAR ); else A1AdjOrTrans_STAR_MR.TransposeFrom( A1_VR_STAR ); // D1[*,MC] := alpha (A1[MR,*])^[T/H] (B[MC,MR])^[T/H] // = alpha (A1^[T/H])[*,MR] (B^[T/H])[MR,MC] LocalGemm ( NORMAL, orientationOfB, alpha, A1AdjOrTrans_STAR_MR, B, T(0), 
D1_STAR_MC ); // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows D1_MR_MC.SumScatterFrom( D1_STAR_MC ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
/*
 * Compression function: folds n consecutive 64-byte blocks starting at inp
 * into the chaining value ctx->H.
 *
 * For every block:
 *   - K is loaded from H and S is set to H ^ block;
 *   - for each of ROUNDS rounds, K is replaced by a mix of its own bytes
 *     (via the C0..C7 macros) with RC[r] folded into word 0, and S is
 *     replaced by the same mix of its own bytes XORed with the new K;
 *   - H is updated as H ^= S ^ block.
 *
 * The loop is a do/while on --n, so the body always runs at least once;
 * calling with n == 0 makes the size_t counter wrap instead of stopping.
 *
 * Compile-time variants:
 *   GO_FOR_MMX               - if defined, the macro is invoked first; what
 *                              it does is not visible in this function.
 *   OPENSSL_SMALL_FOOTPRINT  - rolled loops over an L[8] scratch array.
 *   __STRICT_ALIGNMENT       - input that is not 8-byte aligned is read with
 *                              memcpy / byte access instead of through a
 *                              u64 pointer.
 *   SMALL_REGISTER_BANK      - computes each of L0..L7 as one full
 *                              expression instead of interleaving the
 *                              updates column by column.
 */
void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
{
    int r;
    const u8 *p=inp;
    /* S: working state, K: round key, H: byte/word view of ctx->H.q */
    union { u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;

#ifdef GO_FOR_MMX
    GO_FOR_MMX(ctx,inp,n);
#endif
    do {
#ifdef OPENSSL_SMALL_FOOTPRINT
        u64 L[8];
        int i;

        /* K = H, S = H ^ block, one byte at a time */
        for (i=0;i<64;i++) S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
        for (r=0;r<ROUNDS;r++)
        {
            /* next round key: only word 0 receives the round constant */
            for (i=0;i<8;i++)
            {
                L[i] = i ? 0 : RC[r];
                L[i] ^= C0(K,i) ^ C1(K,(i-1)&7) ^
                        C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
                        C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
                        C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
            }
            memcpy (K.q,L,64);
            /* next state: L still holds the new key, so it is XORed in */
            for (i=0;i<8;i++)
            {
                L[i] ^= C0(S,i) ^ C1(S,(i-1)&7) ^
                        C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
                        C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
                        C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
            }
            memcpy (S.q,L,64);
        }
        /* feed-forward into the chaining value */
        for (i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i];
#else
        u64 L0,L1,L2,L3,L4,L5,L6,L7;

#ifdef __STRICT_ALIGNMENT
        if ((size_t)p & 7)
        {
            /* unaligned input: copy the block first, then XOR in H */
            memcpy (S.c,p,64);
            S.q[0] ^= (K.q[0] = H->q[0]);
            S.q[1] ^= (K.q[1] = H->q[1]);
            S.q[2] ^= (K.q[2] = H->q[2]);
            S.q[3] ^= (K.q[3] = H->q[3]);
            S.q[4] ^= (K.q[4] = H->q[4]);
            S.q[5] ^= (K.q[5] = H->q[5]);
            S.q[6] ^= (K.q[6] = H->q[6]);
            S.q[7] ^= (K.q[7] = H->q[7]);
        }
        else
#endif
        {
            /* K = H, S = H ^ block, one 64-bit word at a time */
            const u64 *pa = (const u64*)p;
            S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
            S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
            S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
            S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
            S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
            S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
            S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
            S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
        }

        for(r=0;r<ROUNDS;r++)
        {
#ifdef SMALL_REGISTER_BANK
            /* next round key, one output word per expression */
            L0 = C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
                 C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
            L1 = C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
                 C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
            L2 = C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
                 C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
            L3 = C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
                 C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
            L4 = C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
                 C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
            L5 = C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
                 C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
            L6 = C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
                 C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
            L7 = C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
                 C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);

            K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
            K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

            /* next state: L0..L7 still hold the new key, so it is XORed in */
            L0 ^= C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
                  C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
            L1 ^= C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
                  C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
            L2 ^= C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
                  C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
            L3 ^= C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
                  C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
            L4 ^= C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
                  C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
            L5 ^= C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
                  C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
            L6 ^= C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
                  C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
            L7 ^= C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
                  C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);

            S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
            S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#else
            /*
             * Same result as above, but accumulated source word by source
             * word: each line below folds the eight C0..C7 terms of one
             * word of K (then of S) into the eight accumulators.
             */
            L0 = C0(K,0); L1 = C1(K,0); L2 = C2(K,0); L3 = C3(K,0);
            L4 = C4(K,0); L5 = C5(K,0); L6 = C6(K,0); L7 = C7(K,0);
            L0 ^= RC[r];

            L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
            L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);

            L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
            L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);

            L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
            L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);

            L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
            L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);

            L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
            L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);

            L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
            L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);

            L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
            L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);

            K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
            K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

            /* L0..L7 still hold the new key; fold the state into them */
            L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
            L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);

            L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
            L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);

            L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
            L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);

            L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
            L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);

            L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
            L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);

            L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
            L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);

            L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
            L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);

            L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
            L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);

            S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
            S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#endif
        }

#ifdef __STRICT_ALIGNMENT
        if ((size_t)p & 7)
        {
            /* unaligned input: feed-forward byte by byte */
            int i;
            for(i=0;i<64;i++) H->c[i] ^= S.c[i] ^ p[i];
        }
        else
#endif
        {
            /* feed-forward into the chaining value, word by word */
            const u64 *pa=(const u64 *)p;
            H->q[0] ^= S.q[0] ^ pa[0];
            H->q[1] ^= S.q[1] ^ pa[1];
            H->q[2] ^= S.q[2] ^ pa[2];
            H->q[3] ^= S.q[3] ^ pa[3];
            H->q[4] ^= S.q[4] ^ pa[4];
            H->q[5] ^= S.q[5] ^ pa[5];
            H->q[6] ^= S.q[6] ^ pa[6];
            H->q[7] ^= S.q[7] ^ pa[7];
        }
#endif
        /* advance to the next 64-byte block */
        p += 64;
    } while(--n);
}
inline void GemmNNB ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNB: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g); A1_STAR_MC.AlignWith( B ); D1Trans_MR_STAR.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ] LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, T(0), D1Trans_MR_STAR ); C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void SymmLLC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedView1x2( ARowPan, A10, A11 ); LockedView2x1 ( AColPan, A11, A21 ); View2x1 ( CAbove, C0, C1 ); View2x1 ( CBelow, C1, C2 ); AColPan_MC_STAR.AlignWith( CBelow ); ARowPan_STAR_MC.AlignWith( CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( LEFT, LOWER, 0, AColPan_MC_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC ); B1Trans_MR_STAR.TransposeFrom( B1 ); LocalGemm ( NORMAL, TRANSPOSE, alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow ); LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove ); //--------------------------------------------------------------------// 
AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
SelMask(b,2,w) | \ SelMask(b,3,w) | \ SelMask(b,4,w) | \ SelMask(b,5,w) | \ SelMask(b,6,w) | \ SelMask(b,7,w)) #if FB_UNIT == 16 #define fbStipple16Bits 0 #define fbStipple8Bits 0 static const pixman_bits_t fbStipple4Bits[16] = { C4( 0,4), C4( 1,4), C4( 2,4), C4( 3,4), C4( 4,4), C4( 5,4), C4( 6,4), C4( 7,4), C4( 8,4), C4( 9,4), C4( 10,4), C4( 11,4), C4( 12,4), C4( 13,4), C4( 14,4), C4( 15,4),}; static const pixman_bits_t fbStipple2Bits[4] = { C2( 0,8), C2( 1,8), C2( 2,8), C2( 3,8), }; static const pixman_bits_t fbStipple1Bits[2] = { C1( 0,16), C1( 1,16), }; #endif #if FB_UNIT == 32 #define fbStipple16Bits 0 static const pixman_bits_t fbStipple8Bits[256] = { C8( 0,4), C8( 1,4), C8( 2,4), C8( 3,4), C8( 4,4), C8( 5,4), C8( 6,4), C8( 7,4), C8( 8,4), C8( 9,4), C8( 10,4), C8( 11,4), C8( 12,4), C8( 13,4), C8( 14,4), C8( 15,4), C8( 16,4), C8( 17,4), C8( 18,4), C8( 19,4), C8( 20,4), C8( 21,4), C8( 22,4), C8( 23,4), C8( 24,4), C8( 25,4), C8( 26,4), C8( 27,4), C8( 28,4), C8( 29,4), C8( 30,4), C8( 31,4), C8( 32,4), C8( 33,4), C8( 34,4), C8( 35,4), C8( 36,4), C8( 37,4), C8( 38,4), C8( 39,4), C8( 40,4), C8( 41,4),