/* evalPositions:
 * The input is laid out, but node coordinates
 * are relative to smallest containing cluster.
 * Walk through all nodes and clusters, translating
 * the positions to absolute coordinates.
 * Assume that when called, g's bounding box is
 * in absolute coordinates and that box of root graph
 * has LL at origin.
 */
static void evalPositions(graph_t * g, graph_t* rootg)
{
    int i;
    graph_t *subg;
    node_t *n;
    boxf bb;
    boxf sbb;

    bb = BB(g);

    /* translate nodes in g */
    if (g != rootg) {
        for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
            if (PARENT(n) != g)
                continue;
            ND_pos(n)[0] += bb.LL.x;
            ND_pos(n)[1] += bb.LL.y;
        }
    }

    /* translate top-level clusters and recurse */
    for (i = 1; i <= GD_n_cluster(g); i++) {
        subg = GD_clust(g)[i];
        if (g != rootg) {
            sbb = BB(subg);
            sbb.LL.x += bb.LL.x;
            sbb.LL.y += bb.LL.y;
            sbb.UR.x += bb.LL.x;
            sbb.UR.y += bb.LL.y;
            BB(subg) = sbb;
        }
        evalPositions(subg, rootg);
    }
}
/* ---------------------------------------------------------------------- */
int DW (double T)
/* ---------------------------------------------------------------------- */
/*
C
C     SUBROUTINE TO CALCULATE THE DENSITY OF WATER AS A FUNCTION OF
C     TEMPERATURE.  T IS IN KELVIN, P IS IN PASCALS, DW0 IS IN G/CM^3
C
C     FROM L. HAAR, J. S. GALLAGHER, AND G. S. KELL, (1984)
C
*/
{
  double FP = 9.869232667e0, P, DGSS, D;

  BB (T);
  P = 1.0e0 / FP;
  if (T > 373.149e0)
    P = PS (T);
  DGSS = P / T / .4e0;
  if (T < TZ)
    {
      DGSS = 1.0e0 / (VLEST (T));
    }
  DFIND (&D, P, DGSS, T);
  DW0 = D;
  VP = P * FP;
  return OK;
}
#include <stdio.h>
#include <stdlib.h>

/* cmp (the qsort comparator) and BB (a binary search over the sorted array,
   returning the index of the key or -1) are assumed to be defined elsewhere. */

int main(int argc, char **argv)
{
    int n, q, i, a, j = 1;

    scanf("%d %d", &n, &q);
    while (n != 0 && q != 0) {
        int vetor[n], quest[q];

        for (i = 0; i < n; i++) {
            scanf("%d", &vetor[i]);
        }
        for (i = 0; i < q; i++) {
            scanf("%d", &quest[i]);
        }

        /* pass the array itself (decaying to int*), not its address,
           so the element type matches the comparator */
        qsort(vetor, n, sizeof(int), cmp);

        printf("CASE# %d:\n", j);
        j++;

        i = 0;
        while (q != 0) {
            a = BB(vetor, quest[i], n);
            if (a != -1) {
                printf("%d found at %d\n", quest[i], (a + 1));
            } else {
                printf("%d not found\n", quest[i]);
            }
            i++;
            q--;
        }
        scanf("%d %d", &n, &q);
    }
    return 0;
}
KOKKOS_INLINE_FUNCTION
int
Gemm<Trans::ConjTranspose,Trans::NoTranspose,
     AlgoGemm::SparseSparseSuperNodes,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const ScalarType alpha,
         CrsExecViewTypeA &A,
         CrsExecViewTypeB &B,
         const ScalarType beta,
         CrsExecViewTypeC &C) {

  if (member.team_rank() == 0) {
    DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> AA(A.Flat());
    DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> BB(B.Flat());
    DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> CC(C.Flat());

    Gemm<Trans::ConjTranspose,Trans::NoTranspose,
         AlgoGemm::ExternalBlas,Variant::One>
      ::invoke(policy, member,
               alpha, AA, BB, beta, CC);
  }

  return 0;
}
bool GraphTest::run()
{
    return BB(rand() % dystans, rand() % dystans) != INT_MAX;
}
/**
 * BB1:
 *   "==" D BB2
 *   BB2.st = BB1.st == D.val
 *   BB1.val = BB2.val
 * BB1:
 *   "!=" D BB2
 *   BB2.st = BB1.st != D.val
 *   BB1.val = BB2.val
 * BB:
 *   empty
 *   BB.val = BB.st
 */
int BB(int st){
    if (lex(0) == '='){
        lex(1);
        if (lex(0) == '='){
            lex(1);
            return BB(st == D());
        }
    }
    else if (lex(0) == '!'){
        lex(1);
        if (lex(0) == '='){
            lex(1);
            return BB(st != D());
        }
    }
    /* empty production (or a lone '=' / '!'): the value is the inherited state */
    return st;
}
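/* A self-contained sketch of the same technique, for illustration only: it does
 * not use the original lex()/D() interface; the string cursor `s`, D_() and BB_()
 * below are hypothetical stand-ins. It shows the left-to-right evaluation of
 * chained "==" / "!=" over single-digit operands that the grammar above describes. */
#include <cstdio>

static const char *s;                 // input cursor

static int D_()                       // operand: a single digit
{
    return *s++ - '0';
}

static int BB_(int st)                // BB -> "==" D BB | "!=" D BB | empty
{
    if (s[0] == '=' && s[1] == '=') { s += 2; return BB_(st == D_()); }
    if (s[0] == '!' && s[1] == '=') { s += 2; return BB_(st != D_()); }
    return st;                        // empty production
}

int main()
{
    s = "1==1!=0";                    // (1 == 1) != 0  ->  1
    std::printf("%d\n", BB_(D_()));   // prints 1
    return 0;
}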
/*-------------------------------------------------------------------------*
 * PL_QUERY_END                                                            *
 *                                                                         *
 *-------------------------------------------------------------------------*/
void
Pl_Query_End(int op)
{
  WamWord *query_b, *prev_b, *b;
  Bool recoverable;

  if (query_stack_top == query_stack)
    Pl_Fatal_Error("Pl_Query_End() but no query remaining");

  query_b = *--query_stack_top;
  pl_query_top_b = query_stack_top[-1];

  recoverable = (ALTB(query_b) == Prolog_Predicate(PL_QUERY_RECOVER_ALT, 0));
  prev_b = BB(query_b);

  switch (op)
    {
    case PL_RECOVER:
      Assign_B(query_b);
      if (!recoverable)
        Pl_Fatal_Error("Pl_Query_End(PL_RECOVER) but unrecoverable query");

      Pl_Delete_Choice_Point(0);        /* remove recover chc-point */
      break;

    case PL_CUT:
      Assign_B((recoverable) ? prev_b : query_b);
      break;

    default:                    /* case PL_KEEP_FOR_PROLOG */
      if (recoverable)
        {
          if (B == query_b)
            Assign_B(prev_b);
          else
            for (b = B; b > query_b; b = BB(b))   /* unlink recover chc-point */
              if (BB(b) == query_b)
                BB(b) = prev_b;
        }
      Pl_Keep_Rest_For_Prolog(query_b);
    }
}
inline void
LocalTrrkKernel
( UpperOrLower uplo,
  Orientation orientationOfA, Orientation orientationOfB,
  T alpha, const DistMatrix<T,STAR,MC >& A,
           const DistMatrix<T,MR, STAR>& B,
  T beta,        DistMatrix<T,MC, MR >& C )
{
#ifndef RELEASE
    PushCallStack("LocalTrrkKernel");
    CheckInput( orientationOfA, orientationOfB, A, B, C );
#endif
    const Grid& g = C.Grid();

    DistMatrix<T,STAR,MC> AL(g), AR(g);
    DistMatrix<T,MR,STAR> BT(g),
                          BB(g);
    DistMatrix<T,MC,MR> CTL(g), CTR(g),
                        CBL(g), CBR(g);
    DistMatrix<T,MC,MR> DTL(g), DBR(g);

    const int half = C.Height()/2;
    ScaleTrapezoid( beta, LEFT, uplo, 0, C );
    LockedPartitionRight( A, AL, AR, half );
    LockedPartitionDown
    ( B, BT,
         BB, half );
    PartitionDownDiagonal
    ( C, CTL, CTR,
         CBL, CBR, half );

    DTL.AlignWith( CTL );
    DBR.AlignWith( CBR );
    DTL.ResizeTo( CTL.Height(), CTL.Width() );
    DBR.ResizeTo( CBR.Height(), CBR.Width() );
    //------------------------------------------------------------------------//
    if( uplo == LOWER )
        internal::LocalGemm
        ( orientationOfA, orientationOfB, alpha, AR, BT, T(1), CBL );
    else
        internal::LocalGemm
        ( orientationOfA, orientationOfB, alpha, AL, BB, T(1), CTR );

    internal::LocalGemm
    ( orientationOfA, orientationOfB, alpha, AL, BT, T(0), DTL );
    AxpyTriangle( uplo, T(1), DTL, CTL );

    internal::LocalGemm
    ( orientationOfA, orientationOfB, alpha, AR, BB, T(0), DBR );
    AxpyTriangle( uplo, T(1), DBR, CBR );
    //------------------------------------------------------------------------//
#ifndef RELEASE
    PopCallStack();
#endif
}
static PetscErrorCode RHSJacobian_function( TS ts, double t_, Vec u,
                                            Mat A, Mat B, void* G_u )
{
   Vector U( u, Vector::owner::other );
   Matrix AA( A, false );
   Matrix BB( B, false );
   TimeStepper T( ts, false );

   ( *(Jac*)G_u )( U, AA, BB, T, t_ );
   return 0;
}
int simEmbGetRotationAxis(const float* quaternionStart,const float* quaternionGoal,float* axis,float* angle)
{
    if (!hasLaunched())
        return(-1);

    // V-REP quaternion, internally: w x y z
    // V-REP quaternion, at interfaces: x y z w (like ROS)
    C4Vector qStart;
    qStart(0)=quaternionStart[3];
    qStart(1)=quaternionStart[0];
    qStart(2)=quaternionStart[1];
    qStart(3)=quaternionStart[2];
    C4Vector qGoal;
    qGoal(0)=quaternionGoal[3];
    qGoal(1)=quaternionGoal[0];
    qGoal(2)=quaternionGoal[1];
    qGoal(3)=quaternionGoal[2];

    // Following few lines taken from the quaternion interpolation part:
    C4Vector AA(qStart);
    C4Vector BB(qGoal);
    if (AA(0)*BB(0)+AA(1)*BB(1)+AA(2)*BB(2)+AA(3)*BB(3)<0.0f)
        AA=AA*-1.0f;
    C4Vector r((AA.getInverse()*BB).getAngleAndAxis());
    C3Vector v(r(1),r(2),r(3));
    v=AA*v;

    axis[0]=v(0);
    axis[1]=v(1);
    axis[2]=v(2);
    float l=sqrt(v(0)*v(0)+v(1)*v(1)+v(2)*v(2));
    if (l!=0.0f)
    {
        axis[0]/=l;
        axis[1]/=l;
        axis[2]/=l;
    }
    angle[0]=r(0);
    return(1);
}
void StudentTProcessNIG::precomputePrediction()
{
    size_t n = mData.getNSamples();
    size_t p = mMean.nFeatures();

    mKF = trans(mMean.mFeatM);
    inplace_solve(mL,mKF,ublas::lower_tag());

    //TODO: make one line
    matrixd DD(p,p);
    DD = prod(trans(mKF),mKF);
    utils::add_to_diagonal(DD,mInvVarW);
    utils::cholesky_decompose(DD,mD);

    vectord vn = mData.mY;
    inplace_solve(mL,vn,ublas::lower_tag());
    mWMap = prod(mMean.mFeatM,vn) + utils::ublas_elementwise_prod(mInvVarW,mW0);
    utils::cholesky_solve(mD,mWMap,ublas::lower());

    mVf = mData.mY - prod(trans(mMean.mFeatM),mWMap);
    inplace_solve(mL,mVf,ublas::lower_tag());

    vectord v0 = mData.mY - prod(trans(mMean.mFeatM),mW0);
    //TODO: check for "cheaper" version
    //matrixd KK = prod(mL,trans(mL));
    matrixd KK = computeCorrMatrix();
    matrixd WW = zmatrixd(p,p);  //TODO: diagonal matrix
    utils::add_to_diagonal(WW,mInvVarW);
    const matrixd FW = prod(trans(mMean.mFeatM),WW);
    KK += prod(FW,mMean.mFeatM);
    matrixd BB(n,n);
    utils::cholesky_decompose(KK,BB);
    inplace_solve(BB,v0,ublas::lower_tag());

    mSigma = (mBeta/mAlpha + inner_prod(v0,v0))/(n+2*mAlpha);

    int dof = static_cast<int>(n+2*mAlpha);
    if ((boost::math::isnan(mWMap(0))) || (boost::math::isnan(mSigma)))
    {
        throw std::runtime_error("Error in precomputed prediction. NaN found.");
    }
    if (dof <= 0)
    {
        dof = n;
        FILE_LOG(logERROR) << "ERROR: Incorrect alpha. Dof invalid."
                           << "Forcing Dof <= num of points.";
    }
    d_->setDof(dof);
}
static void dumpBB(graph_t * g)
{
    boxf bb;
    box b;

    bb = BB(g);
    b = GD_bb(g);
    prIndent();
    fprintf(stderr, " LL (%f,%f) UR (%f,%f)\n", bb.LL.x, bb.LL.y,
            bb.UR.x, bb.UR.y);
    prIndent();
    fprintf(stderr, " LL (%d,%d) UR (%d,%d)\n", b.LL.x, b.LL.y,
            b.UR.x, b.UR.y);
}
/*-------------------------------------------------------------------------*
 * PL_KEEP_REST_FOR_PROLOG                                                 *
 *                                                                         *
 * Update CP in choice points to be used by classical Prolog engine        *
 * (some CPB(b) have been set to Call_Prolog_Success due to Call_Prolog).  *
 *-------------------------------------------------------------------------*/
void
Pl_Keep_Rest_For_Prolog(WamWord *query_b)
{
  WamWord *b, *e, *query_e;

  for (b = B; b > query_b; b = BB(b))
    if (CPB(b) == Adjust_CP(Call_Prolog_Success))
      CPB(b) = CP;

  query_e = EB(query_b);

  for (e = EB(B); e > query_e; e = EE(e))
    if (CPE(e) == Adjust_CP(Call_Prolog_Success))
      CPE(e) = CP;
}
inline
Stat
Gemm<Trans::ConjTranspose,Trans::NoTranspose,
     AlgoGemm::SparseSparseSuperNodes,Variant::One>
::stat(const ScalarType alpha,
       CrsExecViewTypeA &A,
       CrsExecViewTypeB &B,
       const ScalarType beta,
       CrsExecViewTypeC &C) {
  DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> AA(A.Flat());
  DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> BB(B.Flat());
  DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> CC(C.Flat());

  return Gemm<Trans::ConjTranspose,Trans::NoTranspose,
              AlgoGemm::ExternalBlas,Variant::One>
    ::stat(alpha, AA, BB, beta, CC);
}
void print_board(struct Position* pos)
{
    printf("Board:\n");
    int i, piece;
    for (i = 0; i != 64; ++i) {
        if (i && !(i & 7))
            printf("\n");
        piece = pos->board[i ^ 56];
        if (!piece)
            printf("- ");
        else
            printf("%c ", get_char_from_piece(piece,
                   (BB((i ^ 56)) & pos->bb[WHITE] ? WHITE : BLACK)));
    }
    printf("\n");
    printf("PosKey: %llu\n", pos->state->pos_key);
    printf("PawnKey: %llu\n", pos->state->pawn_key);
}
string getHint(string secret, string guess) {
    int A = 0, B = 0, n = (int)secret.size();
    vector<int> AA(10, 0), BB(10, 0);
    for (int i = 0; i < n; ++i) {
        if (secret[i] == guess[i])
            A++;
        else {
            AA[secret[i] - '0']++;
            BB[guess[i] - '0']++;
        }
    }
    for (int i = 0; i < 10; i++) {
        B += min(AA[i], BB[i]);
    }
    char str[100];
    sprintf(str, "%dA%dB", A, B);
    return string(str);
}
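// A minimal usage sketch (assuming getHint above is visible, e.g. compiled in the
// same translation unit with <string>, <vector> and `using namespace std;`):
// secret "1807" vs. guess "7810" has one bull (the 8) and three cows (1, 0, 7).
#include <iostream>

int main()
{
    std::cout << getHint("1807", "7810") << std::endl;   // prints "1A3B"
    return 0;
}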
/// This method returns a copy of this bounding box that is slightly enlarged by Epsilon (or shrunk if Epsilon is negative).
/// The returned box is very useful with the containment / intersection test methods when rounding errors are an issue!
/// Note that it is easy to control the desired effect by passing either a positive number to make the box slightly larger,
/// or by passing a negative number to make the box slightly smaller.
/// For example, if BB is a bounding box, BB.GetEpsilonBox(0.1).Contains(A) returns true even if A is actually a bit outside of BB,
/// or BB.GetEpsilonBox(-0.3).Intersects(OtherBB) yields false even if BB and OtherBB are neighboured and share a plane.
/// @param Epsilon The amount by which the bounding-box is expanded.
BoundingBox3T<T> GetEpsilonBox(const T Epsilon) const
{
    assert(IsInited());

    const Vector3T<T> Eps=Vector3T<T>(Epsilon, Epsilon, Epsilon);
    BoundingBox3T<T>  BB(*this);   // Don't use the (Min-Eps, Max+Eps) constructor here, as it involved an additional call to Insert().

    BB.Min-=Eps;
    BB.Max+=Eps;

    // Maybe the box got smaller, now make sure it didn't get negative.
    if (BB.Min.x>BB.Max.x) BB.Min.x=BB.Max.x=(BB.Min.x+BB.Max.x)*0.5f;
    if (BB.Min.y>BB.Max.y) BB.Min.y=BB.Max.y=(BB.Min.y+BB.Max.y)*0.5f;
    if (BB.Min.z>BB.Max.z) BB.Min.z=BB.Max.z=(BB.Min.z+BB.Max.z)*0.5f;

    return BB;
}
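// A hedged usage sketch, assuming the surrounding BoundingBox3T<T> class also
// offers the two-point constructor and the Contains() method that the comment
// above refers to (neither is shown here).
bool EpsilonBoxExample()
{
    const BoundingBox3T<float> Box(Vector3T<float>(0, 0, 0), Vector3T<float>(10, 10, 10));
    const Vector3T<float>      A(10.05f, 5.0f, 5.0f);        // a bit outside of Box

    // The strict test rejects A; a small positive epsilon tolerates the overshoot.
    return !Box.Contains(A) && Box.GetEpsilonBox(0.1f).Contains(A);
}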
PlaneSector::PlaneSector( const Point& A, const Point& B, const Point& C)
    : Geometry(2)
{
    if(C==B)
    {
        TheCenter=A;
    }
    else
    {
        real dp = (C-B)*(A-B);   // This is != 0, tested in E2secitf.
        TheCenter = (A+(A-B)*((C-B)*((C+B)/2-A))/dp);
    }
    TheInnerRadius=(TheCenter-A).Length();
    Point BB(B-TheCenter), CC(C-TheCenter);
    TheSmallAngle=atan2(BB.Y(),BB.X());
    TheBigAngle=atan2(CC.Y(),CC.X());
    if (TheBigAngle<=TheSmallAngle)
        TheBigAngle += 2*M_PI;
}
arma_hot
inline
static
void
apply
  (
        Mat<eT>& C,
  const TA&      A,
  const TB&      B,
  const eT       alpha = eT(1),
  const eT       beta  = eT(0),
  const typename arma_not_cx<eT>::result* junk = 0
  )
  {
  arma_extra_debug_sigprint();
  arma_ignore(junk);

  const uword A_n_rows = A.n_rows;
  const uword A_n_cols = A.n_cols;

  const uword B_n_rows = B.n_rows;
  const uword B_n_cols = B.n_cols;

  if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) && (A_n_rows == B_n_rows) && (B_n_rows == B_n_cols) )
    {
    if(do_trans_B == false)
      {
      gemm_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(C, A, B, alpha, beta);
      }
    else
      {
      Mat<eT> BB(A_n_rows, A_n_rows);
      op_strans::apply_noalias_tinysq(BB, B);

      gemm_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(C, A, BB, alpha, beta);
      }
    }
  else
    {
    gemm_emul_large<do_trans_A, do_trans_B, use_alpha, use_beta>::apply(C, A, B, alpha, beta);
    }
  }
template<class T> Brush3T<T>::Brush3T(const Vector3T<T>& A, const Vector3T<T>& B, const Vector3T<T>& C, const T Epsilon, bool IncludeBevelPlanes)
{
    Planes.PushBack(Plane3T<T>(A, B, C, Epsilon));
    Planes.PushBack(Planes[0].GetMirror());
    Planes.PushBack(Plane3T<T>(A, B, B+Planes[0].Normal, Epsilon));
    Planes.PushBack(Plane3T<T>(B, C, C+Planes[0].Normal, Epsilon));
    Planes.PushBack(Plane3T<T>(C, A, A+Planes[0].Normal, Epsilon));

    if (!IncludeBevelPlanes) return;

    BoundingBox3T<T> BB(A, B);

    BB.Insert(C);

    Planes.PushBack(Plane3T<T>(Vector3T<T>(-1.0,  0.0,  0.0), -BB.Min.x));   // Left   plane.
    Planes.PushBack(Plane3T<T>(Vector3T<T>( 1.0,  0.0,  0.0),  BB.Max.x));   // Right  plane.
    Planes.PushBack(Plane3T<T>(Vector3T<T>( 0.0, -1.0,  0.0), -BB.Min.y));   // Near   plane.
    Planes.PushBack(Plane3T<T>(Vector3T<T>( 0.0,  1.0,  0.0),  BB.Max.y));   // Far    plane.
    Planes.PushBack(Plane3T<T>(Vector3T<T>( 0.0,  0.0, -1.0), -BB.Min.z));   // Bottom plane.
    Planes.PushBack(Plane3T<T>(Vector3T<T>( 0.0,  0.0,  1.0),  BB.Max.z));   // Top    plane.
}
double GaussianProcessNormal::negativeLogLikelihood()
{
    matrixd KK = computeCorrMatrix();
    const size_t n = KK.size1();
    const size_t p = mMean->nFeatures();

    vectord v0 = mGPY - prod(trans(mFeatM),mW0);
    matrixd WW = zmatrixd(p,p);  //TODO: diagonal matrix
    utils::addToDiagonal(WW,mInvVarW);
    matrixd FW = prod(trans(mFeatM),WW);
    KK += prod(FW,mFeatM);
    matrixd BB(n,n);
    utils::cholesky_decompose(KK,BB);
    inplace_solve(BB,v0,ublas::lower_tag());
    double zz = inner_prod(v0,v0);

    double lik = 1/(2*mSigma) * zz;
    lik += utils::log_trace(BB);
    return lik;
}
double StudentTProcessNIG::negativeLogLikelihood()
{
    matrixd KK = computeCorrMatrix();
    const size_t n = KK.size1();
    const size_t p = mMean.nFeatures();
    const size_t nalpha = (n+2*mAlpha);

    vectord v0 = mData.mY - prod(trans(mMean.mFeatM),mW0);
    matrixd WW = zmatrixd(p,p);  //TODO: diagonal matrix
    utils::add_to_diagonal(WW,mInvVarW);
    matrixd FW = prod(trans(mMean.mFeatM),WW);
    KK += prod(FW,mMean.mFeatM);
    matrixd BB(n,n);
    utils::cholesky_decompose(KK,BB);
    inplace_solve(BB,v0,ublas::lower_tag());
    double zz = inner_prod(v0,v0);

    double sigmaMap = (mBeta/mAlpha + zz)/nalpha;

    double lik = nalpha/2 * std::log(1+zz/(2*mBeta*sigmaMap));
    lik += utils::log_trace(BB);
    lik += n/2 * std::log(sigmaMap);
    return lik;
}
inline void SymmLLC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedView1x2( ARowPan, A10, A11 ); LockedView2x1 ( AColPan, A11, A21 ); View2x1 ( CAbove, C0, C1 ); View2x1 ( CBelow, C1, C2 ); AColPan_MC_STAR.AlignWith( CBelow ); ARowPan_STAR_MC.AlignWith( CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( LEFT, LOWER, 0, AColPan_MC_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC ); B1Trans_MR_STAR.TransposeFrom( B1 ); LocalGemm ( NORMAL, TRANSPOSE, alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow ); LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; B1Trans_MR_STAR.TransposeFrom( B1 ); // C[MC,MR] += alpha A1[MC,*] (B1^T[MR,*])^T // = alpha A1[MC,*] B1[*,MR] LocalGemm ( NORMAL, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionDown( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Syr2kUT ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::Syr2kUT"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Syr2kUT:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, UPPER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( UPPER, orientation, TRANSPOSE, orientation, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } }
void Trr2kNNNT
( UpperOrLower uplo,
  Orientation orientationOfD,
  T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B,
           const DistMatrix<T>& C, const DistMatrix<T>& D,
  T beta,        DistMatrix<T>& E )
{
#ifndef RELEASE
    PushCallStack("internal::Trr2kNNNT");
    if( E.Height() != E.Width()  || A.Width() != C.Width() ||
        A.Height() != E.Height() || C.Height() != E.Height() ||
        B.Width() != E.Width() || D.Height() != E.Width() ||
        A.Width() != B.Height() || C.Width() != D.Width() )
        throw std::logic_error("Nonconformal Trr2kNNNT");
#endif
    const Grid& g = E.Grid();

    DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g);
    DistMatrix<T> BT(g), B0(g),
                  BB(g), B1(g),
                         B2(g);
    DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g);
    DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g);

    DistMatrix<T,MC,  STAR> A1_MC_STAR(g);
    DistMatrix<T,MR,  STAR> B1Trans_MR_STAR(g);
    DistMatrix<T,MC,  STAR> C1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> D1_VR_STAR(g);
    DistMatrix<T,STAR,MR  > D1AdjOrTrans_STAR_MR(g);

    A1_MC_STAR.AlignWith( E );
    B1Trans_MR_STAR.AlignWith( E );
    C1_MC_STAR.AlignWith( E );
    D1_VR_STAR.AlignWith( E );
    D1AdjOrTrans_STAR_MR.AlignWith( E );

    LockedPartitionRight( A, AL, AR, 0 );
    LockedPartitionDown
    ( B, BT,
         BB, 0 );
    LockedPartitionRight( C, CL, CR, 0 );
    LockedPartitionRight( D, DL, DR, 0 );
    while( AL.Width() < A.Width() )
    {
        LockedRepartitionRight
        ( AL, /**/ AR,
          A0, /**/ A1, A2 );
        LockedRepartitionDown
        ( BT,  B0,
         /**/ /**/
               B1,
          BB,  B2 );
        LockedRepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );
        LockedRepartitionRight
        ( DL, /**/ DR,
          D0, /**/ D1, D2 );

        //--------------------------------------------------------------------//
        A1_MC_STAR = A1;
        C1_MC_STAR = C1;
        B1Trans_MR_STAR.TransposeFrom( B1 );
        D1_VR_STAR = D1;
        if( orientationOfD == ADJOINT )
            D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR );
        else
            D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR );
        LocalTrr2k
        ( uplo, TRANSPOSE,
          alpha, A1_MC_STAR, B1Trans_MR_STAR,
                 C1_MC_STAR, D1AdjOrTrans_STAR_MR,
          beta, E );
        //--------------------------------------------------------------------//

        SlideLockedPartitionRight
        ( DL,     /**/ DR,
          D0, D1, /**/ D2 );
        SlideLockedPartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
        SlideLockedPartitionDown
        ( BT,  B0,
               B1,
         /**/ /**/
          BB,  B2 );
        SlideLockedPartitionRight
        ( AL,     /**/ AR,
          A0, A1, /**/ A2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* ---- findelemex will be called as : j_el=findelemex(xp,yp,AR,A,B,T); ---------------------------- */ /* ---- xp,yp are NOT nodal coordinates; they are the points we are finding elements for. Nodal coordinates have already been accounted for in A,B,T ----- */ int ip,j,np,nl,nh,ne; double *xp, *yp; double *AR,*A,*B,*T; double *fnd; double NaN=mxGetNaN(); double fac,S1,S2,S3,ONE,ZERO; double tol,*tolerance; /* ---- check I/O arguments ----------------------------------------- */ if (nrhs != 7) mexErrMsgTxt("findelemex requires 7 input arguments."); else if (nlhs != 1) mexErrMsgTxt("findelemex requires 1 output arguments."); /* ---- dereference input arrays ------------------------------------ */ xp =mxGetPr(prhs[0]); yp =mxGetPr(prhs[1]); AR =mxGetPr(prhs[2]); A =mxGetPr(prhs[3]); B =mxGetPr(prhs[4]); T =mxGetPr(prhs[5]); tolerance=mxGetPr(prhs[6]); tol=tolerance[0]; np=mxGetM(prhs[0]); ne=mxGetM(prhs[2]); /* ---- allocate space for list containing element numbers following NRC allocation style double *mxDvector(int nl,int nh) fnd= (double *) mxDvector(0,np); ---------------------------- */ fnd= (double *) mxDvector(0,np); for (ip=0;ip<np;ip++)fnd[ip]=-1.; ONE=1.+tol; ZERO=0.-tol; for (j=0;j<ne;j++){ for (ip=0;ip<np;ip++){ if(fnd[ip]<(double)0){ fac=.5/AR[j]; S1=(TT(j,0,ne)+BB(j,0,ne)*xp[ip]+AA(j,0,ne)*yp[ip])*fac; if (S1>ONE|S1<ZERO)goto l20; S2=(TT(j,1,ne)+BB(j,1,ne)*xp[ip]+AA(j,1,ne)*yp[ip])*fac; if (S2>ONE|S2<ZERO)goto l20; S3=(TT(j,2,ne)+BB(j,2,ne)*xp[ip]+AA(j,2,ne)*yp[ip])*fac; if (S3>ONE|S3<ZERO)goto l20; fnd[ip]=(double)(j+1); } l20: continue; } } for (ip=0;ip<np;ip++) if(fnd[ip]<(double)0)fnd[ip]=NaN; /* ---- Set elements of return matrix, pointed to by plhs[0] -------- */ plhs[0]=mxCreateDoubleMatrix(np,1,mxREAL); mxSetPr(plhs[0],fnd); /* ---- No need to free memory allocated with "mxCalloc"; MATLAB does this automatically. The CMEX allocation functions in "opnml_allocs.c" use mxCalloc. ----------------------------------- */ return; }
/*---------------------------------------------------------------------------*/ static int TTWAIN_MemoryXferHandler(void) { TW_IMAGEMEMXFER *imageMemXfer = 0; TW_HANDLE imageMemXferH = 0; TW_HANDLE transferBufferH = 0; TW_SETUPMEMXFER setup; TW_IMAGEINFO info; TW_IMAGELAYOUT imageLayout; TUINT32 nTransferDone; TW_INT16 rc1, rc2, rc3, rc4, twRC2; int ret = FALSE; int stopScanning = 0; UCHAR *transferBuffer = 0; UCHAR *sourceBuffer = 0; UCHAR *targetBuffer = 0; unsigned int rows; double pixSize; int extraX = 0; int extraY = 0; TW_UINT32 rowsToCopy = 0; TW_UINT32 rowsRemaining = 0; TW_UINT32 bytesToCopy = 0; TW_UINT32 bytesToWrap = 0; TW_UINT32 memorySize = 0; int imgInfoOk; /* on Mac often (always) is impossible to get the imageinfo about the transfer... so no I can't prealloc memory and do other checks about size etc... */ /*printf("%s\n", __PRETTY_FUNCTION__);*/ memset(&info, 0, sizeof(TW_IMAGEINFO)); rc1 = TTWAIN_DS(DG_IMAGE, DAT_IMAGEINFO, MSG_GET, (TW_MEMREF)&info); imgInfoOk = (rc1 == TWRC_SUCCESS); /*printf("get image info returns %d\n", imgInfoOk);*/ rc4 = TTWAIN_DS(DG_IMAGE, DAT_IMAGELAYOUT, MSG_GET, &imageLayout); /* determine the transfer buffer size */ rc2 = TTWAIN_DS(DG_CONTROL, DAT_SETUPMEMXFER, MSG_GET, (TW_MEMREF)&setup); transferBufferH = GLOBAL_ALLOC(GMEM_FIXED, setup.Preferred); if (!transferBufferH) return FALSE; transferBuffer = (UCHAR *)GLOBAL_LOCK(transferBufferH); if (imgInfoOk) { pixSize = info.BitsPerPixel / 8.0; memorySize = info.ImageLength * CEIL(info.ImageWidth * pixSize); } else { /* we need to allocate incrementally the memory needs to store the image*/ memorySize = setup.Preferred; /* start using the setupmemxfer.preferred size*/ pixSize = 3; } if (TTwainData.transferInfo.usageMode == TTWAIN_MODE_UNLEASHED) { /* TTwainData.transferInfo = GLOBAL_ALLOC(GMEM_FIXED, memorySize); */ TTwainData.transferInfo.memoryBuffer = (UCHAR *)malloc(memorySize); if (!TTwainData.transferInfo.memoryBuffer) { /*tmsg_error("unable to allocate memory!");*/ return FALSE; } if (imgInfoOk) { TTwainData.transferInfo.memorySize = memorySize; TTwainData.transferInfo.preferredLx = info.ImageWidth; TTwainData.transferInfo.preferredLy = info.ImageLength; } else { TTwainData.transferInfo.memorySize = setup.Preferred; TTwainData.transferInfo.preferredLx = 0; TTwainData.transferInfo.preferredLy = 0; } } extraX = info.ImageWidth - TTwainData.transferInfo.preferredLx; extraY = info.ImageLength - TTwainData.transferInfo.preferredLy; rowsRemaining = MIN(TTwainData.transferInfo.preferredLy, info.ImageLength); targetBuffer = TTwainData.transferInfo.memoryBuffer; /*clean-up the buffer memset(targetBuffer, 0xff, TTwainData.transferInfo.memorySize); */ imageMemXferH = GLOBAL_ALLOC(GMEM_FIXED, sizeof(TW_IMAGEMEMXFER)); if (!imageMemXferH) return FALSE; imageMemXfer = (TW_IMAGEMEMXFER *)GLOBAL_LOCK(imageMemXferH); imageMemXfer->Memory.TheMem = (char *)transferBuffer; imageMemXfer->Memory.Length = setup.Preferred; imageMemXfer->Memory.Flags = TWMF_APPOWNS | TWMF_POINTER; TTwainData.transferInfo.pendingXfers.Count = 0; /* transfer the data -- loop until done or canceled */ nTransferDone = 0; do { rc3 = TTWAIN_DS(DG_IMAGE, DAT_IMAGEMEMXFER, MSG_GET, (TW_MEMREF)imageMemXfer); nTransferDone++; switch (rc3) { case TWRC_SUCCESS: PRINTF("IMAGEMEMXFER, GET, returns SUCCESS\n"); if (imgInfoOk) { TW_UINT32 colsToCopy; rowsToCopy = MIN(imageMemXfer->Rows, rowsRemaining); colsToCopy = MIN(imageMemXfer->Columns, (unsigned long)TTwainData.transferInfo.preferredLx); bytesToCopy = CEIL(colsToCopy * pixSize); bytesToWrap 
= CEIL(TTwainData.transferInfo.preferredLx * pixSize); } else { TW_UINT32 newMemorySize; rowsToCopy = imageMemXfer->Rows; bytesToCopy = imageMemXfer->BytesPerRow; bytesToWrap = bytesToCopy; newMemorySize = (TTwainData.transferInfo.preferredLy + imageMemXfer->Rows) * imageMemXfer->BytesPerRow; if (TTwainData.transferInfo.memorySize < newMemorySize) { TTwainData.transferInfo.memoryBuffer = (UCHAR *)realloc(TTwainData.transferInfo.memoryBuffer, newMemorySize); TTwainData.transferInfo.memorySize = newMemorySize; targetBuffer = TTwainData.transferInfo.memoryBuffer + (TTwainData.transferInfo.preferredLy * imageMemXfer->BytesPerRow); } TTwainData.transferInfo.preferredLy += rowsToCopy; if ((int)imageMemXfer->Columns > TTwainData.transferInfo.preferredLx) TTwainData.transferInfo.preferredLx = imageMemXfer->Columns; } sourceBuffer = (UCHAR *)imageMemXfer->Memory.TheMem; if (TTwainData.transferInfo.nextImageNeedsToBeInverted) INVERT_BYTE(sourceBuffer, bytesToCopy) for (rows = 0; rows < rowsToCopy; rows++) { memcpy(targetBuffer, sourceBuffer, bytesToCopy); targetBuffer += bytesToWrap; sourceBuffer += imageMemXfer->BytesPerRow; } rowsRemaining -= rowsToCopy; break; case TWRC_XFERDONE: PRINTF("IMAGEMEMXFER, GET, returns XFERDONE\n"); /*copy the last transfer data*/ if (imgInfoOk) { TW_UINT32 colsToCopy; rowsToCopy = MIN(imageMemXfer->Rows, rowsRemaining); colsToCopy = MIN(imageMemXfer->Columns, (unsigned long)TTwainData.transferInfo.preferredLx); bytesToCopy = CEIL(colsToCopy * pixSize); bytesToWrap = CEIL(TTwainData.transferInfo.preferredLx * pixSize); } else { TW_UINT32 newMemorySize; rowsToCopy = imageMemXfer->Rows; bytesToCopy = imageMemXfer->BytesPerRow; bytesToWrap = bytesToCopy; newMemorySize = (TTwainData.transferInfo.preferredLy + imageMemXfer->Rows) * imageMemXfer->BytesPerRow; if (TTwainData.transferInfo.memorySize < newMemorySize) { TTwainData.transferInfo.memoryBuffer = (UCHAR *)realloc(TTwainData.transferInfo.memoryBuffer, newMemorySize); TTwainData.transferInfo.memorySize = newMemorySize; targetBuffer = TTwainData.transferInfo.memoryBuffer + (TTwainData.transferInfo.preferredLy * imageMemXfer->BytesPerRow); } TTwainData.transferInfo.preferredLy += rowsToCopy; if ((int)imageMemXfer->Columns > TTwainData.transferInfo.preferredLx) TTwainData.transferInfo.preferredLx = imageMemXfer->Columns; } sourceBuffer = (UCHAR *)imageMemXfer->Memory.TheMem; if (TTwainData.transferInfo.nextImageNeedsToBeInverted) INVERT_BYTE(sourceBuffer, bytesToCopy) for (rows = 0; rows < rowsToCopy; rows++) { memcpy(targetBuffer, sourceBuffer, bytesToCopy); targetBuffer += bytesToWrap; sourceBuffer += imageMemXfer->BytesPerRow; } rowsRemaining -= rowsToCopy; PRINTF("get pending xfers\n"); twRC2 = TTWAIN_DS(DG_CONTROL, DAT_PENDINGXFERS, MSG_ENDXFER, (TW_MEMREF)&TTwainData.transferInfo.pendingXfers); if (twRC2 != TWRC_SUCCESS) { printf("pending xfers != success"); ret = FALSE; goto done; } PRINTF(" pending count = %d\n", TTwainData.transferInfo.pendingXfers.Count); if (TTwainData.transferInfo.pendingXfers.Count == 0) { ret = TRUE; goto done; } if (TTwainData.transferInfo.pendingXfers.Count == 0xffff) { ret = TRUE; goto done; } if (TTwainData.transferInfo.pendingXfers.Count == 0xfffe) { ret = TRUE; goto done; } ret = TRUE; goto done; case TWRC_CANCEL: TTWAIN_RecordError(); twRC2 = TTWAIN_DS(DG_CONTROL, DAT_PENDINGXFERS, MSG_ENDXFER, (TW_MEMREF)&TTwainData.transferInfo.pendingXfers); if (twRC2 != TWRC_SUCCESS) { ret = FALSE; goto done; } if (TTwainData.transferInfo.pendingXfers.Count == 0) { ret = FALSE; goto done; } 
break; case TWRC_FAILURE: PRINTF("IMAGEMEMXFER, GET, returns FAILURE\n"); TTWAIN_RecordError(); twRC2 = TTWAIN_DS(DG_CONTROL, DAT_PENDINGXFERS, MSG_ENDXFER, (TW_MEMREF)&TTwainData.transferInfo.pendingXfers); if (twRC2 != TWRC_SUCCESS) { ret = FALSE; goto done; } if (TTwainData.transferInfo.pendingXfers.Count == 0) { ret = FALSE; goto done; } break; default: PRINTF("IMAGEMEMXFER, GET, returns ?!? Default handler called\n"); /* Abort the image */ TTWAIN_RecordError(); twRC2 = TTWAIN_DS(DG_CONTROL, DAT_PENDINGXFERS, MSG_ENDXFER, (TW_MEMREF)&TTwainData.transferInfo.pendingXfers); if (twRC2 != TWRC_SUCCESS) { ret = FALSE; goto done; } if (TTwainData.transferInfo.pendingXfers.Count == 0) { ret = FALSE; goto done; } } } while (rc3 == TWRC_SUCCESS); done: if (ret == TRUE) { if (TTwainData.callback.onDoneCb) { float xdpi, ydpi; TTWAIN_PIXTYPE pixType; xdpi = TTWAIN_Fix32ToFloat(info.XResolution); ydpi = TTWAIN_Fix32ToFloat(info.YResolution); if (imgInfoOk) { xdpi = TTWAIN_Fix32ToFloat(info.XResolution); ydpi = TTWAIN_Fix32ToFloat(info.YResolution); switch (BB(info.PixelType, info.BitsPerPixel)) { case BB(TWPT_BW, 1): pixType = TTWAIN_BW; break; case BB(TWPT_GRAY, 8): pixType = TTWAIN_GRAY8; break; case BB(TWPT_RGB, 24): pixType = TTWAIN_RGB24; break; default: pixType = TTWAIN_RGB24; break; } } else { float lx = TTWAIN_Fix32ToFloat(imageLayout.Frame.Right) - TTWAIN_Fix32ToFloat(imageLayout.Frame.Left); float ly = TTWAIN_Fix32ToFloat(imageLayout.Frame.Bottom) - TTWAIN_Fix32ToFloat(imageLayout.Frame.Top); xdpi = (float)TTwainData.transferInfo.preferredLx / lx; ydpi = (float)TTwainData.transferInfo.preferredLy / ly; switch (imageMemXfer->BytesPerRow / TTwainData.transferInfo.preferredLx) { case 1: pixType = TTWAIN_GRAY8; break; case 3: pixType = TTWAIN_RGB24; break; default: { double b = (imageMemXfer->BytesPerRow / (double)TTwainData.transferInfo.preferredLx); if ((b >= 0.125) && (b < 8)) pixType = TTWAIN_BW; else { printf("unable to det pix type assume RGB24\n"); pixType = TTWAIN_RGB24; } break; } } } stopScanning = !TTwainData.callback.onDoneCb( TTwainData.transferInfo.memoryBuffer, pixType, TTwainData.transferInfo.preferredLx, TTwainData.transferInfo.preferredLy, TTwainData.transferInfo.preferredLx, xdpi, ydpi, TTwainData.callback.onDoneArg); #ifdef MACOSX PRINTF("stopScanning = %d\n", stopScanning); exitTwainSession(); #endif } } else /*ret == FALSE*/ { if (TTwainData.callback.onErrorCb) { TTwainData.callback.onErrorCb(TTwainData.callback.onErrorArg, 0); } } if (imageMemXferH) { GLOBAL_UNLOCK(imageMemXferH); GLOBAL_FREE(imageMemXferH); } if (transferBufferH) { GLOBAL_UNLOCK(transferBuffer); GLOBAL_FREE(transferBufferH); } return ret && !stopScanning; }
// { dg-do compile }

// Copyright (C) 2004 Free Software Foundation, Inc.
// Contributed by Nathan Sidwell 23 Sep 2004 <*****@*****.**>

// Origin: Wolfgang Bangerth <*****@*****.**>
// Follow on from Bug 16889: Undetected ambiguity.

struct B
{
  int f();  // { dg-message "int B::f" }
};

struct B1 : virtual B {};
struct B2 : B {};
struct B2_2 : B2 {};
struct BB : B1, B2_2 {};

int i = BB().f();  // { dg-error "ambiguous" }
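// A hedged side note (not part of the DejaGnu testcase above): the ambiguity
// arises because BB reaches B twice, once virtually through B1 and once
// non-virtually through B2_2 -> B2. Casting to one of the unambiguous
// intermediate bases selects a specific B subobject and compiles cleanly.
int callThroughB2(BB& bb) { return static_cast<B2&>(bb).f(); }   // non-virtual B
int callThroughB1(BB& bb) { return static_cast<B1&>(bb).f(); }   // virtual B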
inline void GemmTTA ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTA expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MR, STAR> D1_MR_STAR(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); B1_STAR_MC.AlignWith( A ); D1_MR_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionRight( C, CL, CR, 0 ); while( BB.Height() > 0 ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_MR_STAR ); //--------------------------------------------------------------------// B1_STAR_MC = B1; // B1[*,MC] <- B1[MC,MR] // D1[MR,*] := alpha (A[MC,MR])^T (B1[*,MC])^T // = alpha (A^T)[MR,MC] (B1^T)[MC,*] LocalGemm ( orientationOfA, orientationOfB, alpha, A, B1_STAR_MC, T(0), D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }