inline void SUMMA_NNA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("gemm::SUMMA_NNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must have the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal matrices: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T,MC,STAR> D1_MC_STAR(g); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); D1_MC_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); //--------------------------------------------------------------------// B1_VR_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); // D1[MC,*] := alpha A[MC,MR] B1[MR,*] LocalGemm( NORMAL, TRANSPOSE, alpha, A, B1Trans_STAR_MR, D1_MC_STAR ); // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows C1.SumScatterUpdate( T(1), D1_MC_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } }
inline void Trr2kNNTT ( UpperOrLower uplo, Orientation orientationOfC, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNTT"); if( E.Height() != E.Width() || A.Width() != C.Height() || A.Height() != E.Height() || C.Width() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Height() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNTT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionRight ( DL, /**/ DR, D0, /**/ D1, D2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_STAR_MC = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, orientationOfC, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_STAR_MC, D1AdjOrTrans_STAR_MR, beta, E ); 
//--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::HemmLLA ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::HemmLLA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T,MC,MR> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Adj_STAR_MR(g); DistMatrix<T,MC,MR > Z1(g); DistMatrix<T,MC,STAR> Z1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); Scal( beta, C ); LockedPartitionRight ( B, BL, BR, 0 ); PartitionRight ( C, CL, CR, 0 ); while( CL.Width() < C.Width() ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_MC_STAR.AlignWith( A ); B1_VR_STAR.AlignWith( A ); B1Adj_STAR_MR.AlignWith( A ); Z1_MC_STAR.AlignWith( A ); Z1_MR_STAR.AlignWith( A ); Z1.AlignWith( C1 ); Z1_MC_STAR.ResizeTo( C1.Height(), C1.Width() ); Z1_MR_STAR.ResizeTo( C1.Height(), C1.Width() ); //--------------------------------------------------------------------// B1_MC_STAR = B1; B1_VR_STAR = B1_MC_STAR; B1Adj_STAR_MR.AdjointFrom( B1_VR_STAR ); Zero( Z1_MC_STAR ); Zero( Z1_MR_STAR ); internal::LocalSymmetricAccumulateLL ( ADJOINT, alpha, A, B1_MC_STAR, B1Adj_STAR_MR, Z1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); Z1 = Z1_MR_MC; Z1.SumScatterUpdate( (T)1, Z1_MC_STAR ); Axpy( (T)1, Z1, C1 ); //--------------------------------------------------------------------// B1_MC_STAR.FreeAlignments(); B1_VR_STAR.FreeAlignments(); B1Adj_STAR_MR.FreeAlignments(); Z1_MC_STAR.FreeAlignments(); Z1_MR_STAR.FreeAlignments(); Z1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE 
PopCallStack(); #endif }
int test_string_cast_vector() { int Error = 0; { glm::vec2 A1(1, 2); std::string A2 = glm::to_string(A1); Error += A2 != std::string("vec2(1.000000, 2.000000)") ? 1 : 0; glm::vec3 B1(1, 2, 3); std::string B2 = glm::to_string(B1); Error += B2 != std::string("vec3(1.000000, 2.000000, 3.000000)") ? 1 : 0; glm::vec4 C1(1, 2, 3, 4); std::string C2 = glm::to_string(C1); Error += C2 != std::string("vec4(1.000000, 2.000000, 3.000000, 4.000000)") ? 1 : 0; glm::dvec2 J1(1, 2); std::string J2 = glm::to_string(J1); Error += J2 != std::string("dvec2(1.000000, 2.000000)") ? 1 : 0; glm::dvec3 K1(1, 2, 3); std::string K2 = glm::to_string(K1); Error += K2 != std::string("dvec3(1.000000, 2.000000, 3.000000)") ? 1 : 0; glm::dvec4 L1(1, 2, 3, 4); std::string L2 = glm::to_string(L1); Error += L2 != std::string("dvec4(1.000000, 2.000000, 3.000000, 4.000000)") ? 1 : 0; } { glm::bvec2 M1(false, true); std::string M2 = glm::to_string(M1); Error += M2 != std::string("bvec2(false, true)") ? 1 : 0; glm::bvec3 O1(false, true, false); std::string O2 = glm::to_string(O1); Error += O2 != std::string("bvec3(false, true, false)") ? 1 : 0; glm::bvec4 P1(false, true, false, true); std::string P2 = glm::to_string(P1); Error += P2 != std::string("bvec4(false, true, false, true)") ? 1 : 0; } { glm::ivec2 D1(1, 2); std::string D2 = glm::to_string(D1); Error += D2 != std::string("ivec2(1, 2)") ? 1 : 0; glm::ivec3 E1(1, 2, 3); std::string E2 = glm::to_string(E1); Error += E2 != std::string("ivec3(1, 2, 3)") ? 1 : 0; glm::ivec4 F1(1, 2, 3, 4); std::string F2 = glm::to_string(F1); Error += F2 != std::string("ivec4(1, 2, 3, 4)") ? 1 : 0; } { glm::i8vec2 D1(1, 2); std::string D2 = glm::to_string(D1); Error += D2 != std::string("i8vec2(1, 2)") ? 1 : 0; glm::i8vec3 E1(1, 2, 3); std::string E2 = glm::to_string(E1); Error += E2 != std::string("i8vec3(1, 2, 3)") ? 1 : 0; glm::i8vec4 F1(1, 2, 3, 4); std::string F2 = glm::to_string(F1); Error += F2 != std::string("i8vec4(1, 2, 3, 4)") ? 
1 : 0; } { glm::i16vec2 D1(1, 2); std::string D2 = glm::to_string(D1); Error += D2 != std::string("i16vec2(1, 2)") ? 1 : 0; glm::i16vec3 E1(1, 2, 3); std::string E2 = glm::to_string(E1); Error += E2 != std::string("i16vec3(1, 2, 3)") ? 1 : 0; glm::i16vec4 F1(1, 2, 3, 4); std::string F2 = glm::to_string(F1); Error += F2 != std::string("i16vec4(1, 2, 3, 4)") ? 1 : 0; } { glm::i64vec2 D1(1, 2); std::string D2 = glm::to_string(D1); Error += D2 != std::string("i64vec2(1, 2)") ? 1 : 0; glm::i64vec3 E1(1, 2, 3); std::string E2 = glm::to_string(E1); Error += E2 != std::string("i64vec3(1, 2, 3)") ? 1 : 0; glm::i64vec4 F1(1, 2, 3, 4); std::string F2 = glm::to_string(F1); Error += F2 != std::string("i64vec4(1, 2, 3, 4)") ? 1 : 0; } return Error; }
/*
 *  GenerateBezier :
 *  Use least-squares method to find Bezier control points for region.
 *
 *  points       digitized points
 *  first, last  inclusive index range of the region inside `points`
 *  uPrime       parameter values for the region, read at 0 .. last-first
 *  tHat1, tHat2 tangents at the first / last point (taken by value and
 *               scaled locally)
 *
 *  Returns an array of 4 control points allocated with new[]; ownership
 *  passes to the caller.
 *
 *  Changes relative to the previous revision:
 *   - the per-point A vectors are computed inside the accumulation loop
 *     instead of being stored in a QVector<QVector<FitVector>>, which removes
 *     the per-call heap allocations and the negative-size QVector when
 *     last < first;
 *   - the singular-matrix test uses qFuzzyIsNull(): qFuzzyCompare() against
 *     0.0 only matches an exact zero, so near-singular systems slipped
 *     through;
 *   - the result array is allocated after the fit is computed, and the two
 *     identical control-point assembly paths are merged.
 */
QPointF* GenerateBezier(const QList<QPointF> &points, int first, int last, qreal *uPrime, FitVector tHat1, FitVector tHat2)
{
    const int nPts = last - first + 1;              /* Number of pts in sub-curve */
    qreal C[2][2] = { {0.0, 0.0}, {0.0, 0.0} };     /* Matrix C */
    qreal X[2] = { 0.0, 0.0 };                      /* Matrix X */

    /* Accumulate the C and X matrices */
    for (int i = 0; i < nPts; ++i) {
        /* Right-hand-side vectors for this sample (formerly A[i][0], A[i][1]) */
        FitVector a0 = tHat1;
        FitVector a1 = tHat2;
        a0.scale(B1(uPrime[i]));
        a1.scale(B2(uPrime[i]));

        C[0][0] += a0.dot(a0);
        C[0][1] += a0.dot(a1);
        C[1][1] += a1.dot(a1);

        FitVector vfirstp1(points.at(first + i));
        FitVector vfirst(points.at(first));
        FitVector vlast(points.at(last));

        FitVector tmp = VectorSub(vfirstp1,
                                  VectorAdd(
                                      VectorScale(vfirst, B0(uPrime[i])),
                                      VectorAdd(
                                          VectorScale(vfirst, B1(uPrime[i])),
                                          VectorAdd(
                                              VectorScale(vlast, B2(uPrime[i])),
                                              VectorScale(vlast, B3(uPrime[i]))))));

        X[0] += a0.dot(tmp);
        X[1] += a1.dot(tmp);
    }
    /* C is symmetric */
    C[1][0] = C[0][1];

    /* Compute the determinants of C and X */
    qreal det_C0_C1 = C[0][0] * C[1][1] - C[1][0] * C[0][1];
    const qreal det_C0_X = C[0][0] * X[1] - C[0][1] * X[0];
    const qreal det_X_C1 = X[0] * C[1][1] - X[1] * C[0][1];

    /* Finally, derive alpha values */
    if (qFuzzyIsNull(det_C0_C1)) {
        det_C0_C1 = (C[0][0] * C[1][1]) * 10e-12;
        /* Exact test on purpose: this only guards the division below
         * against a literal zero denominator. */
        if (det_C0_C1 == qreal(0.0)) {
            det_C0_C1 = Zero;
        }
    }
    qreal alpha_l = det_X_C1 / det_C0_C1;
    qreal alpha_r = det_C0_X / det_C0_C1;

    /*  If alpha negative, use the Wu/Barsky heuristic (see text) */
    /* (if alpha is 0, you get coincident control points that lead to
     * divide by zero in any subsequent NewtonRaphsonRootFind() call. */
    if (alpha_l < 1.0e-6 || alpha_r < 1.0e-6) {
        const qreal dist = distance(points.at(last), points.at(first)) / 3.0;
        alpha_l = dist;
        alpha_r = dist;
    }

    /*  First and last control points of the Bezier curve are */
    /*  positioned exactly at the first and last data points */
    /*  Control points 1 and 2 are positioned an alpha distance out */
    /*  on the tangent vectors, left and right, respectively */
    QPointF *curve = new QPointF[4];
    curve[0] = points.at(first);
    curve[3] = points.at(last);

    tHat1.scale(alpha_l);
    tHat2.scale(alpha_r);

    curve[1] = tHat1 + curve[0];
    curve[2] = tHat2 + curve[3];

    return curve;
}
// Panel-wise update of C from B and the stored blocks of A.
//
// C is scaled by beta once. A is walked down its diagonal while B and C are
// walked left to right. For each step, with B1 the current column panel:
//   - ARowPan = [A11 A12] is adjointed, trapezoidally masked and applied to
//     CRight = [C1 C2];
//   - AColPan = [A01; A11] is adjointed, trapezoidally masked with offset -1
//     (so the diagonal block is not applied twice) and applied to
//     CLeft = [C0 C1].
// NOTE(review): the name suggests the "right, upper" Hemm variant C, i.e.
// only the upper triangle of A being referenced; confirm with the caller.
//
// Debug builds only verify that A, B and C share a grid.
inline void
HemmRUC
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    PushCallStack("internal::HemmRUC");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        throw std::logic_error("{A,B,C} must be distributed on the same grid");
#endif
    const Grid& g = A.Grid();

    // Matrix views
    DistMatrix<T>
        ATL(g), ATR(g),  A00(g), A01(g), A02(g),  AColPan(g),
        ABL(g), ABR(g),  A10(g), A11(g), A12(g),  ARowPan(g),
                         A20(g), A21(g), A22(g);
    DistMatrix<T> BL(g), BR(g),
                  B0(g), B1(g), B2(g);
    DistMatrix<T> CL(g), CR(g),
                  C0(g), C1(g), C2(g),
                  CLeft(g), CRight(g);

    // Temporary distributions
    DistMatrix<T,MC,STAR> B1_MC_STAR(g);
    DistMatrix<T,VR,  STAR> AColPan_VR_STAR(g);
    DistMatrix<T,STAR,MR  > AColPanAdj_STAR_MR(g);
    DistMatrix<T,MR,  STAR> ARowPanAdj_MR_STAR(g);

    // B1[MC,*] is aligned once; the A-panel temporaries are realigned each
    // iteration because CLeft/CRight change.
    B1_MC_STAR.AlignWith( C );

    // Start the algorithm
    Scale( beta, C );
    LockedPartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    LockedPartitionRight( B, BL, BR, 0 );
    PartitionRight( C, CL, CR, 0 );
    while( CR.Width() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        LockedRepartitionRight
        ( BL, /**/ BR,
          B0, /**/ B1, B2 );

        RepartitionRight
        ( CL, /**/ CR,
          C0, /**/ C1, C2 );

        // Row panel to the right of (and including) the diagonal block,
        // column panel above (and including) the diagonal block.
        ARowPan.LockedView1x2( A11, A12 );

        AColPan.LockedView2x1
        ( A01,
          A11 );

        // Both target views contain C1.
        CLeft.View1x2( C0, C1 );

        CRight.View1x2( C1, C2 );

        AColPan_VR_STAR.AlignWith( CLeft );
        AColPanAdj_STAR_MR.AlignWith( CLeft );
        ARowPanAdj_MR_STAR.AlignWith( CRight );
        //--------------------------------------------------------------------//
        B1_MC_STAR = B1;

        AColPan_VR_STAR = AColPan;
        AColPanAdj_STAR_MR.AdjointFrom( AColPan_VR_STAR );
        ARowPanAdj_MR_STAR.AdjointFrom( ARowPan );
        // Mask the adjointed panels; the -1 offset drops the diagonal from
        // the column panel so it is contributed only by the row panel.
        MakeTrapezoidal( LEFT,  LOWER,  0, ARowPanAdj_MR_STAR );
        MakeTrapezoidal( RIGHT, LOWER, -1, AColPanAdj_STAR_MR );

        // CRight += alpha B1[MC,*] (ARowPan^H[MR,*])^H
        LocalGemm
        ( NORMAL, ADJOINT,
          alpha, B1_MC_STAR, ARowPanAdj_MR_STAR, T(1), CRight );

        // CLeft += alpha B1[MC,*] AColPan^H[*,MR]
        LocalGemm
        ( NORMAL, NORMAL,
          alpha, B1_MC_STAR, AColPanAdj_STAR_MR, T(1), CLeft );
        //--------------------------------------------------------------------//
        AColPan_VR_STAR.FreeAlignments();
        AColPanAdj_STAR_MR.FreeAlignments();
        ARowPanAdj_MR_STAR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );

        SlideLockedPartitionRight
        ( BL,     /**/ BR,
          B0, B1, /**/ B2 );

        SlidePartitionRight
        ( CL,     /**/ CR,
          C0, C1, /**/ C2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void internal::Syr2kLN ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::Syr2kLN"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal Syr2kLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); DistMatrix<T,STAR,MR > B1Trans_STAR_MR(g); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); A1_MC_STAR.AlignWith( C ); B1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); B1Trans_STAR_MR.AlignWith( C ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR ); B1_VR_STAR = B1_MC_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); internal::LocalTrr2k ( LOWER, alpha, A1_MC_STAR, B1Trans_STAR_MR, B1_MC_STAR, A1Trans_STAR_MR, (T)1, C ); 
//--------------------------------------------------------------------// A1_MC_STAR.FreeAlignments(); B1_MC_STAR.FreeAlignments(); A1_VR_STAR.FreeAlignments(); B1_VR_STAR.FreeAlignments(); A1Trans_STAR_MR.FreeAlignments(); B1Trans_STAR_MR.FreeAlignments(); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void internal::GemmTNA ( Orientation orientationOfA, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL ) throw std::logic_error("GemmTNA assumes A is (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmTNA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T,MC,MR> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,MR,STAR> D1_MR_STAR(g); DistMatrix<T,MR,MC > D1_MR_MC(g); DistMatrix<T,MC,MR > D1(g); // Start the algorithm Scal( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_MC_STAR.AlignWith( A ); D1_MR_STAR.AlignWith( A ); D1_MR_STAR.ResizeTo( C1.Height(), C1.Width() ); D1.AlignWith( C1 ); //--------------------------------------------------------------------// B1_MC_STAR = B1; // B1[MC,*] <- B1[MC,MR] // D1[MR,*] := alpha (A1[MC,MR])^T B1[MC,*] // = alpha (A1^T)[MR,MC] B1[MC,*] internal::LocalGemm ( orientationOfA, NORMAL, alpha, A, B1_MC_STAR, (T)0, D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( (T)1, D1, C1 ); //--------------------------------------------------------------------// 
B1_MC_STAR.FreeAlignments(); D1_MR_STAR.FreeAlignments(); D1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
// Demo driver: builds a small hard-coded graph of named nodes, then asks the
// dijkstra planner for a route from A1 to D4 and prints it.
// Always returns 0.
int main(void){
	// the test goes here
	/*
	Disabled experiment: bulk-creates nodes named <letter><index> and looks
	some of them up before and after Node::deleteAllNodes().

	int i = 0, j;
	char letter = 'a';
	char buffer[3];
	std::string* hulp;
	for(; i < h_nodes; i++){
		for(j = 0; j < v_nodes; j++){
			sprintf(buffer, "%c%d" , letter, j);
			hulp = new std::string(buffer);
			new Node(*hulp);
			delete hulp;
		}
		letter++;
		if(letter > 'z'){
			letter = 'a';
		}
	}
	printf("%p\n", Node::getParticularNode(std::string("a1")));
	Node::getParticularNode(std::string("a1"))->print();
	printf("%p\n", Node::getParticularNode(std::string("b1")));
	Node::getParticularNode(std::string("b1"))->print();
	Node::deleteAllNodes();
	printf("%p\n", Node::getParticularNode(std::string("a1")));
	Node::getParticularNode(std::string("a1"))->print();
	*/

	// Each node lists its neighbours by name together with a number.
	// NOTE(review): the number is presumably the edge cost used by the
	// planner; confirm against Node::addNeighbournode.
	// Neighbours are named before the corresponding Node object exists,
	// so the declaration order below is kept exactly as written.
	Node A1("A1");
	A1.addNeighbournode("B1", 4);
	A1.addNeighbournode("B2", 2);
	Node B1("B1");
	B1.addNeighbournode("C1", 2);
	Node B2("B2");
	B2.addNeighbournode("C3", 1);
	// B2 is the only node whose state is changed before planning.
	B2.setState(nodeUsed);
	Node C1("C1");
	C1.addNeighbournode("D1", 1);
	Node C2("C2");
	C2.addNeighbournode("D2", 3);
	C2.addNeighbournode("D3", 6);
	Node C3("C3");
	C3.addNeighbournode("C2", 1);
	C3.addNeighbournode("D4", 3);
	Node D1("D1");
	D1.addNeighbournode("D2", 9);
	Node D2("D2");
	D2.addNeighbournode("D1", 9);
	D2.addNeighbournode("C2", 3);
	Node D3("D3");
	D3.addNeighbournode("D4", 2);
	Node D4("D4");
	D4.addNeighbournode("D3", 2);

	// Plan from A1 to D4 and print the result.
	dijkstra planner;
	planner.calculateRoute(&A1, &D4);
	planner.printpath();
	return 0;
}
/*
 *  GenerateBezier :
 *  Use least-squares method to find Bezier control points for region.
 *
 *  d            Array of digitized points
 *  first, last  Indices defining region (inclusive)
 *  uPrime       Parameter values for region, read at 0 .. last-first
 *  tHat1, tHat2 Unit tangents at endpoints (taken by value, scaled locally)
 *
 *  Returns 4 control points in a malloc()ed array owned by the caller, or
 *  NULL if the allocation fails.
 *
 *  Changes relative to the previous revision:
 *   - the per-point A vectors are computed inside the accumulation loop, so
 *     the new[]/delete[] scratch array (which threw for last < first) is
 *     gone;
 *   - when the determinant is still zero after the fallback scaling, the
 *     Wu/Barsky heuristic is used instead of dividing by zero, which used to
 *     produce infinite or NaN control points;
 *   - the malloc() result is checked before it is written to.
 */
static BezierCurve GenerateBezier(
    Point2 *d,                       /*  Array of digitized points  */
    int first, int last,             /*  Indices defining region    */
    double *uPrime,                  /*  Parameter values for region */
    Vector2 tHat1, Vector2 tHat2)    /*  Unit tangents at endpoints */
{
    int     i;
    int     nPts;                   /* Number of pts in sub-curve */
    double  C[2][2];                /* Matrix C     */
    double  X[2];                   /* Matrix X         */
    double  det_C0_C1,              /* Determinants of matrices */
            det_C0_X,
            det_X_C1;
    double  alpha_l = 0.0,          /* Alpha values, left and right */
            alpha_r = 0.0;
    int     useHeuristic = 0;       /* Non-zero: fall back to Wu/Barsky */
    Vector2 tmp;                    /* Utility variable     */
    BezierCurve bezCurve;           /* RETURN bezier curve ctl pts  */

    bezCurve = (Point2 *)malloc(4 * sizeof(Point2));
    if (bezCurve == NULL) {
        return NULL;
    }
    nPts = last - first + 1;

    /* Create the C and X matrices  */
    C[0][0] = 0.0;
    C[0][1] = 0.0;
    C[1][0] = 0.0;
    C[1][1] = 0.0;
    X[0]    = 0.0;
    X[1]    = 0.0;

    for (i = 0; i < nPts; i++) {
        /* Right-hand-side vectors for this sample (formerly A[i][0..1]) */
        Vector2 a0, a1;
        a0 = tHat1;
        a1 = tHat2;
        V2Scale(&a0, B1(uPrime[i]));
        V2Scale(&a1, B2(uPrime[i]));

        C[0][0] += V2Dot(&a0, &a0);
        C[0][1] += V2Dot(&a0, &a1);
        C[1][1] += V2Dot(&a1, &a1);

        tmp = V2SubII(d[first + i],
                V2AddII(
                  V2ScaleIII(d[first], B0(uPrime[i])),
                    V2AddII(
                        V2ScaleIII(d[first], B1(uPrime[i])),
                                V2AddII(
                                V2ScaleIII(d[last], B2(uPrime[i])),
                                    V2ScaleIII(d[last], B3(uPrime[i]))))));

        X[0] += V2Dot(&a0, &tmp);
        X[1] += V2Dot(&a1, &tmp);
    }
    /* C is symmetric */
    C[1][0] = C[0][1];

    /* Compute the determinants of C and X  */
    det_C0_C1 = C[0][0] * C[1][1] - C[1][0] * C[0][1];
    det_C0_X  = C[0][0] * X[1]    - C[0][1] * X[0];
    det_X_C1  = X[0]    * C[1][1] - X[1]    * C[0][1];

    /* Finally, derive alpha values */
    if (det_C0_C1 == 0.0) {
        det_C0_C1 = (C[0][0] * C[1][1]) * 10e-12;
    }
    if (det_C0_C1 == 0.0) {
        /* Still singular: no meaningful least-squares solution exists. */
        useHeuristic = 1;
    } else {
        alpha_l = det_X_C1 / det_C0_C1;
        alpha_r = det_C0_X / det_C0_C1;
    }

    /*  If alpha negative, use the Wu/Barsky heuristic (see text) */
    if (useHeuristic || alpha_l < 0.0 || alpha_r < 0.0) {
        double dist = V2DistanceBetween2Points(&d[last], &d[first]) / 3.0;
        alpha_l = dist;
        alpha_r = dist;
    }

    /*  First and last control points of the Bezier curve are */
    /*  positioned exactly at the first and last data points */
    /*  Control points 1 and 2 are positioned an alpha distance out */
    /*  on the tangent vectors, left and right, respectively */
    bezCurve[0] = d[first];
    bezCurve[3] = d[last];
    V2Add(&bezCurve[0], V2Scale(&tHat1, alpha_l), &bezCurve[1]);
    V2Add(&bezCurve[3], V2Scale(&tHat2, alpha_r), &bezCurve[2]);
    return (bezCurve);
}
inline void internal::GemmTNC ( Orientation orientationOfA, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL ) throw std::logic_error("GemmTNC assumes A is (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmTNC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T,MC,MR> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,STAR,MR> B1_STAR_MR(g); // Start the algorithm Scal( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); A1_STAR_MC.AlignWith( C ); B1_STAR_MR.AlignWith( C ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] B1_STAR_MR = B1; // B1[*,MR] <- B1[MC,MR] // C[MC,MR] += alpha (A1[*,MC])^T B1[*,MR] // = alpha (A1^T)[MC,*] B1[*,MR] internal::LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1_STAR_MR, (T)1, C ); //--------------------------------------------------------------------// A1_STAR_MC.FreeAlignments(); B1_STAR_MR.FreeAlignments(); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
virtual void exec() { USE(READ, n, tsteps); T DX(static_cast<T>(1.0) / n); T DY(static_cast<T>(1.0) / n); T DT(static_cast<T>(1.0) / tsteps); T B1(static_cast<T>(2.0)); T B2(static_cast<T>(1.0)); T mul1(B1 * DT / (DX * DX)); T mul2(B2 * DT / (DY * DY)); T a(-mul1 / static_cast<T>(2.0)); T b(static_cast<T>(1.0) + mul1); T c(a); T d(-mul2 / static_cast<T>(2.0)); T e(static_cast<T>(1.0) + mul2); T f(d); USE(READWRITE, v, u, p, q); using exec_pol = NestedPolicy<ExecList<omp_parallel_for_exec, simd_exec>, Tile<TileList<tile_fixed<16>, tile_none>>>; for (int t = 0; t < tsteps; ++t) { forall<omp_parallel_for_exec>(1, n - 1, [=](int i) { v->at(0, i) = static_cast<T>(1.0); p->at(i, 0) = static_cast<T>(0.0); q->at(i, 0) = v->at(0, i); v->at(n - 1, i) = static_cast<T>(1.0); }); forallN<exec_pol>( RangeSegment{1, n - 1}, RangeSegment{1, n - 1}, [=](int i, int j) { p->at(i, j) = -c / (a * p->at(i, j - 1) + b); q->at(i, j) = (-d * u->at(j, i - 1) + (1.0 + 2.0 * d) * u->at(j, i) - f * u->at(j, i + 1) - a * q->at(i, j - 1)) / (a * p->at(i, j - 1) + b); }); forallN<exec_pol>( RangeSegment{1, n - 1}, RangeSegment{2, n}, [=](int i, int j_) { int j = n - j_; v->at(j, i) = p->at(i, j) * v->at(j + 1, i) + q->at(i, j); }); forall<omp_parallel_for_exec>(1, n - 1, [=](int i) { u->at(i, 0) = static_cast<T>(1.0); p->at(i, 0) = static_cast<T>(0.0); q->at(i, 0) = u->at(i, 0); u->at(i, n - 1) = static_cast<T>(1.0); }); forallN<exec_pol>( RangeSegment{1, n - 1}, RangeSegment{1, n - 1}, [=](int i, int j) { p->at(i, j) = -f / (d * p->at(i, j - 1) + e); q->at(i, j) = (-a * v->at(i - 1, j) + (static_cast<T>(1.0) + static_cast<T>(2.0) * a) * v->at(i, j) - c * v->at(i + 1, j) - d * q->at(i, j - 1)) / (d * p->at(i, j - 1) + e); }); forallN<exec_pol>( RangeSegment{1, n - 1}, RangeSegment{2, n}, [=](int i, int j_) { int j = n - j_; u->at(i, j) = p->at(i, j) * u->at(i, j + 1) + q->at(i, j); }); } }
// Distributed matrix product built from small blocks: every block C11 of C
// is produced by a local product of a row panel of A with a column panel of
// B, followed by a sum-scatter into C11.
//
// C is scaled by beta once. The loop nest is chosen from the operand shapes:
//   - A.Height() > B.Width(): the outer loop walks row panels of A (and C),
//     redistributing each A1 to [*,VC] once; the inner loop walks column
//     panels of B;
//   - otherwise: the outer loop walks column panels of B (and C),
//     redistributing each B1 to [VR,*] once; the inner loop walks row panels
//     of A.
//
// Debug builds verify that the grids match and that A.Height() ==
// C.Height(), B.Width() == C.Width() and A.Width() == B.Height().
inline void
SUMMA_NNDot
( T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& B,
  T beta,        DistMatrix<T>& C )
{
#ifndef RELEASE
    CallStackEntry entry("gemm::SUMMA_NNDot");
    if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
        LogicError("{A,B,C} must have the same grid");
    if( A.Height() != C.Height() ||
        B.Width()  != C.Width()  ||
        A.Width()  != B.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal matrices: \n"
            << " A ~ " << A.Height() << " x " << A.Width() << "\n"
            << " B ~ " << B.Height() << " x " << B.Width() << "\n"
            << " C ~ " << C.Height() << " x " << C.Width() << "\n";
        LogicError( msg.str() );
    }
#endif
    const Grid& g = A.Grid();

    if( A.Height() > B.Width() )
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);
        DistMatrix<T> BL(g), B0(g),
                      BR(g), B1(g),
                             B2(g);
        DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g),
                      CB(g), C1(g), C10(g), C11(g), C12(g),
                             C2(g);

        // Temporary distributions
        DistMatrix<T,STAR,VC> A1_STAR_VC(g);
        DistMatrix<T,VC,STAR> B1_VC_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionDown
        ( A, AT,
             AB, 0 );
        PartitionDown
        ( C, CT,
             CB, 0 );
        while( AB.Height() > 0 )
        {
            LockedRepartitionDown
            ( AT,  A0,
             /**/ /**/
                   A1,
              AB,  A2 );

            RepartitionDown
            ( CT,  C0,
             /**/ /**/
                   C1,
              CB,  C2 );

            // The row panel of A is redistributed once per outer iteration;
            // every B panel in the inner loop is aligned to it.
            A1_STAR_VC = A1;
            B1_VC_STAR.AlignWith( A1_STAR_VC );

            // Restart the walk over B and over the current row panel of C
            LockedPartitionRight( B, BL, BR, 0 );
            PartitionRight( C1, C1L, C1R, 0 );
            while( BR.Width() > 0 )
            {
                LockedRepartitionRight
                ( BL, /**/ BR,
                  B0, /**/ B1, B2 );

                RepartitionRight
                ( C1L, /**/ C1R,
                  C10, /**/ C11, C12 );

                //------------------------------------------------------------//
                B1_VC_STAR = B1;
                // Local partial product, then reduce it into C11
                LocalGemm
                ( NORMAL, NORMAL,
                  alpha, A1_STAR_VC, B1_VC_STAR, C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionRight
                ( BL,     /**/ BR,
                  B0, B1, /**/ B2 );

                SlidePartitionRight
                ( C1L,      /**/ C1R,
                  C10, C11, /**/ C12 );
            }

            SlideLockedPartitionDown
            ( AT,  A0,
                   A1,
             /**/ /**/
              AB,  A2 );

            SlidePartitionDown
            ( CT,  C0,
                   C1,
             /**/ /**/
              CB,  C2 );
        }
    }
    else
    {
        // Matrix views
        DistMatrix<T> AT(g), AB(g),
                      A0(g), A1(g), A2(g);
        DistMatrix<T> BL(g), B0(g),
                      BR(g), B1(g),
                             B2(g);
        DistMatrix<T>
            CL(g), CR(g),         C1T(g),  C01(g),
            C0(g), C1(g), C2(g),  C1B(g),  C11(g),
                                           C21(g);

        // Temporary distributions
        DistMatrix<T,STAR,VR> A1_STAR_VR(g);
        DistMatrix<T,VR,STAR> B1_VR_STAR(g);
        DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);

        // Start the algorithm
        Scale( beta, C );
        LockedPartitionRight( B, BL, BR, 0 );
        PartitionRight( C, CL, CR, 0 );
        while( BR.Width() > 0 )
        {
            LockedRepartitionRight
            ( BL, /**/ BR,
              B0, /**/ B1, B2 );

            RepartitionRight
            ( CL, /**/ CR,
              C0, /**/ C1, C2 );

            // The column panel of B is redistributed once per outer
            // iteration; every A panel in the inner loop is aligned to it.
            B1_VR_STAR = B1;
            A1_STAR_VR.AlignWith( B1_VR_STAR );

            // Restart the walk over A and over the current column panel of C
            LockedPartitionDown
            ( A, AT,
                 AB, 0 );
            PartitionDown
            ( C1, C1T,
                  C1B, 0 );
            while( AB.Height() > 0 )
            {
                LockedRepartitionDown
                ( AT,  A0,
                 /**/ /**/
                       A1,
                  AB,  A2 );

                RepartitionDown
                ( C1T,  C01,
                 /***/ /***/
                        C11,
                  C1B,  C21 );

                //------------------------------------------------------------//
                A1_STAR_VR = A1;
                // Local partial product, then reduce it into C11
                LocalGemm
                ( NORMAL, NORMAL,
                  alpha, A1_STAR_VR, B1_VR_STAR, C11_STAR_STAR );
                C11.SumScatterUpdate( T(1), C11_STAR_STAR );
                //------------------------------------------------------------//

                SlideLockedPartitionDown
                ( AT,  A0,
                       A1,
                 /**/ /**/
                  AB,  A2 );

                SlidePartitionDown
                ( C1T,  C01,
                        C11,
                 /***/ /***/
                  C1B,  C21 );
            }

            SlideLockedPartitionRight
            ( BL,     /**/ BR,
              B0, B1, /**/ B2 );

            SlidePartitionRight
            ( CL,     /**/ CR,
              C0, C1, /**/ C2 );
        }
    }
}
inline void SUMMA_NNC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("gemm::SUMMA_NNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal matrices: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; B1Trans_MR_STAR.TransposeFrom( B1 ); // C[MC,MR] += alpha A1[MC,*] (B1^T[MR,*])^T // = alpha A1[MC,*] B1[*,MR] LocalGemm ( NORMAL, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionDown( BT, B0, B1, /**/ /**/ BB, B2 ); } }
/*
 *  GenerateBezier :
 *  Use least-squares method to find Bezier control points for region.
 *
 *  d            Array of digitized points
 *  first, last  Indices defining region (inclusive)
 *  uPrime       Parameter values for region, read at 0 .. last-first
 *  tHat1, tHat2 Unit tangents at endpoints (taken by value, scaled locally)
 *
 *  Returns 4 control points in a malloc()ed array owned by the caller, or
 *  NULL if the allocation fails.
 *
 *  Changes relative to the previous revision:
 *   - the per-point A vectors are computed inside the accumulation loop
 *     instead of being stored in a fixed `Vector2 A[MAXPOINTS][2]` stack
 *     array, which was written past its end whenever the region held more
 *     than MAXPOINTS points;
 *   - the malloc() result is checked before it is written to.
 */
static BezierCurve GenerateBezier(Point2 *d, int first, int last, double *uPrime, Vector2 tHat1, Vector2 tHat2)
{
    int     i;
    int     nPts;                   /* Number of pts in sub-curve */
    double  C[2][2];                /* Matrix C     */
    double  X[2];                   /* Matrix X         */
    double  det_C0_C1,              /* Determinants of matrices */
            det_C0_X,
            det_X_C1;
    double  alpha_l,                /* Alpha values, left and right */
            alpha_r;
    Vector2 tmp;                    /* Utility variable     */
    BezierCurve bezCurve;           /* RETURN bezier curve ctl pts  */

    bezCurve = (Point2 *)malloc(4 * sizeof(Point2));
    if (bezCurve == NULL) {
        return NULL;
    }
    nPts = last - first + 1;

    /* Create the C and X matrices  */
    C[0][0] = 0.0;
    C[0][1] = 0.0;
    C[1][0] = 0.0;
    C[1][1] = 0.0;
    X[0]    = 0.0;
    X[1]    = 0.0;

    for (i = 0; i < nPts; i++) {
        /* Right-hand-side vectors for this sample (formerly A[i][0..1]) */
        Vector2 a0, a1;
        a0 = tHat1;
        a1 = tHat2;
        V2Scale(&a0, B1(uPrime[i]));
        V2Scale(&a1, B2(uPrime[i]));

        C[0][0] += V2Dot(&a0, &a0);
        C[0][1] += V2Dot(&a0, &a1);
        C[1][1] += V2Dot(&a1, &a1);

        tmp = V2SubII(d[first + i],
                V2AddII(
                  V2ScaleIII(d[first], B0(uPrime[i])),
                    V2AddII(
                        V2ScaleIII(d[first], B1(uPrime[i])),
                                V2AddII(
                                V2ScaleIII(d[last], B2(uPrime[i])),
                                    V2ScaleIII(d[last], B3(uPrime[i]))))));

        X[0] += V2Dot(&a0, &tmp);
        X[1] += V2Dot(&a1, &tmp);
    }
    /* C is symmetric */
    C[1][0] = C[0][1];

    /* Compute the determinants of C and X  */
    det_C0_C1 = C[0][0] * C[1][1] - C[1][0] * C[0][1];
    det_C0_X  = C[0][0] * X[1]    - C[1][0] * X[0];
    det_X_C1  = X[0]    * C[1][1] - X[1]    * C[0][1];

    /* Finally, derive alpha values */
    alpha_l = (det_C0_C1 < ZERO_TOLERANCE) ? 0.0 : det_X_C1 / det_C0_C1;
    alpha_r = (det_C0_C1 < ZERO_TOLERANCE) ? 0.0 : det_C0_X / det_C0_C1;

    /* If alpha negative, use the Wu/Barsky heuristic (see text) */
    /* (if alpha is 0, you get coincident control points that lead to
     * divide by zero in any subsequent NewtonRaphsonRootFind() call. */
    double segLength = V2DistanceBetween2Points(&d[last], &d[first]);
    double epsilon = 1.0e-6 * segLength;
    if (alpha_l < epsilon || alpha_r < epsilon) {
        /* fall back on standard (probably inaccurate) formula, and subdivide
         * further if needed. */
        double dist = segLength / 3.0;
        alpha_l = dist;
        alpha_r = dist;
    }

    /*  First and last control points of the Bezier curve are */
    /*  positioned exactly at the first and last data points */
    /*  Control points 1 and 2 are positioned an alpha distance out */
    /*  on the tangent vectors, left and right, respectively */
    bezCurve[0] = d[first];
    bezCurve[3] = d[last];
    V2Add(&bezCurve[0], V2Scale(&tHat1, alpha_l), &bezCurve[1]);
    V2Add(&bezCurve[3], V2Scale(&tHat2, alpha_r), &bezCurve[2]);
    return (bezCurve);
}
/**
 * Builds the smooth feature edge carried by the face layer `ifl` and chains
 * it after `feprevious`.
 *
 * The start vertex is either the end vertex of `feprevious` or, when there is
 * no previous edge, a new SVertex interpolated on the first silhouette edge.
 * The end vertex is always interpolated on the second silhouette edge.
 *
 * Returns `feprevious` unchanged when the new end point lies within 1.0e-6 of
 * the start vertex of a chained edge; otherwise returns the new edge, which is
 * also registered with the current shape, both vertices and
 * `ifl.fl->userdata`.
 *
 * The edge normal is normalized only when there is no previous edge.
 */
FEdge *ViewEdgeXBuilder::BuildSmoothFEdge(FEdge *feprevious, const OWXFaceLayer& ifl)
{
  // Exact silhouette data of the layer; the two ends are swapped when the
  // layer is traversed in reverse order.
  WXSmoothEdge *smooth = ifl.fl->getSmoothEdge();

  WOEdge *startEdge, *endEdge;
  real startT, endT;
  if (!ifl.order) {
    startEdge = smooth->woeb();
    endEdge = smooth->woea();
    startT = smooth->tb();
    endT = smooth->ta();
  }
  else {
    startEdge = smooth->woea();
    endEdge = smooth->woeb();
    startT = smooth->ta();
    endT = smooth->tb();
  }

  SVertex *startVertex;
  Vec3r edgeNormal;

  // --- start vertex ---------------------------------------------------
  if (feprevious) {
    // Reuse the vertex that closes the previous edge.
    startVertex = feprevious->vertexB();
  }
  else {
    // No vertex has been built for this face yet: interpolate one.
    Vec3r p0(startEdge->GetaVertex()->GetVertex());
    Vec3r p1(startEdge->GetbVertex()->GetVertex());
    Vec3r startPoint(p0 + startT * (p1 - p0));
    startVertex = MakeSVertex(startPoint, false);

    // Interpolated and normalized vertex normal.
    Vec3r n0(ifl.fl->getFace()->GetVertexNormal(startEdge->GetaVertex()));
    Vec3r n1(ifl.fl->getFace()->GetVertexNormal(startEdge->GetbVertex()));
    Vec3r startNormal((1 - startT) * n0 + startT * n1);
    startNormal.normalize();
    startVertex->AddNormal(startNormal);
    edgeNormal = startNormal;

    // Interpolated curvature information.
    WXVertex *curvSource0 = dynamic_cast<WXVertex*>(startEdge->GetaVertex());
    WXVertex *curvSource1 = dynamic_cast<WXVertex*>(startEdge->GetbVertex());
    CurvatureInfo *startCurvature =
        new CurvatureInfo(*(curvSource0->curvatures()), *(curvSource1->curvatures()), startT);
    startVertex->setCurvatureInfo(startCurvature);
  }

  // --- end vertex -----------------------------------------------------
  Vec3r q0(endEdge->GetaVertex()->GetVertex());
  Vec3r q1(endEdge->GetbVertex()->GetVertex());
  Vec3r endPoint(q0 + endT * (q1 - q0));

  // A chained edge of (almost) zero length adds nothing: keep the previous one.
  if (feprevious && (endPoint - startVertex->point3D()).norm() < 1.0e-6)
    return feprevious;

  SVertex *endVertex = MakeSVertex(endPoint, false);

  Vec3r m0(ifl.fl->getFace()->GetVertexNormal(endEdge->GetaVertex()));
  Vec3r m1(ifl.fl->getFace()->GetVertexNormal(endEdge->GetbVertex()));
  Vec3r endNormal((1 - endT) * m0 + endT * m1);
  endNormal.normalize();
  edgeNormal += endNormal;
  endVertex->AddNormal(endNormal);

  WXVertex *curvSource2 = dynamic_cast<WXVertex*>(endEdge->GetaVertex());
  WXVertex *curvSource3 = dynamic_cast<WXVertex*>(endEdge->GetbVertex());
  CurvatureInfo *endCurvature =
      new CurvatureInfo(*(curvSource2->curvatures()), *(curvSource3->curvatures()), endT);
  endVertex->setCurvatureInfo(endCurvature);

  // --- feature edge ---------------------------------------------------
  FEdgeSmooth *edge = new FEdgeSmooth(startVertex, endVertex);
  edge->setNature(ifl.fl->nature());
  edge->setId(_currentFId);
  edge->setFrsMaterialIndex(ifl.fl->getFace()->frs_materialIndex());
  edge->setFace(ifl.fl->getFace());
  edge->setFaceMark(ifl.fl->getFace()->GetMark());

  // Only the sum of two freshly built vertex normals is renormalized.
  if (!feprevious)
    edgeNormal.normalize();
  edge->setNormal(edgeNormal);

  // Link into the chain and register with the shape and both vertices.
  edge->setPreviousEdge(feprevious);
  if (feprevious)
    feprevious->setNextEdge(edge);
  _pCurrentSShape->AddEdge(edge);
  startVertex->AddFEdge(edge);
  endVertex->AddFEdge(edge);

  ++_currentFId;
  ifl.fl->userdata = edge;
  return edge;
}
int MatrixTests(const Epetra_BlockMap & Map, const Epetra_LocalMap & LocalMap, int NumVectors, bool verbose) { const Epetra_Comm & Comm = Map.Comm(); int ierr = 0, i; int IndexBase = 0; double *residual = new double[NumVectors]; /* get ID of this processor */ // Test GEMM first. 7 cases: // Num // OPERATIONS case Notes // 1) C(local) = A^X(local) * B^X(local) 4 (X=Trans or Not, No Comm needed) // 2) C(local) = A^T(distr) * B (distr) 1 (2D dot product, replicate C) // 3) C(distr) = A (distr) * B^X(local) 2 (2D vector update, no Comm needed) // ================================================================== // Case 1 through 4 (A, B, C all local) Strided and non-strided cases // ================================================================== // Construct MultiVectors { Epetra_MultiVector A(LocalMap, NumVectors); Epetra_MultiVector B(LocalMap, NumVectors); Epetra_LocalMap Map2d(NumVectors, IndexBase, Comm); Epetra_MultiVector C(Map2d, NumVectors); Epetra_MultiVector C_GEMM(Map2d, NumVectors); double **App, **Bpp, **Cpp; Epetra_MultiVector *Ap, *Bp, *Cp; // For testing non-strided mode, create MultiVectors that are scattered throughout memory App = new double *[NumVectors]; Bpp = new double *[NumVectors]; Cpp = new double *[NumVectors]; for (i=0; i<NumVectors; i++) App[i] = new double[A.MyLength()+i]; for (i=0; i<NumVectors; i++) Bpp[i] = new double[B.MyLength()+i]; for (i=0; i<NumVectors; i++) Cpp[i] = new double[C.MyLength()+i]; Epetra_MultiVector A1(View, LocalMap, App, NumVectors); Epetra_MultiVector B1(View, LocalMap, Bpp, NumVectors); Epetra_MultiVector C1(View, Map2d, Cpp, NumVectors); for (int strided = 0; strided<2; strided++) { // Loop through all trans cases using a variety of values for alpha and beta for (i=0; i<4; i++) { char transa = 'N'; if (i>1) transa = 'T'; char transb = 'N'; if (i%2!=0) transb = 'T'; double alpha = (double) i+1; double beta = (double) (i/2); EPETRA_TEST_ERR(C.Random(),ierr); // Fill C with random numbers int localierr = 
BuildMatrixTests(C,transa, transb, alpha, A, B, beta, C_GEMM ); if (localierr!=-2) { // -2 means the shapes didn't match and we skip the tests if (strided) { Ap = &A; Bp = &B; Cp = &C; } else { A.ExtractCopy(App); Ap = &A1; B.ExtractCopy(Bpp); Bp = &B1; C.ExtractCopy(Cpp); Cp = &C1; } localierr = Cp->Multiply(transa, transb, alpha, *Ap, *Bp, beta); if (localierr!=-2) { // -2 means the shapes didn't match and we skip the tests ierr += Cp->Update(-1.0, C_GEMM, 1.0); ierr += Cp->Norm2(residual); if (verbose) { cout << "XXXXX Replicated Local MultiVector GEMM tests"; if (strided) cout << " (Strided Multivectors)" << endl; else cout << " (Non-Strided Multivectors)" << endl; cout << " alpha = " << alpha << ", beta = " << beta <<", transa = "<<transa <<", transb = " << transb; } if (BadResidual(verbose,residual, NumVectors)) return(-1); } } } } for (i=0; i<NumVectors; i++) { delete [] App[i]; delete [] Bpp[i]; delete [] Cpp[i]; } delete [] App; delete [] Bpp; delete [] Cpp; } // ==================================== // Case 5 (A, B distributed C local) // ==================================== // Construct MultiVectors { Epetra_MultiVector A(Map, NumVectors); Epetra_MultiVector B(Map, NumVectors); Epetra_LocalMap Map2d(NumVectors, IndexBase, Comm); Epetra_MultiVector C(Map2d, NumVectors); Epetra_MultiVector C_GEMM(Map2d, NumVectors); char transa = 'T'; char transb = 'N'; double alpha = 2.0; double beta = 1.0; EPETRA_TEST_ERR(C.Random(),ierr); // Fill C with random numbers ierr += BuildMatrixTests(C, transa, transb, alpha, A, B, beta, C_GEMM ); int localierr = C.Multiply(transa, transb, alpha, A, B, beta); if (localierr!=-2) { // -2 means the shapes didn't match ierr += C.Update(-1.0, C_GEMM, 1.0); ierr += C.Norm2(residual); if (verbose) { cout << "XXXXX Generalized 2D dot product via GEMM call " << endl; cout << " alpha = " << alpha << ", beta = " << beta <<", transa = "<<transa <<", transb = " << transb; } if (BadResidual(verbose,residual, NumVectors)) return(-1); } } // 
==================================== // Case 6-7 (A, C distributed, B local) // ==================================== // Construct MultiVectors { Epetra_MultiVector A(Map, NumVectors); Epetra_LocalMap Map2d(NumVectors, IndexBase, Comm); Epetra_MultiVector B(Map2d, NumVectors); Epetra_MultiVector C(Map, NumVectors); Epetra_MultiVector C_GEMM(Map, NumVectors); for (i=0; i<2; i++) { char transa = 'N'; char transb = 'N'; if (i>0) transb = 'T'; double alpha = 2.0; double beta = 1.1; EPETRA_TEST_ERR(C.Random(),ierr); // Fill C with random numbers ierr += BuildMatrixTests(C,transa, transb, alpha, A, B, beta, C_GEMM ); ierr += C.Multiply(transa, transb, alpha, A, B, beta); ierr += C.Update(-1.0, C_GEMM, 1.0); ierr += C.Norm2(residual); if (verbose) { cout << "XXXXX Generalized 2D vector update via GEMM call " << endl; cout << " alpha = " << alpha << ", beta = " << beta <<", transa = "<<transa <<", transb = " << transb; } if (BadResidual(verbose,residual, NumVectors)) return(-1); } } // ==================================== // LocalMap Tests // ==================================== // Construct MultiVectors { int localLength = 10; double *localMinValue = new double[localLength]; double *localMaxValue = new double[localLength]; double *localNorm1 = new double[localLength]; double *localDot = new double[localLength]; double *localNorm2 = new double[localLength]; double *localMeanValue = new double[localLength]; Epetra_LocalMap MapSmall(localLength, IndexBase, Comm); Epetra_MultiVector A(MapSmall, NumVectors); double doubleLocalLength = (double) localLength; for (int j=0; j< NumVectors; j++) { for (i=0; i< localLength-1; i++) A[j][i] = (double) (i+1); A[j][localLength-1] = (double) (localLength+j); // Only the last value differs across multivectors localMinValue[j] = A[j][0]; // Increasing values localMaxValue[j] = A[j][localLength-1]; localNorm1[j] = (doubleLocalLength-1.0)*(doubleLocalLength)/2.0+A[j][localLength-1]; localDot[j] = 
(doubleLocalLength-1.0)*(doubleLocalLength)*(2.0*(doubleLocalLength-1.0)+1.0)/6.0+A[j][localLength-1]*A[j][localLength-1]; localNorm2[j] = std::sqrt(localDot[j]); localMeanValue[j] = localNorm1[j]/doubleLocalLength; } ierr += A.MinValue(residual); for (int j=0; j<NumVectors; j++) residual[j] = std::abs(residual[j] - localMinValue[j]); if (verbose) cout << "XXXXX MinValue" << endl; if (BadResidual(verbose,residual, NumVectors)) return(-1); ierr += A.MaxValue(residual); for (int j=0; j<NumVectors; j++) residual[j] = std::abs(residual[j] - localMaxValue[j]); if (verbose) cout << "XXXXX MaxValue" << endl; if (BadResidual(verbose,residual, NumVectors)) return(-1); ierr += A.Norm1(residual); for (int j=0; j<NumVectors; j++) residual[j] = std::abs(residual[j] - localNorm1[j]); if (verbose) cout << "XXXXX Norm1" << endl; if (BadResidual(verbose,residual, NumVectors)) return(-1); ierr += A.Dot(A,residual); for (int j=0; j<NumVectors; j++) residual[j] = std::abs(residual[j] - localDot[j]); if (verbose) cout << "XXXXX Dot" << endl; if (BadResidual(verbose,residual, NumVectors)) return(-1); ierr += A.Norm2(residual); for (int j=0; j<NumVectors; j++) residual[j] = std::abs(residual[j] - localNorm2[j]); if (verbose) cout << "XXXXX Norm2" << endl; if (BadResidual(verbose,residual, NumVectors)) return(-1); ierr += A.MeanValue(residual); for (int j=0; j<NumVectors; j++) residual[j] = std::abs(residual[j] - localMeanValue[j]); if (verbose) cout << "XXXXX MeanValue" << endl; if (BadResidual(verbose,residual, NumVectors)) return(-1); delete [] localMinValue; delete [] localMaxValue; delete [] localNorm1; delete [] localDot; delete [] localNorm2; delete [] localMeanValue; } delete [] residual; return(ierr); }
USING_NAMESPACE_ACADO

// Exports and runs two integrators for the same 28-state quadcopter model:
// once with the whole ODE given as a single nonlinear system (f1) and once
// split into a linear input stage, a nonlinear stage (f2) and a linear
// output stage (f3).  Both use the same init and control files.
int main( ){

    // Differential states.  The declaration order is kept unchanged because
    // the equations pushed into f1 below follow the same order.
    // ----------------------------------------------------------
    DifferentialState dT1, dT2, dT3, dT4;
    DifferentialState T1, T2, T3, T4;
    DifferentialState W1, W2, W3, W4;
    DifferentialState q1, q2, q3, q4;
    DifferentialState Omega1, Omega2, Omega3;
    DifferentialState V1, V2, V3;
    DifferentialState P1, P2, P3;      // x, y, z
    DifferentialState IP1, IP2, IP3;

    Control U1, U2, U3, U4;

    DifferentialEquation f1, f2;

    // Model constants.
    // ----------------------------------------------------------
    const double rho   = 1.23;
    const double A     = 0.1;
    const double Cl    = 0.25;
    const double Cd    = 0.3*Cl;
    const double m     = 10;
    const double g     = 9.81;
    const double L     = 0.5;
    const double Jp    = 1e-2;
    const double xi    = 1e-2;
    const double J1    = 0.25;
    const double J2    = 0.25;
    const double J3    = 1;
    const double gain  = 1e-4;
    const double alpha = 0.0;   // multiplies the quaternion norm-correction term

    // Quadcopter ODE model in fully nonlinear form:
    // ----------------------------------------------------------
    // rows 0-3
    f1 << U1*gain;
    f1 << U2*gain;
    f1 << U3*gain;
    f1 << U4*gain;
    // rows 4-7
    f1 << dT1;
    f1 << dT2;
    f1 << dT3;
    f1 << dT4;
    // rows 8-11
    f1 << (T1 - W1*xi)/Jp;
    f1 << (T2 - W2*xi)/Jp;
    f1 << (T3 - W3*xi)/Jp;
    f1 << (T4 - W4*xi)/Jp;
    // rows 12-15 (q1..q4)
    f1 << - (Omega1*q2)/2 - (Omega2*q3)/2 - (Omega3*q4)/2 - (alpha*q1*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    f1 << (Omega1*q1)/2 - (Omega3*q3)/2 + (Omega2*q4)/2 - (alpha*q2*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    f1 << (Omega2*q1)/2 + (Omega3*q2)/2 - (Omega1*q4)/2 - (alpha*q3*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    f1 << (Omega3*q1)/2 - (Omega2*q2)/2 + (Omega1*q3)/2 - (alpha*q4*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    // rows 16-18 (Omega1..Omega3)
    f1 << (J3*Omega2*Omega3 - J2*Omega2*Omega3 + (A*Cl*L*rho*(W2*W2 - W4*W4))/2)/J1;
    f1 << -(J3*Omega1*Omega3 - J1*Omega1*Omega3 + (A*Cl*L*rho*(W1*W1 - W3*W3))/2)/J2;
    f1 << (J2*Omega1*Omega2 - J1*Omega1*Omega2 + (A*Cd*rho*(W1*W1 - W2*W2 + W3*W3 - W4*W4))/2)/J3;
    // rows 19-21 (V1..V3)
    f1 << (A*Cl*rho*(2*q1*q3 + 2*q2*q4)*(W1*W1 + W2*W2 + W3*W3 + W4*W4))/(2*m);
    f1 << -(A*Cl*rho*(2*q1*q2 - 2*q3*q4)*(W1*W1 + W2*W2 + W3*W3 + W4*W4))/(2*m);
    f1 << (A*Cl*rho*(W1*W1 + W2*W2 + W3*W3 + W4*W4)*(q1*q1 - q2*q2 - q3*q3 + q4*q4))/(2*m) - g;
    // rows 22-24 (P1..P3)
    f1 << V1;
    f1 << V2;
    f1 << V3;
    // rows 25-27 (IP1..IP3)
    f1 << P1;
    f1 << P2;
    f1 << P3;

    // Quadcopter ODE model in 3-stage format:
    // ----------------------------------------------------------

    // LINEAR INPUT SYSTEM (STAGE 1):
    Matrix M1, A1, B1;
    M1 = eye(12);
    A1 = zeros(12,12);
    B1 = zeros(12,4);

    for( int k = 0; k < 4; ++k ) {
        A1(4+k,k)   = 1.0;
        A1(8+k,4+k) = 1.0/Jp;
        A1(8+k,8+k) = -xi/Jp;
        B1(k,k)     = gain;
    }

    // NONLINEAR SYSTEM (STAGE 2):
    f2 << - (Omega1*q2)/2 - (Omega2*q3)/2 - (Omega3*q4)/2 - (alpha*q1*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    f2 << (Omega1*q1)/2 - (Omega3*q3)/2 + (Omega2*q4)/2 - (alpha*q2*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    f2 << (Omega2*q1)/2 + (Omega3*q2)/2 - (Omega1*q4)/2 - (alpha*q3*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    f2 << (Omega3*q1)/2 - (Omega2*q2)/2 + (Omega1*q3)/2 - (alpha*q4*(q1*q1 + q2*q2 + q3*q3 + q4*q4 - 1))/(q1*q1 + q2*q2 + q3*q3 + q4*q4);
    f2 << (J3*Omega2*Omega3 - J2*Omega2*Omega3 + (A*Cl*L*rho*(W2*W2 - W4*W4))/2)/J1;
    f2 << -(J3*Omega1*Omega3 - J1*Omega1*Omega3 + (A*Cl*L*rho*(W1*W1 - W3*W3))/2)/J2;
    f2 << (J2*Omega1*Omega2 - J1*Omega1*Omega2 + (A*Cd*rho*(W1*W1 - W2*W2 + W3*W3 - W4*W4))/2)/J3;
    f2 << (A*Cl*rho*(2*q1*q3 + 2*q2*q4)*(W1*W1 + W2*W2 + W3*W3 + W4*W4))/(2*m);
    f2 << -(A*Cl*rho*(2*q1*q2 - 2*q3*q4)*(W1*W1 + W2*W2 + W3*W3 + W4*W4))/(2*m);
    f2 << (A*Cl*rho*(W1*W1 + W2*W2 + W3*W3 + W4*W4)*(q1*q1 - q2*q2 - q3*q3 + q4*q4))/(2*m) - g;

    // LINEAR OUTPUT SYSTEM (STAGE 3):
    Matrix M3, A3;
    M3 = eye(6);
    A3 = zeros(6,6);

    for( int k = 0; k < 3; ++k ) {
        A3(3+k,k) = 1.0;
    }

    OutputFcn f3;
    f3 << V1;
    f3 << V2;
    f3 << V3;
    f3 << 0.0;
    f3 << 0.0;
    f3 << 0.0;

    // Export 1: fully nonlinear model.
    // ----------------------------------------------------------
    SIMexport sim1( 10, 1.0 );

    sim1.setModel( f1 );
    sim1.set( INTEGRATOR_TYPE, INT_IRK_GL4 );
    sim1.set( NUM_INTEGRATOR_STEPS, 50 );
    sim1.setTimingSteps( 10000 );

    acadoPrintf( "-----------------------------------------------------------\n Using a QuadCopter ODE model in fully nonlinear form:\n-----------------------------------------------------------\n" );
    sim1.exportAndRun( "quadcopter_export", "init_quadcopter.txt", "controls_quadcopter.txt" );

    // Export 2: 3-stage model.
    // ----------------------------------------------------------
    SIMexport sim2( 10, 1.0 );

    sim2.setLinearInput( M1, A1, B1 );
    sim2.setModel( f2 );
    sim2.setLinearOutput( M3, A3, f3 );
    sim2.set( INTEGRATOR_TYPE, INT_IRK_GL4 );
    sim2.set( NUM_INTEGRATOR_STEPS, 50 );
    sim2.setTimingSteps( 10000 );

    acadoPrintf( "-----------------------------------------------------------\n Using a QuadCopter ODE model in 3-stage format:\n-----------------------------------------------------------\n" );
    sim2.exportAndRun( "quadcopter_export", "init_quadcopter.txt", "controls_quadcopter.txt" );

    return 0;
}
inline void HemmRUA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HemmRUA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T,MR, STAR> B1Adj_MR_STAR(g); DistMatrix<T,VC, STAR> B1Adj_VC_STAR(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MC, STAR> Z1Adj_MC_STAR(g); DistMatrix<T,MR, STAR> Z1Adj_MR_STAR(g); DistMatrix<T,MR, MC > Z1Adj_MR_MC(g); DistMatrix<T> Z1Adj(g); B1Adj_MR_STAR.AlignWith( A ); B1Adj_VC_STAR.AlignWith( A ); B1_STAR_MC.AlignWith( A ); Z1Adj_MC_STAR.AlignWith( A ); Z1Adj_MR_STAR.AlignWith( A ); Matrix<T> Z1Local; Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CT.Height() < C.Height() ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); Z1Adj_MR_MC.AlignWith( C1 ); Zeros( C1.Width(), C1.Height(), Z1Adj_MC_STAR ); Zeros( C1.Width(), C1.Height(), Z1Adj_MR_STAR ); //--------------------------------------------------------------------// B1Adj_MR_STAR.AdjointFrom( B1 ); B1Adj_VC_STAR = B1Adj_MR_STAR; B1_STAR_MC.AdjointFrom( B1Adj_VC_STAR ); LocalSymmetricAccumulateRU ( ADJOINT, alpha, A, B1_STAR_MC, B1Adj_MR_STAR, Z1Adj_MC_STAR, Z1Adj_MR_STAR ); Z1Adj.SumScatterFrom( Z1Adj_MC_STAR ); Z1Adj_MR_MC = Z1Adj; Z1Adj_MR_MC.SumScatterUpdate( T(1), Z1Adj_MR_STAR ); Adjoint( Z1Adj_MR_MC.LockedLocalMatrix(), Z1Local ); Axpy( T(1), Z1Local, C1.LocalMatrix() ); //--------------------------------------------------------------------// Z1Adj_MR_MC.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
void Render::addContainer() { glm::vec3 A1(0.0f, 0.0f, 0.0f); glm::vec3 B1(1.0f, 0.0f, 0.0f); glm::vec3 C1(1.0f, 0.0f, 1.0f); glm::vec3 D1(0.0f, 0.0f, 1.0f); glm::vec3 A2(0.0f, 1.0f, 0.0f); glm::vec3 B2(1.0f, 1.0f, 0.0f); glm::vec3 C2(1.0f, 1.0f, 1.0f); glm::vec3 D2(0.0f, 1.0f, 1.0f); glm::vec3 normal(0.0f, 0.0f, 0.0f); vertices.push_back(A1); normals.push_back(normal); vertices.push_back(B1); normals.push_back(normal); vertices.push_back(A1); normals.push_back(normal); vertices.push_back(D1); normals.push_back(normal); vertices.push_back(B1); normals.push_back(normal); vertices.push_back(C1); normals.push_back(normal); vertices.push_back(D1); normals.push_back(normal); vertices.push_back(C1); normals.push_back(normal); vertices.push_back(A2); normals.push_back(normal); vertices.push_back(B2); normals.push_back(normal); vertices.push_back(A2); normals.push_back(normal); vertices.push_back(D2); normals.push_back(normal); vertices.push_back(B2); normals.push_back(normal); vertices.push_back(C2); normals.push_back(normal); vertices.push_back(D2); normals.push_back(normal); vertices.push_back(C2); normals.push_back(normal); vertices.push_back(A1); normals.push_back(normal); vertices.push_back(A2); normals.push_back(normal); vertices.push_back(B1); normals.push_back(normal); vertices.push_back(B2); normals.push_back(normal); vertices.push_back(C1); normals.push_back(normal); vertices.push_back(C2); normals.push_back(normal); vertices.push_back(D1); normals.push_back(normal); vertices.push_back(D2); normals.push_back(normal); }
double Bezier::yFromT(double t) const { // http://www.lemoda.net/maths/bezier-length/index.html return m_endpoint0.y() * B0(t) + m_cp0.y() * B1(t) + m_cp1.y() * B2(t) + m_endpoint1.y() * B3(t); }
int main() { std::cout << "============== Test 1 ==============" << std::endl << std::endl; DoubleInterval A00(2,3); DoubleInterval A01(0,1); DoubleInterval A10(1,2); DoubleInterval A11(2,3); DoubleInterval B0(0,120); DoubleInterval B1(60,240); DoubleMatrix *A = new DoubleMatrix(2,2); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A: " << std::endl; std::cout << *A << std::endl; DoubleVector *b = new DoubleVector(2, (DoubleInterval)0); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 2 ==============" << std::endl << std::endl; A00.assign(-1,3); A = new DoubleMatrix(2,2); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A: " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 3 ==============" << std::endl << std::endl; A00.assign(2,3); A01.assign(-5,6); A = new DoubleMatrix(2,2); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A: " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(1,3); B1.assign(3,4); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& 
error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 4 ==============" << std::endl << std::endl; A00.assign(-2,1); A01.assign(1,5); A = new DoubleMatrix(2,2); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A: " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(1,3); B1.assign(3,4); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 5 ==============" << std::endl << std::endl; A00.assign(2,3); A01.assign(3,4); DoubleInterval A02(1,2); DoubleInterval A12(0,1); DoubleInterval A21(6,8); DoubleInterval A22(4,5); B0.assign(0,120); B1.assign(310,440); DoubleInterval B2(50,120); A = new DoubleMatrix(3,3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = A02; (*A)(1,0) = A00; (*A)(1,1) = A00; (*A)(1,2) = A12; (*A)(2,0) = A00; (*A)(2,1) = A21; (*A)(2,2) = A22; std::cout << "A: " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; /*std::cout << "============== Test 6 ==============" << std::endl << std::endl; A00.assign(2,3); A = new DoubleMatrix(1,1); (*A)(0,0) = A00; std::cout << "A: " << std::endl; std::cout << *A << std::endl; B0.assign(4,5); b = new DoubleVector(1, (DoubleInterval)0); (*b)[0] = B0; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; try { 
get_exact_system(*A,*b, argc, argv); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 7 ==============" << std::endl << std::endl; A00.assign(1,2); A = new DoubleMatrix(1,1); (*A)(0,0) = A00; std::cout << "A: " << std::endl; std::cout << *A << std::endl; B0.assign(-5,10); b = new DoubleVector(1, (DoubleInterval)0); (*b)[0] = B0; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b, argc, argv); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; }*/ std::cout << "============== Test 15 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); (*A)(0,0) = A10; (*A)(0,1) = A11; (*A)(1,0) = A00; (*A)(1,1) = A01; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); (*b)[0] = B1; (*b)[1] = B0; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 16 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); A00.assign(3,4); A01.assign(1,2); A10.assign(0,1); A11.assign(7,8); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(2,4); B1.assign(-1,1); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } 
catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 17 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); A00.assign(2,4); A01.assign(8,10); A10.assign(2,4); A11.assign(4,6); (*A)(0,0) = -A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(4,6); B1.assign(8,10); (*b)[0] = -B0; (*b)[1] = -B1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 18 ==============" << std::endl << std::endl; A = new DoubleMatrix(4,4); A00.assign(4,6); A01.assign(-6,-4); A10.assign(9,11); A11.assign(-11,-9); DoubleInterval A33(-1,1); (*A)(0,0) = A00; (*A)(0,1) = A33; (*A)(0,2) = A33; (*A)(0,3) = A33; (*A)(1,0) = A33; (*A)(1,1) = A01; (*A)(1,2) = A33; (*A)(1,3) = A33; (*A)(2,0) = A33; (*A)(2,1) = A33; (*A)(2,2) = A10; (*A)(2,3) = A33; (*A)(3,0) = A33; (*A)(3,1) = A33; (*A)(3,2) = A33; (*A)(3,3) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(4, (DoubleInterval)0); B0.assign(-2,4); B1.assign(1,8); B2.assign(-4,10); DoubleInterval B3(2,12); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; (*b)[3] = B3; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 19 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = 
A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(-14,14); B1.assign(-9,9); B2.assign(-3,3); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 20 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(-14,0); B1.assign(-9,0); B2.assign(-3,0); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 21 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(0,14); 
B1.assign(0,9); B2.assign(0,3); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 22 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(2,14); B1.assign(-9,-3); B2.assign(-3,1); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 23 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(2,3); A01.assign(4,5); A02.assign(1,2); A10.assign(-6,-5); A11.assign(-3,-2); A12.assign(3,4); DoubleInterval A20(-4,0); A21.assign(-5,-4); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = A02; (*A)(1,0) = A10; (*A)(1,1) = A11; (*A)(1,2) = A12; (*A)(2,0) = A20; (*A)(2,1) = A21; (*A)(2,2) = A00; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(2,14); B1.assign(9,300); B2.assign(3,100); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << 
std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 24 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); A00.assign(2,3); A01.assign(-1,1); A10.assign(0,5); A11.assign(3,4); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(2,14); B1.assign(3,9); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; std::cout << "============== Test 25 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); A00.assign(1,1000); A01.assign(1,1000); A10.assign(-1000,1); A11.assign(1,1000); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(1,2); B1.assign(3,4); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; try { get_exact_system(*A,*b); } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; return 0; }
int main(int argc, char const *argv[]) { Eigen::setNbThreads(NumCores); #ifdef MKL mkl_set_num_threads(NumCores); #endif INFO("Eigen3 uses " << Eigen::nbThreads() << " threads."); int L; RealType J12ratio; int OBC; int N; RealType Uin, phi; std::vector<RealType> Vin; LoadParameters( "conf.h5", L, J12ratio, OBC, N, Uin, Vin, phi); HDF5IO file("BSSH.h5"); // const int L = 5; // const bool OBC = true; // const RealType J12ratio = 0.010e0; INFO("Build Lattice - "); std::vector<ComplexType> J; if ( OBC ){ J = std::vector<ComplexType>(L - 1, ComplexType(1.0, 0.0)); for (size_t cnt = 0; cnt < L-1; cnt+=2) { J.at(cnt) *= J12ratio; } } else{ J = std::vector<ComplexType>(L, ComplexType(1.0, 0.0)); for (size_t cnt = 0; cnt < L; cnt+=2) { J.at(cnt) *= J12ratio; } if ( std::abs(phi) > 1.0e-10 ){ J.at(L-1) *= exp( ComplexType(0.0e0, 1.0e0) * phi ); // INFO(exp( ComplexType(0.0e0, 1.0e0) * phi )); } } for ( auto &val : J ){ INFO_NONEWLINE(val << " "); } INFO(""); const std::vector< Node<ComplexType>* > lattice = NN_1D_Chain(L, J, OBC); file.saveNumber("1DChain", "L", L); file.saveNumber("1DChain", "U", Uin); file.saveStdVector("1DChain", "J", J); for ( auto < : lattice ){ if ( !(lt->VerifySite()) ) RUNTIME_ERROR("Wrong lattice setup!"); } INFO("DONE!"); INFO("Build Basis - "); // int N1 = (L+1)/2; Basis B1(L, N); B1.Boson(); // std::vector< std::vector<int> > st = B1.getBStates(); // std::vector< RealType > tg = B1.getBTags(); // for (size_t cnt = 0; cnt < tg.size(); cnt++) { // INFO_NONEWLINE( std::setw(3) << cnt << " - "); // for (auto &j : st.at(cnt)){ // INFO_NONEWLINE(j << " "); // } // INFO("- " << tg.at(cnt)); // } file.saveNumber("1DChain", "N", N); // file.saveStdVector("Basis", "States", st); // file.saveStdVector("Basis", "Tags", tg); INFO("DONE!"); INFO_NONEWLINE("Build Hamiltonian - "); std::vector<Basis> Bases; Bases.push_back(B1); Hamiltonian<ComplexType> ham( Bases ); std::vector< std::vector<ComplexType> > Vloc; std::vector<ComplexType> Vtmp;//(L, 1.0); for ( 
RealType &val : Vin ){ Vtmp.push_back((ComplexType)val); } Vloc.push_back(Vtmp); std::vector< std::vector<ComplexType> > Uloc; // std::vector<ComplexType> Utmp(L, ComplexType(10.0e0, 0.0e0) ); std::vector<ComplexType> Utmp(L, (ComplexType)Uin); Uloc.push_back(Utmp); ham.BuildLocalHamiltonian(Vloc, Uloc, Bases); ham.BuildHoppingHamiltonian(Bases, lattice); ham.BuildTotalHamiltonian(); INFO("DONE!"); INFO_NONEWLINE("Diagonalize Hamiltonian - "); std::vector<RealType> Val; Hamiltonian<ComplexType>::VectorType Vec; ham.eigh(Val, Vec); INFO("GS energy = " << Val.at(0)); file.saveVector("GS", "EVec", Vec); file.saveStdVector("GS", "EVal", Val); INFO("DONE!"); std::vector<ComplexType> Nbi = Ni( Bases, Vec ); for (auto &n : Nbi ){ INFO( n << " " ); } ComplexMatrixType Nij = NiNj( Bases, Vec ); INFO(Nij); INFO(Nij.diagonal()); file.saveStdVector("Obs", "Nb", Nbi); file.saveMatrix("Obs", "Nij", Nij); return 0; }
int convert_Intel_records( FILE *ifp, char *inm, FILE *ofp, char *onm) { char buff[512]; char *p; u8 cksum; int incksum; int c; int rectype; /* record type */ int len; /* data length of current line */ u32 addr; u32 base_address = 0; bool endrecord = FALSE; buffer_rec tb; while ( ! endrecord && (fgets(buff, sizeof(buff), ifp))) { p = &buff[0]; if (p[strlen(p)-1] == '\n') /* get rid of newline */ p[strlen(p)-1] = '\0'; if (p[strlen(p)-1] == '\r') /* get rid of any CR */ p[strlen(p)-1] = '\0'; tb.dl_count = 0; if (*p != ':') badformat(p, inm, BADFMT); p++; if ((len = getbyte(&p)) == -1) /* record len */ badformat(buff, inm, BADLEN); if ((addr = get2bytes(&p)) == -1L) /* record addr */ badformat(buff, inm, BADADDR); rectype = getbyte(&p); cksum = len + B0(addr) + B1(addr) + rectype; switch (rectype) { case 0x00: /* normal data record */ tb.dl_destaddr = base_address + addr; while (len--) { if ((c = getbyte(&p)) == -1) badformat(buff, inm, BADDATA); cksum += c; filesum += c; tb.dl_buf[tb.dl_count++] = c; } break; case 0x01: /* execution start address */ base_address = addr; endrecord = TRUE; break; case 0x02: /* new base */ if ((base_address = get2bytes(&p)) == -1L) badformat(buff, inm, BADBASE); cksum += B0(base_address) + B1(base_address); base_address <<= 4; break; case 0x03: /* seg/off execution start address */ { u32 seg, off; seg = get2bytes(&p); off = get2bytes(&p); if ((seg == -1L) || (off == -1L)) badformat(buff, inm, BADADDR); cksum += B0(seg) + B1(seg) + B0(off) + B1(off); tb.dl_jumpaddr = (seg << 4) + off; break; } default: error(0, "unknown Intel-hex record type: 0x%02x", rectype); badformat(buff, inm, BADTYPE); } /* * Verify checksums are correct in file. */ cksum = (-cksum) & 0xff; if ((incksum = getbyte(&p)) == -1) badformat(buff, inm, BADCSUM); if (((u8) incksum) != cksum) badformat(buff, inm, MISCSUM); if (tb.dl_count) write_record(&tb, ofp); } return 0; }
inline void Her2kLC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::Her2kLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Her2kLC:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( LOWER, ADJOINT, TRANSPOSE, ADJOINT, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); 
//--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
int convert_S_records( FILE *ifp, char *inm, FILE *ofp, char *onm) { char buff[512]; char *p; u8 cksum; int incksum; int c; int len; /* data length of current line */ int rectype; /* record type */ u32 addr; bool endrecord = FALSE; buffer_rec tb; while ( ! endrecord && (fgets(buff, sizeof(buff), ifp))) { p = &buff[0]; if (p[strlen(p)-1] == '\n') /* get rid of newline */ p[strlen(p)-1] = '\0'; if (p[strlen(p)-1] == '\r') /* get rid of any CR */ p[strlen(p)-1] = '\0'; tb.dl_count = 0; if (*p != 'S') badformat(p, inm, BADFMT); p++; if ((rectype = getnibble(&p)) == -1) /* record type */ badformat(buff, inm, BADTYPE); if ((len = getbyte(&p)) == -1) /* record len */ badformat(buff, inm, BADLEN); cksum = len; switch (rectype) { case 0x00: /* comment field, ignored */ goto write_it; case 0x01: /* data record, 16 bit addr */ if ((addr = get2bytes(&p)) == -1L) badformat(buff, inm, BADADDR); len -= 3; goto doit; case 0x02: /* ... 24 bit addr */ if ((addr = get3bytes(&p)) == -1L) badformat(buff, inm, BADADDR); len -= 4; goto doit; case 0x03: /* ... 32 bit addr */ if ((addr = get4bytes(&p)) == -1L) badformat(buff, inm, BADADDR); len -= 5; doit: cksum += B0(addr) + B1(addr) + B2(addr) + B3(addr); tb.dl_destaddr = addr; while (len--) { if ((c = getbyte(&p)) == -1) badformat(buff, inm, BADDATA); cksum += c; filesum += c; tb.dl_buf[tb.dl_count++] = c; } break; case 0x07: /* 32 bit end record */ if ((addr = get4bytes(&p)) == -1L) badformat(buff, inm, BADADDR); goto end_rec; case 0x08: /* 24 bit end record */ if ((addr = get3bytes(&p)) == -1L) badformat(buff, inm, BADADDR); goto end_rec; case 0x09: /* 16 bit end record */ if ((addr = get2bytes(&p)) == -1L) badformat(buff, inm, BADADDR); end_rec: cksum += B0(addr) + B1(addr) + B2(addr) + B3(addr); tb.dl_jumpaddr = addr; break; default: error(0, "unknown Motorola-S record type: 0x%02x", rectype); badformat(buff, inm, BADTYPE); break; } /* * Verify checksums are correct in file. 
*/ cksum = (~cksum) & 0xff; if ((incksum = getbyte(&p)) == -1) badformat(buff, inm, BADCSUM); if (((u8) incksum) != cksum) badformat(buff, inm, MISCSUM); write_it: if (tb.dl_count) write_record(&tb, ofp); } return 0; }
inline void internal::HemmLLC ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::HemmLLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T,MC,MR> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T,MC,MR> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Adj_MR_STAR(g); // Start the algorithm Scal( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); ARowPan.LockedView1x2( A10, A11 ); AColPan.LockedView2x1 ( A11, A21 ); CAbove.View2x1 ( C0, C1 ); CBelow.View2x1 ( C1, C2 ); AColPan_MC_STAR.AlignWith( CBelow ); ARowPan_STAR_MC.AlignWith( CAbove ); B1Adj_MR_STAR.AlignWith( C ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( LEFT, LOWER, 0, AColPan_MC_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC ); B1Adj_MR_STAR.AdjointFrom( B1 ); internal::LocalGemm ( NORMAL, ADJOINT, alpha, AColPan_MC_STAR, B1Adj_MR_STAR, (T)1, CBelow ); internal::LocalGemm ( ADJOINT, ADJOINT, alpha, ARowPan_STAR_MC, B1Adj_MR_STAR, (T)1, CAbove ); 
//--------------------------------------------------------------------// AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); B1Adj_MR_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
int main(int argc, char *argv[]) { int n = 10; int ierr = 0; double reltol = 1.0e-14; double abstol = 1.0e-14; int MyPID = 0; try { // Initialize MPI #ifdef HAVE_MPI MPI_Init(&argc,&argv); #endif // Create a communicator for Epetra objects #ifdef HAVE_MPI Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif MyPID = Comm.MyPID(); // Create the map Epetra_Map map(n, 0, Comm); bool verbose = false; // Check for verbose output if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; // Seed the random number generator in Teuchos. We create random // bordering matrices and it is possible different processors might generate // different matrices. By setting the seed, this shouldn't happen. Teuchos::ScalarTraits<double>::seedrandom(12345); // Create and initialize the parameter vector LOCA::ParameterVector pVector; pVector.addParameter("Param 1", 1.69); pVector.addParameter("Param 2", -9.7); pVector.addParameter("Param 3", 0.35); pVector.addParameter("Param 4", -0.78); pVector.addParameter("Param 5", 2.53); // Create parameter list Teuchos::RCP<Teuchos::ParameterList> paramList = Teuchos::rcp(new Teuchos::ParameterList); Teuchos::ParameterList& nlParams = paramList->sublist("NOX"); Teuchos::ParameterList& nlPrintParams = nlParams.sublist("Printing"); nlPrintParams.set("MyPID", MyPID); if (verbose) nlPrintParams.set("Output Information", NOX::Utils::Error + NOX::Utils::Details + NOX::Utils::OuterIteration + NOX::Utils::InnerIteration + NOX::Utils::Warning + NOX::Utils::TestDetails + NOX::Utils::StepperIteration + NOX::Utils::StepperDetails); else nlPrintParams.set("Output Information", NOX::Utils::Error); // Create global data object Teuchos::RCP<LOCA::GlobalData> globalData = LOCA::createGlobalData(paramList); Epetra_Vector clone_vec(map); NOX::Epetra::Vector nox_clone_vec(clone_vec); Teuchos::RCP<NOX::Abstract::Vector> x = nox_clone_vec.clone(NOX::ShapeCopy); x->random(); Teuchos::RCP<NOX::Abstract::MultiVector> dx1 = 
nox_clone_vec.createMultiVector(3); Teuchos::RCP<NOX::Abstract::MultiVector> dx2 = nox_clone_vec.createMultiVector(1); Teuchos::RCP<NOX::Abstract::MultiVector> dx3 = nox_clone_vec.createMultiVector(2); Teuchos::RCP<NOX::Abstract::MultiVector> dx4 = nox_clone_vec.createMultiVector(2); dx1->random(); dx2->random(); dx3->init(0.0); dx4->random(); Teuchos::RCP<NOX::Abstract::MultiVector> dx_all = dx1->clone(NOX::DeepCopy); dx_all->augment(*dx2); dx_all->augment(*dx3); dx_all->augment(*dx4); NOX::Abstract::MultiVector::DenseMatrix dp1(dx1->numVectors(), pVector.length()); NOX::Abstract::MultiVector::DenseMatrix dp2(dx2->numVectors(), pVector.length()); NOX::Abstract::MultiVector::DenseMatrix dp3(dx3->numVectors(), pVector.length()); NOX::Abstract::MultiVector::DenseMatrix dp4(dx4->numVectors(), pVector.length()); dp1.random(); dp2.random(); dp3.random(); dp4.random(); NOX::Abstract::MultiVector::DenseMatrix dp_all(dx_all->numVectors(), pVector.length()); for (int j=0; j<dp_all.numCols(); j++) { for (int i=0; i<dp1.numRows(); i++) dp_all(i,j) = dp1(i,j); for (int i=0; i<dp2.numRows(); i++) dp_all(dp1.numRows()+i,j) = dp2(i,j); for (int i=0; i<dp3.numRows(); i++) dp_all(dp1.numRows()+dp2.numRows()+i,j) = dp3(i,j); for (int i=0; i<dp4.numRows(); i++) dp_all(dp1.numRows()+dp2.numRows()+dp3.numRows()+i,j) = dp4(i,j); } std::vector< Teuchos::RCP<LOCA::MultiContinuation::ConstraintInterface> > constraintObjs(4); Teuchos::RCP<LinearConstraint> linear_constraint; linear_constraint = Teuchos::rcp(new LinearConstraint(dx1->numVectors(), pVector, nox_clone_vec)); linear_constraint->setDgDx(*dx1); linear_constraint->setDgDp(dp1); linear_constraint->setIsZeroDX(false); constraintObjs[0] = linear_constraint; linear_constraint = Teuchos::rcp(new LinearConstraint(dx2->numVectors(), pVector, nox_clone_vec)); linear_constraint->setDgDx(*dx2); linear_constraint->setDgDp(dp2); linear_constraint->setIsZeroDX(false); constraintObjs[1] = linear_constraint; linear_constraint = Teuchos::rcp(new 
LinearConstraint(dx3->numVectors(), pVector, nox_clone_vec)); linear_constraint->setDgDx(*dx3); linear_constraint->setDgDp(dp3); linear_constraint->setIsZeroDX(true); constraintObjs[2] = linear_constraint; linear_constraint = Teuchos::rcp(new LinearConstraint(dx4->numVectors(), pVector, nox_clone_vec)); linear_constraint->setDgDx(*dx4); linear_constraint->setDgDp(dp4); linear_constraint->setIsZeroDX(false); constraintObjs[3] = linear_constraint; // Check some statistics on the solution NOX::TestCompare testCompare(globalData->locaUtils->out(), *(globalData->locaUtils)); LOCA::MultiContinuation::CompositeConstraint composite(globalData, constraintObjs); composite.setX(*x); LinearConstraint combined(dx_all->numVectors(), pVector, nox_clone_vec); combined.setDgDx(*dx_all); combined.setDgDp(dp_all); combined.setX(*x); // // test computeConstraints() // composite.computeConstraints(); combined.computeConstraints(); int numConstraints = dx_all->numVectors(); const NOX::Abstract::MultiVector::DenseMatrix& g_composite = composite.getConstraints(); const NOX::Abstract::MultiVector::DenseMatrix& g_combined = combined.getConstraints(); ierr += testCompare.testMatrix( g_composite, g_combined, reltol, abstol, "CompositeConstraint::computeConstraints()"); // // test computeDP() // std::vector<int> paramIDs(3); paramIDs[0] = 1; paramIDs[1] = 2; paramIDs[2] = 4; NOX::Abstract::MultiVector::DenseMatrix dgdp_composite( numConstraints, paramIDs.size()+1); NOX::Abstract::MultiVector::DenseMatrix dgdp_combined( numConstraints, paramIDs.size()+1); dgdp_composite.putScalar(0.0); dgdp_combined.putScalar(0.0); composite.computeDP(paramIDs, dgdp_composite, false); combined.computeDP(paramIDs, dgdp_combined, false); ierr += testCompare.testMatrix( dgdp_composite, dgdp_combined, reltol, abstol, "CompositeConstraint::computeDP()"); // // test multiplyDX() // composite.computeDX(); combined.computeDX(); int numMultiply = 5; Teuchos::RCP<NOX::Abstract::MultiVector> A = 
nox_clone_vec.createMultiVector(numMultiply); A->random(); NOX::Abstract::MultiVector::DenseMatrix composite_multiply(numConstraints, numMultiply); NOX::Abstract::MultiVector::DenseMatrix combined_multiply(numConstraints, numMultiply); composite.multiplyDX(2.65, *A, composite_multiply); combined.multiplyDX(2.65, *A, combined_multiply); ierr += testCompare.testMatrix(composite_multiply, combined_multiply, reltol, abstol, "CompositeConstraint::multiplyDX()"); // // test addDX() (No Trans) // int numAdd = 5; NOX::Abstract::MultiVector::DenseMatrix B1(numConstraints, numAdd); B1.random(); NOX::Abstract::MultiVector::DenseMatrix B2(numAdd, numConstraints); B2.random(); Teuchos::RCP<NOX::Abstract::MultiVector> composite_add1 = nox_clone_vec.createMultiVector(numAdd); composite_add1->random(); Teuchos::RCP<NOX::Abstract::MultiVector> composite_add2 = nox_clone_vec.createMultiVector(numAdd); composite_add2->random(); Teuchos::RCP<NOX::Abstract::MultiVector> combined_add1 = composite_add1->clone(NOX::DeepCopy); Teuchos::RCP<NOX::Abstract::MultiVector> combined_add2 = composite_add2->clone(NOX::DeepCopy); composite.addDX(Teuchos::NO_TRANS, 1.45, B1, 2.78, *composite_add1); combined.addDX(Teuchos::NO_TRANS, 1.45, B1, 2.78, *combined_add1); ierr += testCompare.testMultiVector( *composite_add1, *combined_add1, reltol, abstol, "CompositeConstraint::addDX() (No Trans)"); // // test addDX() (Trans) // composite.addDX(Teuchos::TRANS, 1.45, B2, 2.78, *composite_add2); combined.addDX(Teuchos::TRANS, 1.45, B2, 2.78, *combined_add2); ierr += testCompare.testMultiVector( *composite_add2, *combined_add2, reltol, abstol, "CompositeConstraint::addDX() (Trans)"); LOCA::destroyGlobalData(globalData); } catch (std::exception& e) { std::cout << e.what() << std::endl; ierr = 1; } catch (const char *s) { std::cout << s << std::endl; ierr = 1; } catch (...) { std::cout << "Caught unknown exception!" << std::endl; ierr = 1; } if (MyPID == 0) { if (ierr == 0) std::cout << "All tests passed!" 
<< std::endl; else std::cout << ierr << " test(s) failed!" << std::endl; } #ifdef HAVE_MPI MPI_Finalize() ; #endif return ierr; }
int main() { std::cout << "============== Test 1 ==============" << std::endl << std::endl; DoubleInterval A00(2,3); DoubleInterval A01(0,1); DoubleInterval A10(1,2); DoubleInterval A11(2,3); DoubleInterval B0(0,120); DoubleInterval B1(60,240); DoubleMatrix *A = new DoubleMatrix(2,2); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A: " << std::endl; std::cout << *A << std::endl; DoubleVector *b = new DoubleVector(2, (DoubleInterval)0); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b: " << std::endl; std::cout << *b << std::endl << std::endl; DoubleVector *x = new DoubleVector(2, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } //Solution should be [[-120,90], [-60,240]]^T delete A; delete b; delete x; /*std::cout << "============== Test 2 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); (*A)(0,0) = 2; (*A)(0,1) = 1; (*A)(0,2) = -1; (*A)(1,0) = -3; (*A)(1,1) = -1; (*A)(1,2) = 2; (*A)(2,0) = -2; (*A)(2,1) = 1; (*A)(2,2) = 2; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 8; (*b)[1] = -11; (*b)[2] = -3; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //Solution should be [2, 3, -1]^T std::cout << "============== Test 3 ==============" << std::endl << std::endl; A = new 
DoubleMatrix(2,2); (*A)(0,0) = 1; (*A)(0,1) = -2; (*A)(1,0) = 2; (*A)(1,1) = -1; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); (*b)[0] = 3; (*b)[1] = 9; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(2, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //Solution should be [5, 1]^T std::cout << "============== Test 4 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); (*A)(0,0) = 2; (*A)(0,1) = 3; (*A)(0,2) = 1; (*A)(1,0) = 1; (*A)(1,1) = 1; (*A)(1,2) = 1; (*A)(2,0) = 3; (*A)(2,1) = 4; (*A)(2,2) = 2; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 1; (*b)[1] = 3; (*b)[2] = 4; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //There shouldn't be an exact solution as it has free variables std::cout << "============== Test 5 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); (*A)(0,0) = 1; (*A)(0,1) = 3; (*A)(0,2) = 1; (*A)(1,0) = 1; (*A)(1,1) = 1; (*A)(1,2) = -1; (*A)(2,0) = 3; (*A)(2,1) = 11; (*A)(2,2) = 5; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 9; (*b)[1] = 1; (*b)[2] = 
35; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //There shouldn't be an exact solution as it has free variables std::cout << "============== Test 6 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,2); (*A)(0,0) = 1; (*A)(0,1) = 1; (*A)(1,0) = 2; (*A)(1,1) = 3; (*A)(2,0) = 3; (*A)(2,1) = -2; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 0; (*b)[1] = 0; (*b)[2] = 0; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(2, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //There should be an exact solution as even though the system is overdetermined but //as the Rank is 2 and the amount of unknowns is 2 std::cout << "============== Test 7 ==============" << std::endl << std::endl; A = new DoubleMatrix(4,4); (*A)(0,0) = 1; (*A)(0,1) = 1; (*A)(0,2) = 1; (*A)(0,3) = 1; (*A)(1,0) = 2; (*A)(1,1) = 3; (*A)(1,2) = -1; (*A)(1,3) = -1; (*A)(2,0) = 3; (*A)(2,1) = 2; (*A)(2,2) = 1; (*A)(2,3) = 1; (*A)(3,0) = 3; (*A)(3,1) = 6; (*A)(3,2) = -1; (*A)(3,3) = -1; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(4, (DoubleInterval)0); (*b)[0] = 0; (*b)[1] = 2; (*b)[2] = 5; (*b)[3] = 4; std::cout << "b = " << 
std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(4, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //There should be no solutions as the system is inconsistent std::cout << "============== Test 8 ==============" << std::endl << std::endl; A = new DoubleMatrix(4,3); (*A)(0,0) = -1; (*A)(0,1) = 2; (*A)(0,2) = -1; (*A)(1,0) = -2; (*A)(1,1) = 2; (*A)(1,2) = 1; (*A)(2,0) = 3; (*A)(2,1) = 2; (*A)(2,2) = 2; (*A)(3,0) = -3; (*A)(3,1) = 8; (*A)(3,2) = 5; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(4, (DoubleInterval)0); (*b)[0] = 2; (*b)[1] = 4; (*b)[2] = 5; (*b)[3] = 17; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //There should be an exact solution as even though the system is overdetermined but //as the Rank is 3 and the amount of unknowns is 3 std::cout << "============== Test 9 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,4); (*A)(0,0) = 1; (*A)(0,1) = 3; (*A)(0,2) = 1; (*A)(0,3) = 1; (*A)(1,0) = 2; (*A)(1,1) = -2; (*A)(1,2) = 1; (*A)(1,3) = 2; (*A)(2,0) = 1; (*A)(2,1) = -5; (*A)(2,2) = 0; (*A)(2,3) = 1; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 3; (*b)[1] = 8; (*b)[2] = 5; std::cout << "b = " << 
std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(4, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //It is an undetermined system, so no exact solution as it has free variables std::cout << "============== Test 10 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,4); (*A)(0,0) = 1; (*A)(0,1) = 0; (*A)(0,2) = 0; (*A)(0,3) = 1; (*A)(1,0) = 0; (*A)(1,1) = 1; (*A)(1,2) = 0; (*A)(1,3) = 1; (*A)(2,0) = 0; (*A)(2,1) = 0; (*A)(2,2) = 1; (*A)(2,3) = 1; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 1; (*b)[1] = 1; (*b)[2] = 1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(4, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; std::cout << "============== Test 11 ==============" << std::endl << std::endl; A = new DoubleMatrix(4,4); (*A)(0,0) = 1; (*A)(0,1) = -1; (*A)(0,2) = 1; (*A)(0,3) = -1; (*A)(1,0) = -1; (*A)(1,1) = 1; (*A)(1,2) = -1; (*A)(1,3) = 1; (*A)(2,0) = 1; (*A)(2,1) = -1; (*A)(2,2) = 1; (*A)(2,3) = -1; (*A)(3,0) = -1; (*A)(3,1) = 1; (*A)(3,2) = -1; (*A)(3,3) = 1; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(4, (DoubleInterval)0); (*b)[0] = 0; (*b)[1] = 0; (*b)[2] = 0; (*b)[3] = 0; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = 
new DoubleVector(4, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //It is an underdetermined system, so no exact solution as it has free variables //fails here though because no pivoting is done std::cout << "============== Test 12 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); (*A)(0,0) = 1; (*A)(0,1) = 2; (*A)(0,2) = 3; (*A)(1,0) = 4; (*A)(1,1) = 5; (*A)(1,2) = 6; (*A)(2,0) = 7; (*A)(2,1) = 8; (*A)(2,2) = 9; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 0; (*b)[1] = 0; (*b)[2] = 0; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //There shouldn't be an exact solution as it has free variables std::cout << "============== Test 13 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); (*A)(0,0) = 1; (*A)(0,1) = -1; (*A)(0,2) = 2; (*A)(1,0) = 0; (*A)(1,1) = 0; (*A)(1,2) = -1; (*A)(2,0) = 0; (*A)(2,1) = 2; (*A)(2,2) = -1; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 8; (*b)[1] = -11; (*b)[2] = -3; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { 
std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //Should gave the same result as test 14 if pivoting has been implemented correctly std::cout << "============== Test 14 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); (*A)(0,0) = 1; (*A)(0,1) = -1; (*A)(0,2) = 2; (*A)(1,0) = 0; (*A)(1,1) = 2; (*A)(1,2) = -1; (*A)(2,0) = 0; (*A)(2,1) = 0; (*A)(2,2) = -1; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); (*b)[0] = 8; (*b)[1] = -3; (*b)[2] = -11; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; //Same system of equations as test 13*/ std::cout << "============== Test 15 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); (*A)(0,0) = A10; (*A)(0,1) = A11; (*A)(1,0) = A00; (*A)(1,1) = A01; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); (*b)[0] = B1; (*b)[1] = B0; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(2, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; 
delete b; delete x; std::cout << "============== Test 16 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); A00.assign(3,4); A01.assign(1,2); A10.assign(0,1); A11.assign(7,8); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(2,4); B1.assign(-1,1); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(2, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; std::cout << "============== Test 17 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); A00.assign(2,4); A01.assign(8,10); A10.assign(2,4); A11.assign(4,6); (*A)(0,0) = -A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(4,6); B1.assign(8,10); (*b)[0] = -B0; (*b)[1] = -B1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(2, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; std::cout << "============== Test 18 ==============" << std::endl << std::endl; A = new DoubleMatrix(4,4); A00.assign(4,6); A01.assign(-6,-4); A10.assign(9,11); A11.assign(-11,-9); DoubleInterval A33(-1,1); (*A)(0,0) = A00; (*A)(0,1) = 
A33; (*A)(0,2) = A33; (*A)(0,3) = A33; (*A)(1,0) = A33; (*A)(1,1) = A01; (*A)(1,2) = A33; (*A)(1,3) = A33; (*A)(2,0) = A33; (*A)(2,1) = A33; (*A)(2,2) = A10; (*A)(2,3) = A33; (*A)(3,0) = A33; (*A)(3,1) = A33; (*A)(3,2) = A33; (*A)(3,3) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(4, (DoubleInterval)0); B0.assign(-2,4); B1.assign(1,8); DoubleInterval B2(-4,10); DoubleInterval B3(2,12); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; (*b)[3] = B3; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(4, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; std::cout << "============== Test 19 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(-14,14); B1.assign(-9,9); B2.assign(-3,3); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; std::cout << "============== Test 
20 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(-14,0); B1.assign(-9,0); B2.assign(-3,0); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; std::cout << "============== Test 21 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(0,14); B1.assign(0,9); B2.assign(0,3); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; 
std::cout << "============== Test 22 ==============" << std::endl << std::endl; A = new DoubleMatrix(3,3); A00.assign(3.7, 4.3); A01.assign(-1.5, -0.5); A10.assign(3.7, 4.3); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(0,2) = (DoubleInterval)0; (*A)(1,0) = A01; (*A)(1,1) = A10; (*A)(1,2) = A01; (*A)(2,0) = (DoubleInterval)0; (*A)(2,1) = A01; (*A)(2,2) = A10; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(3, (DoubleInterval)0); B0.assign(2,14); B1.assign(-9,-3); B2.assign(-3,1); (*b)[0] = B0; (*b)[1] = B1; (*b)[2] = B2; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(3, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; std::cout << "============== Test 23 ==============" << std::endl << std::endl; A = new DoubleMatrix(2,2); A00.assign(2,3); A01.assign(-1,1); A10.assign(0,5); A11.assign(3,4); (*A)(0,0) = A00; (*A)(0,1) = A01; (*A)(1,0) = A10; (*A)(1,1) = A11; std::cout << "A = " << std::endl; std::cout << *A << std::endl; b = new DoubleVector(2, (DoubleInterval)0); B0.assign(2,14); B1.assign(3,9); (*b)[0] = B0; (*b)[1] = B1; std::cout << "b = " << std::endl; std::cout << *b << std::endl << std::endl; x = new DoubleVector(2, (DoubleInterval)0); try { x = hansen_gaussian_elimination_v1(*A,*b); if(x != NULL) { std::cout << "x = " << std::endl; std::cout << *x << std::endl << std::endl; } } catch(const std::exception& e) { std::cout << e.what() << std::endl << std::endl; } catch(std::string& error) { std::cout << error << std::endl << std::endl; } delete A; delete b; delete x; return 0; }
int main(int argc, char** argv) { if(argc != 4){ std::cout << "Error! Three arguments m, n and p are needed!" << std::endl; return 1; } int m = atoi(argv[1]); int n = atoi(argv[2]); int p = atoi(argv[3]); double elapseTime, elapseTime_single; int myrank, numprocs; MPI_Status status; MPI_Init(&argc, &argv); // 并行开始 //std::cout << "Start to computing..." << std::endl; MPI_Comm_size(MPI_COMM_WORLD, &numprocs); // 获取进程数 MPI_Comm_rank(MPI_COMM_WORLD, &myrank); // 获取本进城ID号 if(numprocs < 2){ std::cout << "Error! There must be more than two processors." << std::endl; return 1; } int bm = m / numprocs; int bn = n / numprocs; SCMatrix A(1), B(1), C(1), C_true(1); SCMatrix bA(bm,p), bB_send(bn,p), bB_recv(bn,p), bC(bm, bn), bC_send(bm, n); if(myrank == 0){ SCMatrix A1(m,p), B1(n,p), C1(m,n), C_true1(m,n); A = A1; B = B1; C = C1; C_true = C_true1; A.init_with_RV(); B.init_with_RV(); } MPI_Barrier(MPI_COMM_WORLD); //A.print(); B.print(); // start to timing clock_t start_time = clock(); //clock_t t_temp_start = clock(); MPI_Scatter(&A, bm * p, MPI_DOUBLE, &bA, bm * p, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatter(&B, bn * p, MPI_DOUBLE, &bB_recv, bn * p, MPI_DOUBLE, 0, MPI_COMM_WORLD); int sendTo = (myrank + 1) % numprocs; int recvFrom = (myrank - 1 + numprocs) % numprocs; int circle = 0; bB_send = bB_recv; do{ bC = bA * bB_recv.transpose(); int blocks_col = (myrank - circle + numprocs) % numprocs; for(int i=0; i<bm; i++){ for(int j=0; j<bn; j++){ bC_send(i, blocks_col*bn + j) = bC(i, j); } } if(myrank % 2 == 0){ bB_send = bB_recv; MPI_Ssend(&bB_send, bn*p, MPI_DOUBLE, sendTo, circle, MPI_COMM_WORLD); MPI_Recv(&bB_recv, bn*p, MPI_DOUBLE, recvFrom, circle, MPI_COMM_WORLD, &status); }else{ MPI_Recv(&bB_recv, bn*p, MPI_DOUBLE, recvFrom, circle, MPI_COMM_WORLD, &status); MPI_Ssend(&bB_send, bn*p, MPI_DOUBLE, sendTo, circle, MPI_COMM_WORLD); bB_send = bB_recv; } circle++; }while(circle < numprocs); MPI_Barrier(MPI_COMM_WORLD); MPI_Gather(&bC_send, bm * n, MPI_DOUBLE, &C, bm * n, 
MPI_DOUBLE, 0, MPI_COMM_WORLD); if(myrank == 0){ int remainAStartId = bm * numprocs; int remainBStartId = bn * numprocs; for(int i=remainAStartId; i<m; i++){ for(int j=0; j<n; j++){ double temp=0; for(int k=0; k<p; k++){ temp += A(i,k) * B(j,k); } C(i,j) = temp; } } for(int i=0; i<remainAStartId; i++){ for(int j=remainBStartId; j<n; j++){ double temp = 0; for(int k=0; k<p; k++){ temp += A(i,k)*B(j,k); } C(i,j) = temp; } } } // end timing clock_t end_time = clock(); elapseTime = (double)(end_time-start_time) / CLOCKS_PER_SEC; if(myrank == 0){ //std::cout << "[P_0] m = " << m << ", np = " << numprocs << ", time cost: " << elapseTime << std::endl; std::cout << "[P_0] Totally cost " << elapseTime << " seconds in parallel procedure." << std::endl; start_time = clock(); C_true = A * B.transpose(); end_time = clock(); elapseTime_single = double(end_time-start_time) / CLOCKS_PER_SEC; std::cout << "[P_0] Totally cost " << elapseTime_single << " seconds with one processor." << std::endl; bool b = (C==C_true); if(b){ std::cout << "[P_0] Congradulations! The two results are equal." << std::endl; }else{ std::cout << "[P_0] Bad news! The two results do not match." << std::endl; A.print(); B.print(); C.print(); C_true.print(); } } MPI_Finalize(); // 并行结束 return 0; }