inline void TrmmRUNA ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& U, DistMatrix<T>& X ) { #ifndef RELEASE CallStackEntry entry("internal::TrmmRUNA"); if( U.Grid() != X.Grid() ) throw std::logic_error("{U,X} must be distributed over the same grid"); #endif const Grid& g = U.Grid(); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); DistMatrix<T,STAR,VC > X1_STAR_VC(g); DistMatrix<T,STAR,MC > X1_STAR_MC(g); DistMatrix<T,MR, STAR> Z1Trans_MR_STAR(g); DistMatrix<T,MR, MC > Z1Trans_MR_MC(g); X1_STAR_VC.AlignWith( U ); X1_STAR_MC.AlignWith( U ); Z1Trans_MR_STAR.AlignWith( U ); PartitionDown ( X, XT, XB, 0 ); while( XT.Height() < X.Height() ) { RepartitionDown ( XT, X0, /**/ /**/ X1, XB, X2 ); Z1Trans_MR_MC.AlignWith( X1 ); //--------------------------------------------------------------------// X1_STAR_VC = X1; X1_STAR_MC = X1_STAR_VC; Zeros( Z1Trans_MR_STAR, X1.Width(), X1.Height() ); LocalTrmmAccumulateRUN ( TRANSPOSE, diag, alpha, U, X1_STAR_MC, Z1Trans_MR_STAR ); Z1Trans_MR_MC.SumScatterFrom( Z1Trans_MR_STAR ); Transpose( Z1Trans_MR_MC.Matrix(), X1.Matrix() ); //--------------------------------------------------------------------// Z1Trans_MR_MC.FreeAlignments(); SlidePartitionDown ( XT, X0, X1, /**/ /**/ XB, X2 ); } }
inline void TrmmLLTCOld ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLTCOld"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrmmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLTC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> D1AdjOrTrans_MR_STAR(g); DistMatrix<T,MR, MC > D1AdjOrTrans_MR_MC(g); DistMatrix<T,MC, MR > D1(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionDown ( X, XT, XB, 0 ); while( XB.Height() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionDown ( XT, X0, /**/ /**/ X1, XB, X2 ); L21_MC_STAR.AlignWith( X2 ); D1AdjOrTrans_MR_STAR.AlignWith( X1 ); D1AdjOrTrans_MR_MC.AlignWith( X1 ); D1.AlignWith( X1 ); Zeros( X1.Width(), X1.Height(), D1AdjOrTrans_MR_STAR ); Zeros( X1.Height(), X1.Width(), D1 ); //--------------------------------------------------------------------// X1_STAR_VR = X1; L11_STAR_STAR = L11; LocalTrmm ( LEFT, LOWER, orientation, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; L21_MC_STAR = L21; LocalGemm ( orientation, NORMAL, T(1), X2, L21_MC_STAR, T(0), D1AdjOrTrans_MR_STAR ); D1AdjOrTrans_MR_MC.SumScatterFrom( D1AdjOrTrans_MR_STAR ); if( orientation == TRANSPOSE ) Transpose( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); else Adjoint( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); Axpy( T(1), D1, X1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); D1AdjOrTrans_MR_MC.FreeAlignments(); D1AdjOrTrans_MR_STAR.FreeAlignments(); L21_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionDown ( XT, X0, X1, /**/ /**/ XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLLTSmall ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F,STAR,VR>& L, DistMatrix<F,VR,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLLTSmall"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLLT: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } if( L.RowAlignment() != X.ColAlignment() ) throw std::logic_error("L and X must be aligned"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F,STAR,VR> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F,VR,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,STAR> X1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; // L11[* ,* ] <- L11[* ,VR] X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VR,* ] // X1[* ,* ] := L11^-[T/H][* ,* ] X1[* ,* ] LocalTrsm ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_STAR_STAR, checkIfSingular ); X1 = X1_STAR_STAR; // X0[VR,* ] -= L10[* ,VR]^(T/H) X1[* ,* ] LocalGemm( orientation, NORMAL, F(-1), L10, X1_STAR_STAR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLLTLarge ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLLTLarge"); if( orientation == NORMAL ) throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,MC > L10_STAR_MC(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L10_STAR_MC.AlignWith( X0 ); X1_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := L11^-[T/H][* ,* ] X1[* ,VR] LocalTrsm ( LEFT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] L10_STAR_MC = L10; // L10[* ,MC] <- L10[MC,MR] // X0[MC,MR] -= (L10[* ,MC])^(T/H) X1[* ,MR] // = L10^[T/H][MC,* ] X1[* ,MR] LocalGemm ( orientation, NORMAL, F(-1), L10_STAR_MC, X1_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// L10_STAR_MC.FreeAlignments(); X1_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmLLNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE CallStackEntry entry("internal::TrmmLLNC"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,MC, STAR> L21_MC_STAR(g); DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> X1Trans_MR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); L21_MC_STAR.AlignWith( X2 ); X1Trans_MR_STAR.AlignWith( X2 ); X1_STAR_VR.AlignWith( X1 ); //--------------------------------------------------------------------// L21_MC_STAR = L21; X1Trans_MR_STAR.TransposeFrom( X1 ); LocalGemm ( NORMAL, TRANSPOSE, T(1), L21_MC_STAR, X1Trans_MR_STAR, T(1), X2 ); L11_STAR_STAR = L11; X1_STAR_VR.TransposeFrom( X1Trans_MR_STAR ); LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; //--------------------------------------------------------------------// L21_MC_STAR.FreeAlignments(); X1Trans_MR_STAR.FreeAlignments(); X1_STAR_VR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
inline void TrsmLUNSmall ( UnitOrNonUnit diag, F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLUNSmall"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || U.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrsmLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str() ); } if( U.ColAlignment() != X.ColAlignment() ) throw std::logic_error("U and X are assumed to be aligned"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F,VC,STAR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F,VC,STAR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,STAR> X1_STAR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ] X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VC,* ] // X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular ); X1 = X1_STAR_STAR; // X0[VC,* ] -= U01[VC,* ] X1[* ,* ] LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 ); //--------------------------------------------------------------------// SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmLUNLarge ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmLUNLarge"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > X1_STAR_MR(g); DistMatrix<F,STAR,VR > X1_STAR_VR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); U01_MC_STAR.AlignWith( X0 ); X1_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1_STAR_VR = X1; // X1[* ,VR] <- X1[MC,MR] // X1[* ,VR] := U11^-1[* ,* ] X1[* ,VR] LocalTrsm ( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_STAR_VR, checkIfSingular ); X1_STAR_MR = X1_STAR_VR; // X1[* ,MR] <- X1[* ,VR] X1 = X1_STAR_MR; // X1[MC,MR] <- X1[* ,MR] U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm( NORMAL, NORMAL, F(-1), U01_MC_STAR, X1_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// U01_MC_STAR.FreeAlignments(); X1_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
float RandomNoise::operator()(SmoothType smooth,int subseed,float xf,float yf,float tf,int loop)const { int x((int)floor(xf)); int y((int)floor(yf)); int t((int)floor(tf)); int t_1, t0, t1, t2; if (loop) { t0 = t % loop; if (t0 < 0 ) t0 += loop; t_1 = t0 - 1; if (t_1 < 0 ) t_1 += loop; t1 = t0 + 1; if (t1 >= loop) t1 -= loop; t2 = t1 + 1; if (t2 >= loop) t2 -= loop; } else { t0 = t; t_1 = t - 1; t1 = t + 1; t2 = t + 2; } // synfig::info("%s:%d tf %.2f loop %d fraction %.2f ( -1,0,1,2 : %2d %2d %2d %2d)", __FILE__, __LINE__, tf, loop, tf-t, t_1, t0, t1, t2); switch(smooth) { case SMOOTH_CUBIC: // cubic { #define f(j,i,k) ((*this)(subseed,i,j,k)) //Using catmull rom interpolation because it doesn't blur at all // ( http://www.gamedev.net/reference/articles/article1497.asp ) //bezier curve with intermediate ctrl pts: 0.5/3(p(i+1) - p(i-1)) and similar float xfa [4], tfa[4]; //precalculate indices (all clamped) and offset const int xa[] = {x-1,x,x+1,x+2}; const int ya[] = {y-1,y,y+1,y+2}; const int ta[] = {t_1,t0,t1,t2}; const float dx(xf-x); const float dy(yf-y); const float dt(tf-t); //figure polynomials for each point const float txf[] = { 0.5f*dx*(dx*(dx*(-1.f) + 2.f) - 1.f), //-t + 2t^2 -t^3 0.5f*(dx*(dx*(3.f*dx - 5.f)) + 2.f), //2 - 5t^2 + 3t^3 0.5f*dx*(dx*(-3.f*dx + 4.f) + 1.f), //t + 4t^2 - 3t^3 0.5f*dx*dx*(dx-1.f) //-t^2 + t^3 }; const float tyf[] = { 0.5f*dy*(dy*(dy*(-1.f) + 2.f) - 1.f), //-t + 2t^2 -t^3 0.5f*(dy*(dy*(3.f*dy - 5.f)) + 2.f), //2 - 5t^2 + 3t^3 0.5f*dy*(dy*(-3.f*dy + 4.f) + 1.f), //t + 4t^2 - 3t^3 0.5f*dy*dy*(dy-1.f) //-t^2 + t^3 }; const float ttf[] = { 0.5f*dt*(dt*(dt*(-1.f) + 2.f) - 1.f), //-t + 2t^2 -t^3 0.5f*(dt*(dt*(3.f*dt - 5.f)) + 2.f), //2 - 5t^2 + 3t^3 0.5f*dt*(dt*(-3.f*dt + 4.f) + 1.f), //t + 4t^2 - 3t^3 0.5f*dt*dt*(dt-1.f) //-t^2 + t^3 }; //evaluate polynomial for each row for(int i = 0; i < 4; ++i) { for(int j = 0; j < 4; ++j) { tfa[j] = f(ya[i],xa[j],ta[0])*ttf[0] + f(ya[i],xa[j],ta[1])*ttf[1] + f(ya[i],xa[j],ta[2])*ttf[2] + f(ya[i],xa[j],ta[3])*ttf[3]; } xfa[i] = tfa[0]*txf[0] + tfa[1]*txf[1] + tfa[2]*txf[2] + tfa[3]*txf[3]; } //return the cumulative column evaluation return xfa[0]*tyf[0] + xfa[1]*tyf[1] + xfa[2]*tyf[2] + xfa[3]*tyf[3]; #undef f } break; case SMOOTH_FAST_SPLINE: // Fast Spline (non-animated) { #define P(x) (((x)>0)?((x)*(x)*(x)):0.0f) #define R(x) ( P(x+2) - 4.0f*P(x+1) + 6.0f*P(x) - 4.0f*P(x-1) )*(1.0f/6.0f) #define F(i,j) ((*this)(subseed,i+x,j+y)*(R((i)-a)*R(b-(j)))) #define FT(i,j,k,l) ((*this)(subseed,i+x,j+y,l)*(R((i)-a)*R(b-(j))*R((k)-c))) #define Z(i,j) ret+=F(i,j) #define ZT(i,j,k,l) ret+=FT(i,j,k,l) #define X(i,j) // placeholder... To make box more symmetric #define XT(i,j,k,l) // placeholder... To make box more symmetric float a(xf-x), b(yf-y); // Interpolate float ret(F(0,0)); Z(-1,-1); Z(-1, 0); Z(-1, 1); Z(-1, 2); Z( 0,-1); X( 0, 0); Z( 0, 1); Z( 0, 2); Z( 1,-1); Z( 1, 0); Z( 1, 1); Z( 1, 2); Z( 2,-1); Z( 2, 0); Z( 2, 1); Z( 2, 2); return ret; } case SMOOTH_SPLINE: // Spline (animated) { float a(xf-x), b(yf-y), c(tf-t); // Interpolate float ret(FT(0,0,0,t0)); ZT(-1,-1,-1,t_1); ZT(-1, 0,-1,t_1); ZT(-1, 1,-1,t_1); ZT(-1, 2,-1,t_1); ZT( 0,-1,-1,t_1); ZT( 0, 0,-1,t_1); ZT( 0, 1,-1,t_1); ZT( 0, 2,-1,t_1); ZT( 1,-1,-1,t_1); ZT( 1, 0,-1,t_1); ZT( 1, 1,-1,t_1); ZT( 1, 2,-1,t_1); ZT( 2,-1,-1,t_1); ZT( 2, 0,-1,t_1); ZT( 2, 1,-1,t_1); ZT( 2, 2,-1,t_1); ZT(-1,-1, 0,t0 ); ZT(-1, 0, 0,t0 ); ZT(-1, 1, 0,t0 ); ZT(-1, 2, 0,t0 ); ZT( 0,-1, 0,t0 ); XT( 0, 0, 0,t0 ); ZT( 0, 1, 0,t0 ); ZT( 0, 2, 0,t0 ); ZT( 1,-1, 0,t0 ); ZT( 1, 0, 0,t0 ); ZT( 1, 1, 0,t0 ); ZT( 1, 2, 0,t0 ); ZT( 2,-1, 0,t0 ); ZT( 2, 0, 0,t0 ); ZT( 2, 1, 0,t0 ); ZT( 2, 2, 0,t0 ); ZT(-1,-1, 1,t1 ); ZT(-1, 0, 1,t1 ); ZT(-1, 1, 1,t1 ); ZT(-1, 2, 1,t1 ); ZT( 0,-1, 1,t1 ); ZT( 0, 0, 1,t1 ); ZT( 0, 1, 1,t1 ); ZT( 0, 2, 1,t1 ); ZT( 1,-1, 1,t1 ); ZT( 1, 0, 1,t1 ); ZT( 1, 1, 1,t1 ); ZT( 1, 2, 1,t1 ); ZT( 2,-1, 1,t1 ); ZT( 2, 0, 1,t1 ); ZT( 2, 1, 1,t1 ); ZT( 2, 2, 1,t1 ); ZT(-1,-1, 2,t2 ); ZT(-1, 0, 2,t2 ); ZT(-1, 1, 2,t2 ); ZT(-1, 2, 2,t2 ); ZT( 0,-1, 2,t2 ); ZT( 0, 0, 2,t2 ); ZT( 0, 1, 2,t2 ); ZT( 0, 2, 2,t2 ); ZT( 1,-1, 2,t2 ); ZT( 1, 0, 2,t2 ); ZT( 1, 1, 2,t2 ); ZT( 1, 2, 2,t2 ); ZT( 2,-1, 2,t2 ); ZT( 2, 0, 2,t2 ); ZT( 2, 1, 2,t2 ); ZT( 2, 2, 2,t2 ); return ret; /* float dx=xf-x; float dy=yf-y; float dt=tf-t; float ret=0; int i,j,h; for(h=-1;h<=2;h++) for(i=-1;i<=2;i++) for(j=-1;j<=2;j++) ret+=(*this)(subseed,i+x,j+y,h+t)*(R(i-dx)*R(j-dy)*R(h-dt)); return ret; */ } break; #undef X #undef Z #undef F #undef P #undef R case SMOOTH_COSINE: if((float)t==tf) { int x((int)floor(xf)); int y((int)floor(yf)); float a=xf-x; float b=yf-y; a=(1.0f-cos(a*PI))*0.5f; b=(1.0f-cos(b*PI))*0.5f; float c=1.0-a; float d=1.0-b; int x2=x+1,y2=y+1; return (*this)(subseed,x,y,t0)*(c*d)+ (*this)(subseed,x2,y,t0)*(a*d)+ (*this)(subseed,x,y2,t0)*(c*b)+ (*this)(subseed,x2,y2,t0)*(a*b); } else { float a=xf-x; float b=yf-y; float c=tf-t; a=(1.0f-cos(a*PI))*0.5f; b=(1.0f-cos(b*PI))*0.5f; // We don't perform this on the time axis, otherwise we won't // get smooth motion //c=(1.0f-cos(c*PI))*0.5f; float d=1.0-a; float e=1.0-b; float f=1.0-c; int x2=x+1,y2=y+1; return (*this)(subseed,x,y,t0)*(d*e*f)+ (*this)(subseed,x2,y,t0)*(a*e*f)+ (*this)(subseed,x,y2,t0)*(d*b*f)+ (*this)(subseed,x2,y2,t0)*(a*b*f)+ (*this)(subseed,x,y,t1)*(d*e*c)+ (*this)(subseed,x2,y,t1)*(a*e*c)+ (*this)(subseed,x,y2,t1)*(d*b*c)+ (*this)(subseed,x2,y2,t1)*(a*b*c); } case SMOOTH_LINEAR: if((float)t==tf) { int x((int)floor(xf)); int y((int)floor(yf)); float a=xf-x; float b=yf-y; float c=1.0-a; float d=1.0-b; int x2=x+1,y2=y+1; return (*this)(subseed,x,y,t0)*(c*d)+ (*this)(subseed,x2,y,t0)*(a*d)+ (*this)(subseed,x,y2,t0)*(c*b)+ (*this)(subseed,x2,y2,t0)*(a*b); } else { float a=xf-x; float b=yf-y; float c=tf-t; float d=1.0-a; float e=1.0-b; float f=1.0-c; int x2=x+1,y2=y+1; return (*this)(subseed,x,y,t0)*(d*e*f)+ (*this)(subseed,x2,y,t0)*(a*e*f)+ (*this)(subseed,x,y2,t0)*(d*b*f)+ (*this)(subseed,x2,y2,t0)*(a*b*f)+ (*this)(subseed,x,y,t1)*(d*e*c)+ (*this)(subseed,x2,y,t1)*(a*e*c)+ (*this)(subseed,x,y2,t1)*(d*b*c)+ (*this)(subseed,x2,y2,t1)*(a*b*c); } default: case SMOOTH_DEFAULT: return (*this)(subseed,x,y,t0); } }
inline void TrsmLUNMedium ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrsmLUNMedium"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MC, STAR> U01_MC_STAR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionUp ( X, XT, XB, 0 ); while( XT.Height() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionUp ( XT, X0, X1, /**/ /**/ XB, X2 ); U01_MC_STAR.AlignWith( X0 ); X1Trans_MR_STAR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR] X1Trans_MR_STAR.TransposeFrom( X1 ); // X1[* ,MR] <- X1[MC,MR] // X1[* ,MR] := U11^-1[* ,* ] X1[* ,MR] // // X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ] LocalTrsm ( RIGHT, UPPER, TRANSPOSE, diag, F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular ); X1.TransposeFrom( X1Trans_MR_STAR ); U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR] // X0[MC,MR] -= U01[MC,* ] X1[* ,MR] LocalGemm ( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 ); //--------------------------------------------------------------------// U01_MC_STAR.FreeAlignments(); X1Trans_MR_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionUp ( XT, X0, /**/ /**/ X1, XB, X2 ); } }
inline void internal::TrmmLUNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T,MC,MR>& U, DistMatrix<T,MC,MR>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLUNC"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || U.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLUN: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T,MC,MR> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T,MC,MR> XT(g), X0(g), XB(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> U11_STAR_STAR(g); DistMatrix<T,STAR,MC > U12_STAR_MC(g); DistMatrix<T,STAR,VR > X1_STAR_VR(g); DistMatrix<T,MR, STAR> D1Trans_MR_STAR(g); DistMatrix<T,MR, MC > D1Trans_MR_MC(g); DistMatrix<T,MC, MR > D1(g); // Start the algorithm Scal( alpha, X ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionDown ( X, XT, XB, 0 ); while( XB.Height() > 0 ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); RepartitionDown ( XT, X0, /**/ /**/ X1, XB, X2 ); U12_STAR_MC.AlignWith( X2 ); D1Trans_MR_STAR.AlignWith( X1 ); D1Trans_MR_MC.AlignWith( X1 ); D1.AlignWith( X1 ); D1Trans_MR_STAR.ResizeTo( X1.Width(), X1.Height() ); D1.ResizeTo( X1.Height(), X1.Width() ); //--------------------------------------------------------------------// X1_STAR_VR = X1; U11_STAR_STAR = U11; internal::LocalTrmm ( LEFT, UPPER, NORMAL, diag, (T)1, U11_STAR_STAR, X1_STAR_VR ); X1 = X1_STAR_VR; U12_STAR_MC = U12; internal::LocalGemm ( TRANSPOSE, TRANSPOSE, (T)1, X2, U12_STAR_MC, (T)0, D1Trans_MR_STAR ); D1Trans_MR_MC.SumScatterFrom( D1Trans_MR_STAR ); Transpose( D1Trans_MR_MC.LocalMatrix(), D1.LocalMatrix() ); Axpy( (T)1, D1, X1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); D1Trans_MR_MC.FreeAlignments(); D1Trans_MR_STAR.FreeAlignments(); U12_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlidePartitionDown ( XT, X0, X1, /**/ /**/ XB, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void HouseholderSolve ( Orientation orientation, DistMatrix<Complex<R> >& A, const DistMatrix<Complex<R> >& B, DistMatrix<Complex<R> >& X ) { #ifndef RELEASE PushCallStack("HouseholderSolve"); if( A.Grid() != B.Grid() || A.Grid() != X.Grid() ) throw std::logic_error("Grids do not match"); if( orientation == TRANSPOSE ) throw std::logic_error("Invalid orientation"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); // TODO: Add scaling const int m = A.Height(); const int n = A.Width(); DistMatrix<C,MD,STAR> t( g ); if( orientation == NORMAL ) { if( m != B.Height() ) throw std::logic_error("A and B do not conform"); if( m >= n ) { // Overwrite A with its packed QR factorization (and store the // corresponding Householder scalars in t) QR( A, t ); // Copy B into X X = B; // Apply Q' to X ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, FORWARD, CONJUGATED, 0, A, t, X ); // Shrink X to its new height X.ResizeTo( n, X.Width() ); // Solve against R (checking for singularities) DistMatrix<C> AT( g ); LockedView( AT, A, 0, 0, n, n ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), AT, X, true ); } else { // Overwrite A with its packed LQ factorization (and store the // corresponding Householder scalars in it) LQ( A, t ); // Copy B into X X.ResizeTo( n, B.Width() ); DistMatrix<C> XT( g ), XB( g ); PartitionDown( X, XT, XB, m ); XT = B; Zero( XB ); // Solve against L (checking for singularities) DistMatrix<C> AL( g ); LockedView( AL, A, 0, 0, m, m ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, C(1), AL, XT, true ); // Apply Q' to X ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, CONJUGATED, 0, A, t, X ); } } else // orientation == ADJOINT { if( n != B.Height() ) throw std::logic_error("A and B do not conform"); if( m >= n ) { // Overwrite A with its packed QR factorization (and store the // corresponding Householder scalars in t) QR( A, t ); // Copy B into X X.ResizeTo( m, B.Width() ); DistMatrix<C> XT( g ), XB( g ); PartitionDown( X, XT, XB, n ); XT = B; Zero( XB ); // Solve against R' (checking for singularities) DistMatrix<C> AT( g ); LockedView( AT, A, 0, 0, n, n ); Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, C(1), AT, XT, true ); // Apply Q to X ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, A, t, X ); } else { // Overwrite A with its packed LQ factorization (and store the // corresponding Householder scalars in t) LQ( A, t ); // Copy B into X X = B; // Apply Q to X ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, FORWARD, UNCONJUGATED, 0, A, t, X ); // Shrink X to its new height X.ResizeTo( m, X.Width() ); // Solve against L' (check for singularities) DistMatrix<C> AL( g ); LockedView( AL, A, 0, 0, m, m ); Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, C(1), AL, X, true ); } } #ifndef RELEASE PopCallStack(); #endif }