void UUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& U ) { EL_DEBUG_CSE // Use the Variant 4 algorithm // (which annoyingly requires conjugations for the Her2) const Int n = A.Height(); const Int lda = A.LDim(); const Int ldu = U.LDim(); F* ABuffer = A.Buffer(); const F* UBuffer = U.LockedBuffer(); vector<F> a12Conj( n ), u12Conj( n ); for( Int j=0; j<n; ++j ) { const Int a21Height = n - (j+1); // Extract and store the diagonal value of U const F upsilon11 = ( diag==UNIT ? 1 : UBuffer[j+j*ldu] ); // a01 := a01 / upsilon11 F* a01 = &ABuffer[j*lda]; if( diag != UNIT ) for( Int k=0; k<j; ++k ) a01[k] /= upsilon11; // A02 := A02 - a01 u12 F* A02 = &ABuffer[(j+1)*lda]; const F* u12 = &UBuffer[j+(j+1)*ldu]; blas::Geru( j, a21Height, F(-1), a01, 1, u12, ldu, A02, lda ); // alpha11 := alpha11 / |upsilon11|^2 ABuffer[j+j*lda] /= upsilon11*Conj(upsilon11); const F alpha11 = ABuffer[j+j*lda]; // a12 := a12 / conj(upsilon11) F* a12 = &ABuffer[j+(j+1)*lda]; if( diag != UNIT ) for( Int k=0; k<a21Height; ++k ) a12[k*lda] /= Conj(upsilon11); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; // A22 := A22 - (a12' u12 + u12' a12) F* A22 = &ABuffer[(j+1)+(j+1)*lda]; for( Int k=0; k<a21Height; ++k ) a12Conj[k] = Conj(a12[k*lda]); for( Int k=0; k<a21Height; ++k ) u12Conj[k] = Conj(u12[k*ldu]); blas::Her2 ( 'U', a21Height, F(-1), u12Conj.data(), 1, a12Conj.data(), 1, A22, lda ); // a12 := a12 - (alpha11/2)u12 for( Int k=0; k<a21Height; ++k ) a12[k*lda] -= (alpha11/F(2))*u12[k*ldu]; } }
// Unblocked "Variant 4" update of A by the lower-triangular factor L, carried
// out in place on A's raw buffer. Only entries on or below the diagonal of A
// are written.
// NOTE(review): the multiplications by the diagonal of L suggest this is the
// two-sided product A := L^H A L -- confirm against the caller.
//
//   diag - UNIT treats the diagonal of L as ones (the scaling loops are skipped)
//   A    - matrix that is overwritten
//   L    - lower-triangular factor, read-only
void LUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& L )
{
    EL_DEBUG_CSE
    const Int n = A.Height();
    const Int aLDim = A.LDim();
    const Int lLDim = L.LDim();
    T* ABuf = A.Buffer();
    const T* LBuf = L.LockedBuffer();
    const bool nonUnit = ( diag != UNIT );

    // Her2 consumes column vectors, so conjugated copies of the rows a10 and
    // l10 are staged here before each rank-two update.
    vector<T> a10Conj( n ), l10Conj( n );

    for( Int j=0; j<n; ++j )
    {
        const Int numBelow = n - (j+1);

        // Diagonal values of A and L (lambda11 is one for an implicit diagonal)
        const T alpha11 = ABuf[j+j*aLDim];
        const T lambda11 = ( diag==UNIT ? 1 : LBuf[j+j*lLDim] );
        const T halfAlpha11 = alpha11/T(2);

        // a10 := a10 + (alpha11/2)l10   (first half of the correction)
        T* a10 = &ABuf[j];
        const T* l10 = &LBuf[j];
        for( Int k=0; k<j; ++k )
            a10[k*aLDim] += halfAlpha11*l10[k*lLDim];

        // A00 := A00 + (a10' l10 + l10' a10)
        for( Int k=0; k<j; ++k )
        {
            a10Conj[k] = Conj(a10[k*aLDim]);
            l10Conj[k] = Conj(l10[k*lLDim]);
        }
        T* A00 = ABuf;
        blas::Her2
        ( 'L', j, T(1), a10Conj.data(), 1, l10Conj.data(), 1, A00, aLDim );

        // a10 := a10 + (alpha11/2)l10   (second half of the correction)
        for( Int k=0; k<j; ++k )
            a10[k*aLDim] += halfAlpha11*l10[k*lLDim];

        // a10 := conj(lambda11) a10
        if( nonUnit )
        {
            const T lambda11Conj = Conj(lambda11);
            for( Int k=0; k<j; ++k )
                a10[k*aLDim] *= lambda11Conj;
        }

        // alpha11 := alpha11 * |lambda11|^2
        ABuf[j+j*aLDim] *= Conj(lambda11)*lambda11;

        // A20 := A20 + a21 l10
        T* a21 = &ABuf[(j+1)+j*aLDim];
        T* A20 = &ABuf[j+1];
        blas::Geru( numBelow, j, T(1), a21, 1, l10, lLDim, A20, aLDim );

        // a21 := lambda11 a21
        if( nonUnit )
        {
            for( Int i=0; i<numBelow; ++i )
                a21[i] *= lambda11;
        }
    }
}
// Updates the pair (eta0, eta1) in place with a two-entry reflector applied
// from the right. w[0] supplies tau and w[1] supplies nu1.
inline void ApplyRightReflector( Field& eta0, Field& eta1, const Field* w )
{
    // Common factor conj(tau)*(eta0 + nu1*eta1), built from the old values
    const Field scaledSum = Conj(w[0])*(eta0+w[1]*eta1);

    eta0 -= scaledSum;
    eta1 -= scaledSum*Conj(w[1]);
}
inline void TwoSidedTrsmLUnb( UnitOrNonUnit diag, Matrix<F>& A, const Matrix<F>& L ) { #ifndef RELEASE PushCallStack("internal::TwoSidedTrsmLUnb"); #endif // Use the Variant 4 algorithm const int n = A.Height(); const int lda = A.LDim(); const int ldl = L.LDim(); F* ABuffer = A.Buffer(); const F* LBuffer = L.LockedBuffer(); for( int j=0; j<n; ++j ) { const int a21Height = n - (j+1); // Extract and store the diagonal value of L const F lambda11 = ( diag==UNIT ? 1 : LBuffer[j+j*ldl] ); // a10 := a10 / lambda11 F* a10 = &ABuffer[j]; if( diag != UNIT ) for( int k=0; k<j; ++k ) a10[k*lda] /= lambda11; // A20 := A20 - l21 a10 F* A20 = &ABuffer[j+1]; const F* l21 = &LBuffer[(j+1)+j*ldl]; blas::Geru( a21Height, j, F(-1), l21, 1, a10, lda, A20, lda ); // alpha11 := alpha11 / |lambda11|^2 ABuffer[j+j*lda] /= lambda11*Conj(lambda11); const F alpha11 = ABuffer[j+j*lda]; // a21 := a21 / conj(lambda11) F* a21 = &ABuffer[(j+1)+j*lda]; if( diag != UNIT ) for( int k=0; k<a21Height; ++k ) a21[k] /= Conj(lambda11); // a21 := a21 - (alpha11/2)l21 for( int k=0; k<a21Height; ++k ) a21[k] -= (alpha11/2)*l21[k]; // A22 := A22 - (l21 a21' + a21 l21') F* A22 = &ABuffer[(j+1)+(j+1)*lda]; blas::Her2( 'L', a21Height, F(-1), l21, 1, a21, 1, A22, lda ); // a21 := a21 - (alpha11/2)l21 for( int k=0; k<a21Height; ++k ) a21[k] -= (alpha11/2)*l21[k]; } #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrmmLUnblocked( Orientation orientation, Matrix<T>& L ) { #ifndef RELEASE PushCallStack("internal::TrtrmmLUnblocked"); if( L.Height() != L.Width() ) throw std::logic_error("L must be square"); if( orientation == NORMAL ) throw std::logic_error("Trtrmm requires (conjugate-)transpose"); #endif const int n = L.Height(); T* LBuffer = L.Buffer(); const int ldim = L.LDim(); for( int j=0; j<n; ++j ) { T* RESTRICT l10 = &LBuffer[j]; if( orientation == ADJOINT ) { // L00 := L00 + l10^H l10 for( int k=0; k<j; ++k ) { const T gamma = l10[k*ldim]; T* RESTRICT L00Col = &LBuffer[k*ldim]; for( int i=k; i<j; ++i ) L00Col[i] += Conj(l10[i*ldim])*gamma; } } else { // L00 := L00 + l10^T l10 for( int k=0; k<j; ++k ) { const T gamma = l10[k*ldim]; T* RESTRICT L00Col = &LBuffer[k*ldim]; for( int i=k; i<j; ++i ) L00Col[i] += l10[i*ldim]*gamma; } } // l10 := l10 lambda11 const T lambda11 = LBuffer[j+j*ldim]; for( int k=0; k<j; ++k ) l10[k*ldim] *= lambda11; // lambda11 := lambda11^2 or |lambda11|^2 if( orientation == ADJOINT ) LBuffer[j+j*ldim] = lambda11*Conj(lambda11); else LBuffer[j+j*ldim] = lambda11*lambda11; } #ifndef RELEASE PopCallStack(); #endif }
inline void TrtrmmUUnblocked( Orientation orientation, Matrix<T>& U ) { #ifndef RELEASE PushCallStack("internal::TrtrmmUUnblocked"); if( U.Height() != U.Width() ) throw std::logic_error("U must be square"); if( orientation == NORMAL ) throw std::logic_error("Trtrmm requires (conjugate-)transpose"); #endif const int n = U.Height(); T* UBuffer = U.Buffer(); const int ldim = U.LDim(); for( int j=0; j<n; ++j ) { T* RESTRICT u01 = &UBuffer[j*ldim]; if( orientation == ADJOINT ) { // U00 := U00 + u01 u01^H for( int k=0; k<j; ++k ) { const T gamma = Conj(u01[k]); T* RESTRICT U00Col = &UBuffer[k*ldim]; for( int i=0; i<=k; ++i ) U00Col[i] += u01[i]*gamma; } } else { // U00 := U00 + u01 u01^T for( int k=0; k<j; ++k ) { const T gamma = u01[k]; T* RESTRICT U00Col = &UBuffer[k*ldim]; for( int i=0; i<=k; ++i ) U00Col[i] += u01[i]*gamma; } } // u01 := u01 upsilon11 const T upsilon11 = UBuffer[j+j*ldim]; for( int k=0; k<j; ++k ) u01[k] *= upsilon11; // upsilon11 := upsilon11^2 or |upsilon11|^2 if( orientation == ADJOINT ) UBuffer[j+j*ldim] = upsilon11*Conj(upsilon11); else UBuffer[j+j*ldim] = upsilon11*upsilon11; } #ifndef RELEASE PopCallStack(); #endif }
// Unblocked "Variant 4" update of A by the upper-triangular factor U, carried
// out in place on A's raw buffer. Only entries on or above the diagonal of A
// are written.
// NOTE(review): the multiplications by the diagonal of U suggest this is the
// two-sided product A := U A U^H -- confirm against the caller.
//
//   diag - UNIT treats the diagonal of U as ones (the scaling loops are skipped)
//   A    - matrix that is overwritten
//   U    - upper-triangular factor, read-only
void UUnb( UnitOrNonUnit diag, Matrix<T>& A, const Matrix<T>& U )
{
    EL_DEBUG_CSE
    const Int n = A.Height();
    const Int aLDim = A.LDim();
    const Int uLDim = U.LDim();
    T* ABuf = A.Buffer();
    const T* UBuf = U.LockedBuffer();
    const bool nonUnit = ( diag != UNIT );

    for( Int j=0; j<n; ++j )
    {
        const Int numRight = n - (j+1);

        // Diagonal values of A and U (upsilon11 is one for an implicit diagonal)
        const T alpha11 = ABuf[j+j*aLDim];
        const T upsilon11 = ( diag==UNIT ? 1 : UBuf[j+j*uLDim] );
        const T halfAlpha11 = alpha11/T(2);

        // a01 := a01 + (alpha11/2)u01   (first half of the correction)
        T* a01 = &ABuf[j*aLDim];
        const T* u01 = &UBuf[j*uLDim];
        for( Int i=0; i<j; ++i )
            a01[i] += halfAlpha11*u01[i];

        // A00 := A00 + (u01 a01' + a01 u01')
        T* A00 = ABuf;
        blas::Her2( 'U', j, T(1), u01, 1, a01, 1, A00, aLDim );

        // a01 := a01 + (alpha11/2)u01   (second half of the correction)
        for( Int i=0; i<j; ++i )
            a01[i] += halfAlpha11*u01[i];

        // a01 := conj(upsilon11) a01
        if( nonUnit )
        {
            const T upsilon11Conj = Conj(upsilon11);
            for( Int i=0; i<j; ++i )
                a01[i] *= upsilon11Conj;
        }

        // A02 := A02 + u01 a12
        T* a12 = &ABuf[j+(j+1)*aLDim];
        T* A02 = &ABuf[(j+1)*aLDim];
        blas::Geru( j, numRight, T(1), u01, 1, a12, aLDim, A02, aLDim );

        // alpha11 := alpha11 * |upsilon11|^2
        ABuf[j+j*aLDim] *= Conj(upsilon11)*upsilon11;

        // a12 := upsilon11 a12
        if( nonUnit )
        {
            for( Int k=0; k<numRight; ++k )
                a12[k*aLDim] *= upsilon11;
        }
    }
}
inline void SolveAfterCholesky ( UpperOrLower uplo, Orientation orientation, const DistMatrix<F>& A, DistMatrix<F>& B ) { #ifndef RELEASE PushCallStack("SolveAfterLU"); if( A.Grid() != B.Grid() ) throw std::logic_error("{A,B} must be distributed over the same grid"); if( A.Height() != A.Width() ) throw std::logic_error("A must be square"); if( A.Height() != B.Height() ) throw std::logic_error("A and B must be the same height"); #endif if( B.Width() == 1 ) { if( uplo == LOWER ) { if( orientation == TRANSPOSE ) Conj( B ); Trsv( LOWER, NORMAL, NON_UNIT, A, B ); Trsv( LOWER, ADJOINT, NON_UNIT, A, B ); if( orientation == TRANSPOSE ) Conj( B ); } else { if( orientation == TRANSPOSE ) Conj( B ); Trsv( UPPER, ADJOINT, NON_UNIT, A, B ); Trsv( UPPER, NORMAL, NON_UNIT, A, B ); if( orientation == TRANSPOSE ) Conj( B ); } } else { if( uplo == LOWER ) { if( orientation == TRANSPOSE ) Conj( B ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, F(1), A, B ); Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, F(1), A, B ); if( orientation == TRANSPOSE ) Conj( B ); } else { if( orientation == TRANSPOSE ) Conj( B ); Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A, B ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, F(1), A, B ); if( orientation == TRANSPOSE ) Conj( B ); } } #ifndef RELEASE PopCallStack(); #endif }
// Applies a plane rotation to n pairs of strided entries, in place:
//
//   x[i*incx] :=  c*x[i*incx]       + s*y[i*incy]
//   y[i*incy] := -Conj(s)*x[i*incx] + c*y[i*incy]
//
// Each result is built with compound assignments on two reusable scratch
// values rather than expression temporaries, because constructing a
// BigInt/BigFloat involves a memory allocation.
void Rot
( BlasInt n,
  F* x, BlasInt incx,
  F* y, BlasInt incy,
  const Base<F>& c,
  const F& s )
{
    F xNew, scratch;
    for( BlasInt i=0; i<n; ++i )
    {
        F& xi = x[i*incx];
        F& yi = y[i*incy];

        // xNew := c*xi + s*yi, using the values from before this iteration
        xNew = c;
        xNew *= xi;
        scratch = s;
        scratch *= yi;
        xNew += scratch;

        // yi := c*yi - Conj(s)*xi (xi still holds its old value here)
        yi *= c;
        Conj( s, scratch );
        scratch *= xi;
        yi -= scratch;

        xi = xNew;
    }
}
void LTSolve ( IntType m, // L is m-by-m, where m >= 0 */ F* X, // size m. right-hand-side on input, soln. on output const IntType* Lp, // input of size m+1 const IntType* Li, // input of size lnz=Lp[m] const F* Lx, // input of size lnz=Lp[m] bool conjugate ) { if( conjugate ) { for (IntType i = m-1; i >= 0; i--) { IntType p2 = Lp[i+1] ; for (IntType p = Lp[i]; p < p2; p++) X[i] -= Conj(Lx[p]) * X[Li[p]]; } } else { for (IntType i = m-1; i >= 0; i--) { IntType p2 = Lp[i+1] ; for (IntType p = Lp[i]; p < p2; p++) X[i] -= Lx[p] * X[Li[p]]; } } }
// Unblocked in-place Cholesky factorization that works on the upper triangle
// of A. For each j the diagonal entry is replaced by the square root of its
// real part, the rest of row j is divided by it, and the trailing upper
// triangle receives the matching rank-one downdate.
//
// Throws std::logic_error when a pivot's real part is not positive, and in
// debug builds when A is not square.
inline void CholeskyUVar3Unb( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::CholeskyUVar3Unb");
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Can only compute Cholesky factor of square matrices");
#endif
    typedef typename Base<F>::type R;

    const int n = A.Height();
    const int lda = A.LDim();
    F* ABuffer = A.Buffer();

    for( int j=0; j<n; ++j )
    {
        F& pivotEntry = ABuffer[j+j*lda];
        R pivot = RealPart(pivotEntry);
        if( pivot <= R(0) )
            throw std::logic_error("A was not numerically HPD");
        pivot = Sqrt( pivot );
        pivotEntry = pivot;

        // rowJ[k*lda] addresses A(j,k)
        F* rowJ = &ABuffer[j];
        for( int k=j+1; k<n; ++k )
            rowJ[k*lda] /= pivot;

        // A(i,k) -= conj(A(j,i)) A(j,k) for j < i <= k; row j is never
        // written in this loop, so A(j,k) can be read once per column.
        for( int k=j+1; k<n; ++k )
        {
            F* colK = &ABuffer[k*lda];
            const F alphaJK = rowJ[k*lda];
            for( int i=j+1; i<=k; ++i )
                colK[i] -= Conj(rowJ[i*lda])*alphaJK;
        }
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
void TransposeAxpy ( S alphaS, const Matrix<T>& X, Matrix<T>& Y, bool conjugate ) { DEBUG_CSE const T alpha = T(alphaS); const Int mX = X.Height(); const Int nX = X.Width(); const Int nY = Y.Width(); const Int ldX = X.LDim(); const Int ldY = Y.LDim(); const T* XBuf = X.LockedBuffer(); T* YBuf = Y.Buffer(); // If X and Y are vectors, we can allow one to be a column and the other // to be a row. Otherwise we force X and Y to be the same dimension. if( mX == 1 || nX == 1 ) { const Int lengthX = ( nX==1 ? mX : nX ); const Int incX = ( nX==1 ? 1 : ldX ); const Int incY = ( nY==1 ? 1 : ldY ); DEBUG_ONLY( const Int mY = Y.Height(); const Int lengthY = ( nY==1 ? mY : nY ); if( lengthX != lengthY ) LogicError("Nonconformal TransposeAxpy"); ) if( conjugate ) for( Int j=0; j<lengthX; ++j ) YBuf[j*incY] += alpha*Conj(XBuf[j*incX]); else blas::Axpy( lengthX, alpha, XBuf, incX, YBuf, incY ); }
// Conjugated dot product of two strided vectors: returns the sum over
// i = 0..n-1 of Conj(x[i*incx]) * y[i*incy]. The result is zero when n <= 0.
inline T Dotc( int n, const T* x, int incx, const T* y, int incy )
{
    T sum = 0;
    int i = 0;
    while( i < n )
    {
        const T& xi = x[i*incx];
        const T& yi = y[i*incy];
        sum += Conj(xi)*yi;
        ++i;
    }
    return sum;
}
// Overwrites the square matrix A with an upper-triangular Kahan-type matrix
// built from phi. With zeta = Sqrt(1 - phi*Conj(phi)), row i holds zeta^i on
// the diagonal, -phi*zeta^i to the right of it, and zeros below it.
//
//   phi - parameter; Abs(phi) must be strictly less than one
//   A   - square matrix to overwrite
//
// Throws std::logic_error if A is not square or if Abs(phi) >= 1.
inline void MakeKahan( F phi, Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("MakeKahan");
#endif
    typedef typename Base<F>::type R;

    const int m = A.Height();
    const int n = A.Width();
    if( m != n )
        throw std::logic_error("Cannot make a non-square matrix Kahan");
    if( Abs(phi) >= R(1) )
        throw std::logic_error("Phi must be in (0,1)");

    const F zeta = Sqrt(1-phi*Conj(phi));

    MakeZeros( A );
    for( int i=0; i<n; ++i )
    {
        const F zetaPow = Pow( zeta, R(i) );
        const F offDiag = -phi*zetaPow;
        A.Set( i, i, zetaPow );
        // Start strictly to the right of the diagonal. Starting at column 1
        // overwrote the diagonal entry just set (for i >= 1) and filled the
        // strictly-lower triangle, so the result was not triangular.
        for( int j=i+1; j<n; ++j )
            A.Set( i, j, offDiag );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// incrementalZeroTest sets each res[i], for i=0..n-1, to // a ciphertext in which each slot is 0 or 1 according // to whether or not bits 0..i of corresponding slot in ctxt // is zero (1 if not zero, 0 if zero). // It is assumed that res and each res[i] is already initialized // by the caller. // Complexity: O(d + n log d) smart automorphisms // O(n d) void incrementalZeroTest(Ctxt* res[], const EncryptedArray& ea, const Ctxt& ctxt, long n) { FHE_TIMER_START; long nslots = ea.size(); long d = ea.getDegree(); // compute linearized polynomial coefficients vector< vector<ZZX> > Coeff; Coeff.resize(n); for (long i = 0; i < n; i++) { // coeffients for mask on bits 0..i // L[j] = X^j for j = 0..i, L[j] = 0 for j = i+1..d-1 vector<ZZX> L; L.resize(d); for (long j = 0; j <= i; j++) SetCoeff(L[j], j); vector<ZZX> C; ea.buildLinPolyCoeffs(C, L); Coeff[i].resize(d); for (long j = 0; j < d; j++) { // Coeff[i][j] = to the encoding that has C[j] in all slots // FIXME: maybe encrtpted array should have this functionality // built in vector<ZZX> T; T.resize(nslots); for (long s = 0; s < nslots; s++) T[s] = C[j]; ea.encode(Coeff[i][j], T); } } vector<Ctxt> Conj(d, ctxt); // initialize Cong[j] to ctxt^{2^j} for (long j = 0; j < d; j++) { Conj[j].smartAutomorph(1L << j); } for (long i = 0; i < n; i++) { res[i]->clear(); for (long j = 0; j < d; j++) { Ctxt tmp = Conj[j]; tmp.multByConstant(Coeff[i][j]); *res[i] += tmp; } // *res[i] now has 0..i in each slot // next, we raise to the power 2^d-1 fastPower(*res[i], d); } FHE_TIMER_STOP; }
void UUnb( Matrix<F>& A, Matrix<F>& householderScalars ) { DEBUG_CSE const Int n = A.Height(); const Int householderScalarsHeight = Max(n-1,0); householderScalars.Resize( householderScalarsHeight, 1 ); // Temporary products Matrix<F> x1, x12Adj; for( Int k=0; k<n-1; ++k ) { const Range<Int> ind1( k, k+1 ), ind2( k+1, n ); auto a21 = A( ind2, ind1 ); auto A22 = A( ind2, ind2 ); auto A2 = A( IR(0,n), ind2 ); auto alpha21T = A( IR(k+1,k+2), ind1 ); auto a21B = A( IR(k+2,n), ind1 ); // Find tau and v such that // / I - tau | 1 | | 1, v^H | \ | alpha21T | = | beta | // \ | v | / | a21B | | 0 | const F tau = LeftReflector( alpha21T, a21B ); householderScalars(k) = tau; // Temporarily set a21 := | 1 | // | v | const F beta = alpha21T(0); alpha21T(0) = F(1); // A2 := A2 Hous(a21,tau)^H // = A2 (I - conj(tau) a21 a21^H) // = A2 - conj(tau) (A2 a21) a21^H // ----------------------------------- // x1 := A2 a21 Zeros( x1, n, 1 ); Gemv( NORMAL, F(1), A2, a21, F(0), x1 ); // A2 := A2 - conj(tau) x1 a21^H Ger( -Conj(tau), x1, a21, A2 ); // A22 := Hous(a21,tau) A22 // = (I - tau a21 a21^H) A22 // = A22 - tau a21 (A22^H a21)^H // ---------------------------------- // x12^H := (a21^H A22)^H = A22^H a21 Zeros( x12Adj, A22.Width(), 1 ); Gemv( ADJOINT, F(1), A22, a21, F(0), x12Adj ); // A22 := A22 - tau a21 x12 Ger( -tau, a21, x12Adj, A22 ); // Put beta back alpha21T(0) = beta; } }
inline void DiagonalScale ( LeftOrRight side, Orientation orientation, const Matrix<T>& d, Matrix<T>& X ) { #ifndef RELEASE PushCallStack("DiagonalScale"); #endif const int m = X.Height(); const int n = X.Width(); const int ldim = X.LDim(); if( side == LEFT ) { for( int i=0; i<m; ++i ) { const T delta = d.Get(i,0); T* XBuffer = X.Buffer(i,0); if( orientation == ADJOINT ) for( int j=0; j<n; ++j ) XBuffer[j*ldim] *= Conj(delta); else for( int j=0; j<n; ++j ) XBuffer[j*ldim] *= delta; } } else { for( int j=0; j<n; ++j ) { const T delta = d.Get(j,0); T* XBuffer = X.Buffer(0,j); if( orientation == ADJOINT ) for( int i=0; i<m; ++i ) XBuffer[i] *= Conj(delta); else for( int i=0; i<m; ++i ) XBuffer[i] *= delta; } } #ifndef RELEASE PopCallStack(); #endif }
// Resizes K to n x n and fills it so that entry (i,j) is rho^(j-i) above the
// diagonal and Conj(rho^(i-j)) on and below it.
void KMS( AbstractBlockDistMatrix<T>& K, Int n, T rho )
{
    DEBUG_ONLY(CallStackEntry cse("KMS"))
    K.Resize( n, n );

    auto kmsFill =
      [=]( Int i, Int j ) -> T
      {
          return ( i < j ? Pow(rho,T(j-i)) : Conj(Pow(rho,T(i-j))) );
      };
    IndexDependentFill( K, function<T(Int,Int)>(kmsFill) );
}
inline void MakeNormalUniformSpectrum ( Matrix<Complex<R> >& A, Complex<R> center, R radius ) { #ifndef RELEASE PushCallStack("MakeNormalUniformSpectrum"); #endif typedef Complex<R> C; if( A.Height() != A.Width() ) throw std::logic_error("Cannot make a non-square matrix normal"); // Sample the diagonal matrix D from the ball B_radius(center) // and then rotate it with a random Householder similarity transformation: // // (I-2uu^H) D (I-2uu^H)^H = D - 2(u (conj(D) u)^H + (D u) u^H) + // (4 u^H D u) u u^H // // Form d and D const int n = A.Height(); std::vector<C> d( n ); for( int j=0; j<n; ++j ) d[j] = center + radius*SampleUnitBall<C>(); Diagonal( d, A ); // Form u Matrix<C> u( n, 1 ); MakeUniform( u ); const R origNorm = Nrm2( u ); Scale( 1/origNorm, u ); // Form v := D u Matrix<C> v( n, 1 ); for( int i=0; i<n; ++i ) v.Set( i, 0, d[i]*u.Get(i,0) ); // Form w := conj(D) u Matrix<C> w( n, 1 ); for( int i=0; i<n; ++i ) w.Set( i, 0, Conj(d[i])*u.Get(i,0) ); // Update A := A - 2(u w^H + v u^H) Ger( C(-2), u, w, A ); Ger( C(-2), v, u, A ); // Form \gamma := 4 u^H (D u) = 4 (u,Du) const C gamma = 4*Dot(u,v); // Update A := A + gamma u u^H Ger( gamma, u, u, A ); #ifndef RELEASE PopCallStack(); #endif }
// Applies a three-entry reflector from the left, in place:
//
//   | eta0 | -= tau |  1  | | 1, conj(nu1), conj(nu2) | | eta0 |
//   | eta1 |        | nu1 |                             | eta1 |
//   | eta2 |        | nu2 |                             | eta2 |
//
// where tau is read from w[0], nu1 from w[1], and nu2 from w[2].
inline void ApplyLeftReflector
( Field& eta0, Field& eta1, Field& eta2, const Field* w )
{
    // tau times the inner product of (1, nu1, nu2) with the old entries
    const Field scaledDot = w[0]*(eta0+Conj(w[1])*eta1+Conj(w[2])*eta2);

    eta0 -= scaledDot;
    eta1 -= scaledDot*w[1];
    eta2 -= scaledDot*w[2];
}
// Replaces every entry of A with its conjugate, in place, visiting the
// entries one column after another.
inline void Conjugate( Matrix<T>& A )
{
#ifndef RELEASE
    CallStackEntry entry("Conjugate (in-place)");
#endif
    const Int height = A.Height();
    const Int width = A.Width();
    for( Int col=0; col<width; ++col )
    {
        for( Int row=0; row<height; ++row )
        {
            const T value = A.Get(row,col);
            A.Set( row, col, Conj(value) );
        }
    }
}
// Resizes K to n x n and fills it so that entry (i,j) is rho^(j-i) above the
// diagonal and Conj(rho^(i-j)) on and below it.
void KMS( AbstractDistMatrix<T>& K, Int n, T rho )
{
    EL_DEBUG_CSE
    K.Resize( n, n );

    auto kmsFill =
      [=]( Int i, Int j ) -> T
      {
          if( i >= j )
              return Conj(Pow(rho,T(i-j)));
          return Pow(rho,T(j-i));
      };
    IndexDependentFill( K, function<T(Int,Int)>(kmsFill) );
}
// Element matrix coupling the gradients of two scalar spaces of a compound
// element. For the component spaces u (index GetInd1()) and e (index
// GetInd2()) it integrates coeff_a * grad(e-basis) . grad(u-basis) into the
// (e-rows, u-columns) block of elmat; when the two indices differ, the
// conjugate transpose of that block is added to the (u-rows, e-columns)
// block as well. All remaining entries of elmat are zero.
//
// base_fel must be a CompoundFiniteElement whose two selected components are
// ScalarFiniteElement<D>; the reference dynamic_casts throw otherwise.
void GradGrad<D>::T_CalcElementMatrix (const FiniteElement & base_fel,
                                       const ElementTransformation & eltrans,
                                       FlatMatrix<SCAL> elmat,
                                       LocalHeap & lh) const
{
  // product space and its two scalar components
  const CompoundFiniteElement & cfel =
    dynamic_cast<const CompoundFiniteElement&> (base_fel);
  const ScalarFiniteElement<D> & fel_u =
    dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd1()]);
  const ScalarFiniteElement<D> & fel_e =
    dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd2()]);

  elmat = SCAL(0.0);

  // dof ranges of the u and e components inside the compound element
  IntRange ru = cfel.GetRange(GetInd1());
  IntRange re = cfel.GetRange(GetInd2());
  int ndofe = re.Size();
  int ndofu = ru.Size();

  // workspace for grad(u-basis) and grad(e-basis), taken from the local heap
  FlatMatrixFixWidth<D> grad_u(ndofu,lh);
  FlatMatrixFixWidth<D> grad_e(ndofe,lh);

  // quadrature rule of order Order(u)+Order(e)-2 on u's element type
  ELEMENT_TYPE eltype = fel_u.ElementType();
  const IntegrationRule & ir =
    SelectIntegrationRule(eltype, fel_u.Order()+fel_e.Order()-2);

  FlatMatrix<SCAL> block(ndofe,ndofu,lh);
  block = 0.0;

  for (int ip = 0; ip < ir.GetNIP(); ip++)
    {
      MappedIntegrationPoint<D,D> mip (ir[ip],eltrans);

      // mapped gradients of both bases at this integration point
      fel_u.CalcMappedDShape( mip, grad_u );
      fel_e.CalcMappedDShape( mip, grad_e );

      // coefficient times quadrature weight
      SCAL fac = coeff_a -> T_Evaluate<SCAL>(mip);
      fac *= mip.GetWeight() ;

      //               [ndofe x D] * [D x ndofu]
      block += fac * grad_e * Trans(grad_u) ;
    }

  elmat.Rows(re).Cols(ru) += block;
  if (GetInd1() != GetInd2())
    elmat.Rows(ru).Cols(re) += Conj(Trans(block));
}
/** * Calculates the two-loop beta function of MuPhi. * * @return two-loop beta function */ double CNE6SSMSusy_susy_parameters::calc_beta_MuPhi_two_loop(const Susy_traces& susy_traces) const { const double tracegDAdjgD = TRACE_STRUCT.tracegDAdjgD; const double tracehEAdjhE = TRACE_STRUCT.tracehEAdjhE; const double traceKappaAdjKappa = TRACE_STRUCT.traceKappaAdjKappa; const double traceLambda12AdjLambda12 = TRACE_STRUCT.traceLambda12AdjLambda12; double beta_MuPhi; beta_MuPhi = -0.2*MuPhi*twoLoop*(40*AbsSqr(KappaPr)*(AbsSqr(Sigmax) + 2*AbsSqr(SigmaL)) + 4*AbsSqr(SigmaL)*(15*tracegDAdjgD + 5*tracehEAdjhE + 10*AbsSqr(SigmaL) - 3*Sqr(g1) - 2*Sqr(g1p) - 15*Sqr(g2)) + AbsSqr(Sigmax) *(30*traceKappaAdjKappa + 20*traceLambda12AdjLambda12 + 20*AbsSqr(Lambdax ) - Sqr(g1p)*Sqr(QS)) + 80*Sqr(Conj(KappaPr))*Sqr(KappaPr) + 20*Sqr(Conj( Sigmax))*Sqr(Sigmax)); return beta_MuPhi; }
// Unblocked in-place update of the upper triangle of U in which the diagonal
// entries act as a separate diagonal scaling. For each column j, with
// delta11 the current diagonal entry:
//   - U00 gains u01 (u01 / conj(delta11))^H   (or the unconjugated analogue),
//   - u01 is divided by delta11,
//   - the diagonal entry becomes 1 / delta11.
//
//   orientation - ADJOINT selects the conjugated update; any other value
//                 takes the plain-transpose path (NORMAL is rejected in
//                 debug builds)
//   U           - square matrix whose upper triangle is overwritten
//
// Throws SingularMatrixException when a diagonal entry equals zero.
inline void TrdtrmmUUnblocked( Orientation orientation, Matrix<F>& U )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrdtrmmUUnblocked");
    if( U.Height() != U.Width() )
        LogicError("U must be square");
    if( orientation == NORMAL )
        LogicError("Trdtrmm requires (conjugate-)transpose");
#endif
    const Int n = U.Height();
    F* UBuffer = U.Buffer();
    const Int ldim = U.LDim();
    const bool conjugate = ( orientation == ADJOINT );

    for( Int j=0; j<n; ++j )
    {
        F& diagEntry = UBuffer[j+j*ldim];
        const F delta11 = diagEntry;
        if( delta11 == F(0) )
            throw SingularMatrixException();

        F* RESTRICT u01 = &UBuffer[j*ldim];

        // U00 := U00 + u01 (u01 / conj(delta11))^H   (ADJOINT)
        // U00 := U00 + u01 (u01 / delta11)^T         (otherwise)
        for( Int k=0; k<j; ++k )
        {
            const F gamma = ( conjugate ? Conj(u01[k]) : u01[k] ) / delta11;
            F* RESTRICT U00Col = &UBuffer[k*ldim];
            for( Int i=0; i<=k; ++i )
                U00Col[i] += u01[i]*gamma;
        }

        // u01 := u01 / delta11
        for( Int i=0; i<j; ++i )
            u01[i] /= delta11;

        // diagonal entry := 1 / delta11
        diagEntry = 1 / delta11;
    }
}
// Unblocked in-place update of the lower triangle of L in which the diagonal
// entries act as a separate diagonal scaling. For each row j, with delta11
// the current diagonal entry:
//   - L00 gains l10^H (l10 / delta11)   (or l10^T (l10 / delta11)),
//   - l10 is divided by delta11,
//   - the diagonal entry becomes 1 / delta11.
//
//   orientation - ADJOINT selects the conjugated update; any other value
//                 takes the plain-transpose path (NORMAL is rejected in
//                 debug builds)
//   L           - square matrix whose lower triangle is overwritten
//
// Throws SingularMatrixException when a diagonal entry equals zero.
inline void TrdtrmmLUnblocked( Orientation orientation, Matrix<F>& L )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrdtrmmLUnblocked");
    if( L.Height() != L.Width() )
        LogicError("L must be square");
    if( orientation == NORMAL )
        LogicError("Trdtrmm requires (conjugate-)transpose");
#endif
    const Int n = L.Height();
    F* LBuffer = L.Buffer();
    const Int ldim = L.LDim();
    const bool conjugate = ( orientation == ADJOINT );

    for( Int j=0; j<n; ++j )
    {
        F& diagEntry = LBuffer[j+j*ldim];
        const F delta11 = diagEntry;
        if( delta11 == F(0) )
            throw SingularMatrixException();

        F* RESTRICT l10 = &LBuffer[j];

        // L00 := L00 + l10^H (l10 / delta11)   (ADJOINT)
        // L00 := L00 + l10^T (l10 / delta11)   (otherwise)
        for( Int k=0; k<j; ++k )
        {
            const F gamma = l10[k*ldim] / delta11;
            F* RESTRICT L00Col = &LBuffer[k*ldim];
            if( conjugate )
            {
                for( Int i=k; i<j; ++i )
                    L00Col[i] += Conj(l10[i*ldim])*gamma;
            }
            else
            {
                for( Int i=k; i<j; ++i )
                    L00Col[i] += l10[i*ldim]*gamma;
            }
        }

        // l10 := l10 / delta11
        for( Int k=0; k<j; ++k )
            l10[k*ldim] /= delta11;

        // diagonal entry := 1 / delta11
        diagEntry = 1 / delta11;
    }
}
// Resizes A to n x n and fills it with an upper-triangular Kahan-type matrix.
// With zeta = Sqrt(1 - phi*Conj(phi)), row i holds zeta^i on the diagonal,
// -phi*zeta^i to the right of it, and zeros below it.
void Kahan( AbstractDistMatrix<F>& A, Int n, F phi )
{
    DEBUG_ONLY(CSE cse("Kahan"))
    typedef Base<F> Real;

    A.Resize( n, n );
    const F zeta = Sqrt(F(1)-phi*Conj(phi));

    auto kahanFill =
      [=]( Int i, Int j ) -> F
      {
          // Strictly below the diagonal
          if( i > j )
              return F(0);
          const F zetaPow = Pow(zeta,Real(i));
          if( i == j )
              return zetaPow;
          return -phi*zetaPow;
      };
    IndexDependentFill( A, function<F(Int,Int)>(kahanFill) );
}
// Makes the square matrix A symmetric (or Hermitian when conjugate is true)
// by mirroring the triangle named by uplo across the diagonal. In the
// conjugated case MakeDiagonalReal(A) is applied first and the mirrored
// entries are conjugated.
//
// Calls LogicError if A is not square.
void MakeSymmetric( UpperOrLower uplo, Matrix<T>& A, bool conjugate )
{
    DEBUG_CSE
    const Int n = A.Width();
    if( A.Height() != n )
        LogicError("Cannot make non-square matrix symmetric");

    if( conjugate )
        MakeDiagonalReal(A);

    T* ABuf = A.Buffer();
    const Int ldim = A.LDim();
    const bool fromLower = ( uplo == LOWER );

    for( Int j=0; j<n; ++j )
    {
        // Row range of the source triangle within column j: strictly below
        // the diagonal for LOWER, strictly above it otherwise.
        const Int iBeg = ( fromLower ? j+1 : 0 );
        const Int iEnd = ( fromLower ? n : j );
        for( Int i=iBeg; i<iEnd; ++i )
        {
            const T& source = ABuf[i+j*ldim];
            T& mirrored = ABuf[j+i*ldim];
            if( conjugate )
                mirrored = Conj(source);
            else
                mirrored = source;
        }
    }
}
inline void Conjugate( const Matrix<T>& A, Matrix<T>& B ) { #ifndef RELEASE CallStackEntry entry("Conjugate"); #endif const Int m = A.Height(); const Int n = A.Width(); B.ResizeTo( m, n ); for( Int j=0; j<n; ++j ) for( Int i=0; i<m; ++i ) B.Set(i,j,Conj(A.Get(i,j))); }
// Boundary element matrix coupling two normal-trace spaces of a compound
// element. For the components q.n (index GetInd1()) and r.n (index
// GetInd2()) it integrates coeff_c * (r.n-shape) * (q.n-shape) into the
// (r-rows, q-columns) block of elmat; when the two indices differ, the
// conjugate transpose of that block is added to the (q-rows, r-columns)
// block as well. All remaining entries of elmat are zero.
//
// base_fel must be a CompoundFiniteElement whose two selected components are
// HDivNormalFiniteElement<D-1>; the reference dynamic_casts throw otherwise.
void FluxFluxBoundary<D> ::
T_CalcElementMatrix (const FiniteElement & base_fel,
                     const ElementTransformation & eltrans,
                     FlatMatrix<SCAL> elmat,
                     LocalHeap & lh) const
{
  // product space
  const CompoundFiniteElement & cfel =
    dynamic_cast<const CompoundFiniteElement&> (base_fel);

  // These FEs are already multiplied by the normal:
  const HDivNormalFiniteElement<D-1> & fel_q =     // q.n space
    dynamic_cast<const HDivNormalFiniteElement<D-1>&> (cfel[GetInd1()]);
  const HDivNormalFiniteElement<D-1> & fel_r =     // r.n space
    dynamic_cast<const HDivNormalFiniteElement<D-1>&> (cfel[GetInd2()]);

  elmat = SCAL(0.0);

  // dof ranges of the q and r components inside the compound element
  IntRange rq = cfel.GetRange(GetInd1());
  IntRange rr = cfel.GetRange(GetInd2());
  int ndofq = rq.Size();
  int ndofr = rr.Size();

  FlatMatrix<SCAL> block(ndofr, ndofq, lh);
  block = SCAL(0.0);

  FlatVector<> shape_q(fel_q.GetNDof(), lh);
  FlatVector<> shape_r(fel_r.GetNDof(), lh);

  // quadrature rule of order Order(q)+Order(r) on q's element type
  const IntegrationRule ir(fel_q.ElementType(),
                           fel_q.Order() + fel_r.Order());

  for (int ip = 0; ip < ir.GetNIP(); ip++)
    {
      MappedIntegrationPoint<D-1,D> mip(ir[ip], eltrans);

      SCAL cc = coeff_c -> T_Evaluate<SCAL>(mip);

      fel_r.CalcShape (ir[ip], shape_r);
      fel_q.CalcShape (ir[ip], shape_q);

      // mapped q.n-shape is simply reference q.n-shape / measure
      shape_q *= 1.0/mip.GetMeasure();
      shape_r *= 1.0/mip.GetMeasure();

      //                           [ndofr x 1]  [1 x ndofq]
      block += (cc*mip.GetWeight()) * shape_r * Trans(shape_q);
    }

  elmat.Rows(rr).Cols(rq) += block;
  if (GetInd1() != GetInd2())
    elmat.Rows(rq).Cols(rr) += Conj(Trans(block));
}