inline typename Base<F>::type LogDetDivergence ( UpperOrLower uplo, const DistMatrix<F>& A, const DistMatrix<F>& B ) { #ifndef RELEASE PushCallStack("LogDetDivergence"); #endif if( A.Grid() != B.Grid() ) throw std::logic_error("A and B must use the same grid"); if( A.Height() != A.Width() || B.Height() != B.Width() || A.Height() != B.Height() ) throw std::logic_error ("A and B must be square matrices of the same size"); typedef typename Base<F>::type R; const int n = A.Height(); const Grid& g = A.Grid(); DistMatrix<F> ACopy( A ); DistMatrix<F> BCopy( B ); Cholesky( uplo, ACopy ); Cholesky( uplo, BCopy ); if( uplo == LOWER ) { Trtrsm( LEFT, uplo, NORMAL, NON_UNIT, F(1), BCopy, ACopy ); } else { MakeTrapezoidal( LEFT, uplo, 0, ACopy ); Trsm( LEFT, uplo, NORMAL, NON_UNIT, F(1), BCopy, ACopy ); } MakeTrapezoidal( LEFT, uplo, 0, ACopy ); const R frobNorm = Norm( ACopy, FROBENIUS_NORM ); R logDet; R localLogDet(0); DistMatrix<F,MD,STAR> d(g); ACopy.GetDiagonal( d ); if( d.InDiagonal() ) { const int nLocalDiag = d.LocalHeight(); for( int iLocal=0; iLocal<nLocalDiag; ++iLocal ) { const R delta = RealPart(d.GetLocal(iLocal,0)); localLogDet += 2*Log(delta); } } mpi::AllReduce( &localLogDet, &logDet, 1, mpi::SUM, g.VCComm() ); const R logDetDiv = frobNorm*frobNorm - logDet - R(n); #ifndef RELEASE PopCallStack(); #endif return logDetDiv; }
/*
   Fills <_samples> with <nsamples> rows drawn from a multivariate normal
   distribution.
     <_mean> - mean vector, given either as a single row or a single column
     <_cov>  - square covariance matrix whose side equals the mean length;
               it is handed to Cholesky() to obtain the transform matrix
   The output is created as an nsamples x dim CV_32F matrix.
*/
void randMVNormal( InputArray _mean, InputArray _cov, int nsamples, OutputArray _samples )
{
    // validate the shapes of the mean vector and the covariance matrix
    Mat mean = _mean.getMat();
    Mat cov = _cov.getMat();
    const int dim = (int)mean.total();
    CV_Assert(mean.rows == 1 || mean.cols == 1);
    CV_Assert(cov.rows == dim && cov.cols == dim);

    // from here on the mean is always a single-channel row vector
    mean = mean.reshape(1,1);

    // start from independent N(0,1) draws, one sample per row
    _samples.create(nsamples, dim, CV_32F);
    Mat samples = _samples.getMat();
    randn(samples, Scalar::all(0), Scalar::all(1));

    // factor the covariance; utmat is the matrix applied on the right
    Mat utmat;
    Cholesky(cov, utmat);

    // colour and shift every row in place
    for( int r = 0; r < nsamples; ++r )
    {
        Mat sample = samples.row(r);
        sample = sample * utmat + mean;
    }
}
/**
 * Inverts a matrix in place by Cholesky-factoring it and then calling the
 * LAPACK routine dpotri on the factor (upper triangle, "U").
 *
 * @param A  square matrix; its storage is overwritten by Cholesky() and then
 *           by dpotri.
 * @return   error flag: true when the inversion could NOT be performed
 *           (non-square input or a non-zero LAPACK info code), false on
 *           success.  This inverted-looking convention is kept because
 *           existing callers depend on it.
 */
bool CMatrixFactorization<double>::InvertSymmetric(CDenseArray<double>& A) {

    // Only the shape can be checked here; symmetry itself is not verified.
    if(A.NCols()!=A.NRows()) {

        cout << "ERROR: Matrix is not symmetric." << endl;
        return true;

    }

    Cholesky(A);

    int n = A.NRows();
    int lda = n;
    int info = 0;

    double* a = A.Data().get();

    dpotri_("U",&n,a,&lda,&info);

    // info > 0: a diagonal entry of the factor is zero (singular matrix);
    // info < 0: an argument was rejected.  In both cases no valid inverse
    // was produced, so both must be reported as failure.
    if(info!=0) {

        cout << "ERROR: Inversion failed." << endl;
        return true;

    }

    return false;

}
// Sequential log-det divergence of two square matrices of equal size.
// Copies of A and B are Cholesky-factored with the requested triangle, the
// factor of A is triangular-solved against the factor of B, its opposite
// triangle is zeroed, and the value
//     ||T||_F^2 - sum_i 2*log(Re(T_ii)) - n
// is returned, T being the solved factor.  Throws std::logic_error when the
// shapes do not match.  A and B are left untouched.
inline typename Base<F>::type
LogDetDivergence( UpperOrLower uplo, const Matrix<F>& A, const Matrix<F>& B )
{
#ifndef RELEASE
    PushCallStack("LogDetDivergence");
#endif
    const bool squareAndConformal =
        A.Height() == A.Width() &&
        B.Height() == B.Width() &&
        A.Height() == B.Height();
    if( !squareAndConformal )
        throw std::logic_error
        ("A and B must be square matrices of the same size");

    typedef typename Base<F>::type R;
    const int n = A.Height();

    // Work on private copies of the inputs.
    Matrix<F> factorA( A );
    Matrix<F> factorB( B );
    Cholesky( uplo, factorA );
    Cholesky( uplo, factorB );

    if( uplo != LOWER )
    {
        // The general solve reads the whole matrix, so clear the unused
        // triangle of the factor first.
        MakeTrapezoidal( LEFT, uplo, 0, factorA );
        Trsm( LEFT, uplo, NORMAL, NON_UNIT, F(1), factorB, factorA );
    }
    else
    {
        Trtrsm( LEFT, uplo, NORMAL, NON_UNIT, F(1), factorB, factorA );
    }

    MakeTrapezoidal( LEFT, uplo, 0, factorA );
    const R frob = Norm( factorA, FROBENIUS_NORM );

    // Accumulate the log-determinant term from the diagonal.
    Matrix<F> diag;
    factorA.GetDiagonal( diag );
    R logDetTotal(0);
    for( int k=0; k<n; ++k )
    {
        logDetTotal += 2*Log( RealPart(diag.Get(k,0)) );
    }

    const R divergence = frob*frob - logDetTotal - R(n);
#ifndef RELEASE
    PopCallStack();
#endif
    return divergence;
}
// Solves a * x = b for fixed-size matrices.
// `a` is copied into a scratch matrix and `x` is seeded with `b`; both
// buffers are then handed to the Cholesky kernel when
// method == DECOMP_CHOLESKY and to the LU kernel for every other method.
// Returns the kernel's success indication (the LU result is compared
// against zero).
bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
                Matx<_Tp, m, n>& x, int method) const
{
    Matx<_Tp, m, m> scratch = a;   // keep the caller's matrix untouched
    x = b;                         // right-hand side / result buffer

    if( method != DECOMP_CHOLESKY )
    {
        return LU(scratch.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
    }
    return Cholesky(scratch.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
}
// Computes the next adjustment vector from the matrix returned by LL.FI().
// The matrix is Cholesky-factored into the member LT, the member Derivs is
// solved against LT and then against its transpose, and `test` receives the
// sum of squares of the intermediate vector (also printed to cout).
// Returns true when `test` is below the member Criterion.
bool MLE_D_FI::NextPoint(ColumnVector& Adj, Real& test)
{
   Tracer tr("MLE_D_FI::NextPoint");

   SymmetricMatrix info = LL.FI();
   LT = Cholesky(info);

   // forward solve, then back solve with the transposed factor
   ColumnVector halfStep = LT.i() * Derivs;
   Adj = LT.t().i() * halfStep;

   test = SumSquare(halfStep);
   cout << "   " << setw(20) << setprecision(10) << test;

   const bool belowCriterion = (test < Criterion);
   return belowCriterion;
}
// Reports whether A passes the positive-definiteness test used here:
// A must satisfy is_almost_symmetric(), and Cholesky(A) must finish without
// throwing.  Any exception raised by the factorisation is swallowed and
// reported as "not positive definite"; exceptions from the symmetry check
// itself are not caught.
static bool is_positive_definite(const Matrix& A) {
  bool positive = false;
  if (is_almost_symmetric(A)) {
    try {
      Cholesky(A);
      positive = true;
    } catch (...) {
      positive = false;
    }
  }
  return positive;
}
void Tikhonov ( Orientation orientation, const Matrix<F>& A, const Matrix<F>& B, const Matrix<F>& G, Matrix<F>& X, TikhonovAlg alg ) { DEBUG_CSE const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( G.Width() != n ) LogicError("Tikhonov matrix was the wrong width"); if( orientation == TRANSPOSE && IsComplex<F>::value ) LogicError("Transpose version of complex Tikhonov not yet supported"); if( m >= n ) { Matrix<F> Z; if( alg == TIKHONOV_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<F>(1), A, Z ); else Herk( LOWER, NORMAL, Base<F>(1), A, Z ); Herk( LOWER, ADJOINT, Base<F>(1), G, Base<F>(1), Z ); Cholesky( LOWER, Z ); } else { const Int mG = G.Height(); Zeros( Z, m+mG, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+mG), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); ZB = G; qr::ExplicitTriang( Z ); } if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, F(1), A, B, X ); else Gemm( NORMAL, NORMAL, F(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { LogicError("This case not yet supported"); } }
// Cholesky decomposition method // meant for Kalman filter; check if still valid if not using for that application void invMatrix(struct Matrix *self,struct Matrix *inv) { if (self->numRow==self->numCol) { //check to see if the number of columns equals rows struct Matrix aux; createMatrix(&aux,self->numRow,self->numCol); Cholesky(self,&aux); CholeskyInverse(&aux,inv); deleteMatrix(&aux); } else { //raise error } }
// Inverts the fixed-size matrix `a` into `b`.
// Precondition inherited from the original code: `b` must be all zeros on
// entry; only its diagonal is written here to turn it into an identity
// matrix, which is then passed as the right-hand side to the Cholesky
// kernel (method == DECOMP_CHOLESKY) or to the LU kernel (any other method).
// Returns the kernel's success indication (the LU result is compared
// against zero).
bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const
{
    Matx<_Tp, m, m> scratch = a;   // keep the caller's matrix untouched

    // b is assumed zero-filled, so setting the diagonal yields identity
    for( int d = 0; d < m; ++d )
    {
        b(d, d) = (_Tp)1;
    }

    if( method != DECOMP_CHOLESKY )
    {
        return LU(scratch.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
    }
    return Cholesky(scratch.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
}
/* See Waggoner and Zha, "A Gibbs sampler for structural vector autoregressions", JEDC 2003, for discription of notations. We take the square root of a symmetric and positive definite X to be any matrix Y such that Y*Y'=X. Note that this is not the usual definition because we do not require Y to be symmetric and positive definite. */ void SBVAR_symmetric_linear::SetSimulationInfo(void) { if (NumberObservations() == 0) throw dw_exception("SetSimulationInfo(): cannot simulate if no observations"); TDenseMatrix all_YY, all_XY, all_XX; if (flat_prior) { all_YY=YY; all_XY=XY; all_XX=XX; } else { TDenseMatrix all_Y, all_X; all_Y=VCat(sqrt(lambda)*Data(),sqrt(lambda_bar)*prior_Y); all_X=VCat(sqrt(lambda)*PredeterminedData(),sqrt(lambda_bar)*prior_X); all_YY=Transpose(all_Y)*all_Y; all_XY=Transpose(all_X)*all_Y; all_XX=Transpose(all_X)*all_X; } Simulate_SqrtH.resize(n_vars); Simulate_P.resize(n_vars); Simulate_SqrtS.resize(n_vars); Simulate_USqrtS.resize(n_vars); for (int i=n_vars-1; i >= 0; i--) { TDenseMatrix invH=Transpose(V[i])*(all_XX*V[i]); Simulate_SqrtH[i]=Inverse(Cholesky(invH,CHOLESKY_UPPER_TRIANGULAR),SOLVE_UPPER_TRIANGULAR); Simulate_P[i]=Simulate_SqrtH[i]*(Transpose(Simulate_SqrtH[i])*(Transpose(V[i])*(all_XY*U[i]))); Simulate_SqrtS[i]=sqrt(lambda_T)*Inverse(Cholesky(Transpose(U[i])*(all_YY*U[i]) - Transpose(Simulate_P[i])*(invH*Simulate_P[i]),CHOLESKY_UPPER_TRIANGULAR),SOLVE_UPPER_TRIANGULAR); Simulate_USqrtS[i]=U[i]*Simulate_SqrtS[i]; } simulation_info_set=true; }
// Gibbs step that redraws the mean (Mu) and covariance (SIGMA and its
// Cholesky-related caches) of each of the K mixture components.
//
// Data     - not read inside this function.
// f_Sigma  - added to n_z(k) to form the first argument passed to
//            rIW_w_pd_check_fn for component k.
// h_Mu     - added to n_z(k) to form h_k, which weights the prior mean
//            and scales the proposal factor for the mean.
//
// Updates the members Mu, LSIGMA, LSIGMA_i, SIGMA, logdet_and_more and
// Sigma_k_inv_ll.  Matrices and vectors are 1-based (newmat convention),
// the per-component std::vector containers are 0-based.
void CParam::S5_MuSigma(CData &Data, double f_Sigma,double h_Mu) {
  // X[k] collects the centred rows currently assigned to component k+1.
  vector<Matrix> X = vector<Matrix>(K);
  // Next-row counters, one per component.  Held in a vector (zero
  // initialised) so the storage is released even when one of the matrix
  // routines below throws.
  vector<int> Counts(K, 0);
  for (int k =0; k < K; k++) {
    if (n_z(k+1) > 0) {
      X[k] = Matrix(n_z(k+1),n_var_independent); X[k] = 0;
    }
  }

  for (int i=1; i<=Y_aug_compact.nrows(); i++){
    int k = z_aug(i);
    X[k-1].row(++Counts[k-1]) = Y_aug_compact.row(i) - X_bar.column(k).t();
  }

  SymmetricMatrix SqMatrix;
  for (int k=1; k<=K ; k++) {
    // propose Sigma_k_q
    double f_Sigma_tilde_k = f_Sigma + n_z(k);
    double h_k = h_Mu + n_z(k);

    SymmetricMatrix Phi_tilde_k = Phi;
    ColumnVector mu_tilde_k = mu_bar;
    if ( n_z(k) > 0) {
      mu_tilde_k = (h_Mu * mu_bar + X_bar.column(k) * n_z(k)) / h_k;
      // scatter of the centred rows
      SqMatrix << X[k-1].t() * X[k-1];
      Phi_tilde_k += SqMatrix;   //can be further optimized
      // correction for the distance between component mean and prior mean
      ColumnVector xbar_mubar = X_bar.column(k) - mu_bar;
      SqMatrix << (h_Mu*n_z(k)/h_k) * ( xbar_mubar * xbar_mubar.t());
      Phi_tilde_k += SqMatrix ;
    }
    LowerTriangularMatrix LPhi_tilde_k = Cholesky(Phi_tilde_k);
    LowerTriangularMatrix LSigma_k_q = rIW_w_pd_check_fn( f_Sigma_tilde_k, LPhi_tilde_k );

    // propose mu_k_q
    LowerTriangularMatrix LSigma_k_tilde = (1.0/sqrt(h_k)) * LSigma_k_q ;
    ColumnVector mu_k_q = rMVN_fn( mu_tilde_k, LSigma_k_tilde );    // Modified

    // Gibbs update
    Mu.column(k) = mu_k_q ;
    LSIGMA[k-1] = LSigma_k_q;
    LSIGMA_i[k-1] = LSigma_k_q.i();
    SIGMA[k-1] << LSigma_k_q * LSigma_k_q.t();
    logdet_and_more(k) = -0.5*n_var* LOG_2_PI + logdet(LSIGMA_i[k-1]);

    // S = L * L.t() ; S.i() = (L.i()).t() * L.i() ;
    Matrix Sigma_k_inv = LSIGMA_i[k-1].t() * LSIGMA_i[k-1];
    for (int i_var=1; i_var<= n_var_independent; i_var++) {
      Sigma_k_inv_ll(k,i_var) = Sigma_k_inv(i_var,i_var);
    }
  }
}
// Computes, column by column, the coefficient estimates restricted to the
// predictors selected in `included`.
//
// included - one inclusion pattern (Selector) per response column.
// returns  - xdim() x ydim() matrix; column i is the solution of the selected
//            sub-system of xtx() against the selected part of xty_.col(i),
//            expanded back to full length by the selector.
//
// Columns that share the same inclusion pattern reuse one Cholesky
// factorisation, cached in a local map keyed by the pattern.
Matrix MvRegSuf::conditional_beta_hat(const SelectorMatrix &included) const {
  Matrix ans(xdim(), ydim());
  std::map<Selector, Cholesky> chol_map;
  for (int i = 0; i < ydim(); ++i) {
    const Selector &inc(included.col(i));
    auto it = chol_map.find(inc);
    if (it == chol_map.end()) {
      // Cache miss: `it` is the end iterator here and must not be
      // dereferenced, so the new entry is keyed by `inc` itself.
      chol_map[inc] = Cholesky(inc.select(xtx()));
      it = chol_map.find(inc);
    }
    ans.col(i) = inc.expand(it->second.solve(inc.select(xty_.col(i))));
  }
  return ans;
}
// Blocked, in-place update of the square matrix A that walks down its
// diagonal one block (A11) at a time.
// NOTE(review): the call-stack label "hpd_inverse::CholeskyUVar2" suggests
// this is the upper-triangular "variant 2" of an HPD inversion built on
// Cholesky; confirm against the callers before relying on that.
// In non-RELEASE builds a non-square A raises std::logic_error.
inline void
CholeskyUVar2( Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("hpd_inverse::CholeskyUVar2");
    if( A.Height() != A.Width() )
        throw std::logic_error("Nonsquare matrices cannot be triangular");
#endif
    // Matrix views
    Matrix<F>
        ATL, ATR,  A00, A01, A02,
        ABL, ABR,  A10, A11, A12,
                   A20, A21, A22;

    // Start the algorithm
    PartitionDownDiagonal
    ( A, ATL, ATR,
         ABL, ABR, 0 );
    // Loop until the top-left quadrant has grown to cover all of A.
    while( ATL.Height() < A.Height() )
    {
        RepartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, /**/ A01, A02,
         /*************/ /******************/
               /**/       A10, /**/ A11, A12,
          ABL, /**/ ABR,  A20, /**/ A21, A22 );

        //--------------------------------------------------------------------//
        // Factor the current diagonal block in place.
        Cholesky( UPPER, A11 );
        // Apply the factored block to the panels above and to the right of it.
        Trsm( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), A11, A01 );
        Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A12 );
        // Rank-k / product updates of the three remaining upper blocks.
        Herk( UPPER, NORMAL, F(1), A01, F(1), A00 );
        Gemm( NORMAL, NORMAL, F(-1), A01, A12, F(1), A02 );
        Herk( UPPER, ADJOINT, F(-1), A12, F(1), A22 );
        // Second pass of triangular solves on the same two panels
        // (note the -1 scaling on A12).
        Trsm( RIGHT, UPPER, ADJOINT, NON_UNIT, F(1), A11, A01 );
        Trsm( LEFT, UPPER, NORMAL, NON_UNIT, F(-1), A11, A12 );
        // Replace the diagonal block by its triangular inverse, then by the
        // triangular product formed by Trtrmm.
        TriangularInverse( UPPER, NON_UNIT, A11 );
        Trtrmm( ADJOINT, UPPER, A11 );
        //--------------------------------------------------------------------//

        SlidePartitionDownDiagonal
        ( ATL, /**/ ATR,  A00, A01, /**/ A02,
               /**/       A10, A11, /**/ A12,
         /*************/ /******************/
          ABL, /**/ ABR,  A20, A21, /**/ A22 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
// Solves a * x = b for fixed-size matrices.
// DECOMP_LU and DECOMP_CHOLESKY are handled by the in-place kernels on a
// scratch copy of `a`, with `x` seeded from `b`; every other method is
// forwarded unchanged to cv::solve.
// Returns the success indication of whichever solver ran.
bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
                Matx<_Tp, m, n>& x, int method) const
{
    const bool handledHere = (method == DECOMP_LU || method == DECOMP_CHOLESKY);
    if (!handledHere)
    {
        return cv::solve(a, b, x, method);
    }

    Matx<_Tp, m, m> scratch = a;   // keep the caller's matrix untouched
    x = b;                         // right-hand side / result buffer

    if( method != DECOMP_CHOLESKY )
    {
        return LU(scratch.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
    }
    return Cholesky(scratch.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
}
// Determinant of a distributed matrix in "safe product" form, computed from
// a Cholesky factorisation that overwrites A.
// On success rho is F(1) and kappa is the sum over the diagonal of the
// factor of Log(Re(d_i)) / (n/2), reduced over the grid's VC communicator.
// If the factorisation throws NonHPDMatrixException, both rho and kappa are
// set to zero instead.  Throws std::logic_error for a non-square A.
inline SafeProduct<F>
SafeHPDDeterminantWithOverwrite( UpperOrLower uplo, DistMatrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::SafeHPDDeterminantWithOverwrite");
#endif
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Cannot compute determinant of nonsquare matrix");
    typedef typename Base<F>::type R;
    const int n = A.Height();
    const R scale = R(n)/R(2);
    SafeProduct<F> det( n );
    const Grid& grid = A.Grid();

    try
    {
        Cholesky( uplo, A );

        DistMatrix<F,MD,STAR> diag(grid);
        A.GetDiagonal( diag );

        // Sum the scaled logs of the locally owned diagonal entries.
        R kappaPartial = 0;
        if( diag.InDiagonal() )
        {
            const int numLocal = diag.LocalHeight();
            for( int k=0; k<numLocal; ++k )
            {
                const R entry = RealPart(diag.GetLocal(k,0));
                kappaPartial += Log(entry)/scale;
            }
        }
        mpi::AllReduce( &kappaPartial, &det.kappa, 1, mpi::SUM, grid.VCComm() );
        det.rho = F(1);
    }
    catch( NonHPDMatrixException& )
    {
        // Factorisation failed: report a zero determinant.
        det.rho = 0;
        det.kappa = 0;
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return det;
}
/*
   Generates <nsamples> samples from a multivariate normal distribution, one
   sample per row of <_samples> (created as nsamples x dim, CV_32F).
     <_mean> - mean vector, given either as a single row or a single column
     <_cov>  - square, symmetric covariance matrix whose side equals the
               length of the mean; it is handed to Cholesky() to obtain the
               transform matrix
*/
void randMVNormal( InputArray _mean, InputArray _cov, int nsamples, OutputArray _samples )
{
    Mat mean = _mean.getMat(), cov = _cov.getMat();
    int dim = (int)mean.total();

    // reject shapes the transform below cannot handle
    CV_Assert(mean.rows == 1 || mean.cols == 1);
    CV_Assert(cov.rows == dim && cov.cols == dim);

    _samples.create(nsamples, dim, CV_32F);
    Mat samples = _samples.getMat();
    // the base draws must be standard normal, not uniform, for the
    // transformed rows to follow N(mean, cov)
    randn(samples, Scalar::all(0), Scalar::all(1));

    Mat utmat;
    Cholesky(cov, utmat);

    // each sample is a 1 x dim row, so a column-shaped mean has to be
    // transposed by gemm while a row-shaped mean is used as is
    int flags = mean.cols == 1 ? GEMM_3_T : 0;

    for( int i = 0; i < nsamples; i++ )
    {
        Mat sample = samples.row(i);
        gemm(sample, utmat, 1, mean, 1, sample, flags);
    }
}
/* See Waggoner and Zha, "A Gibbs sampler for structural vector autoregressions", JEDC 2003, for discription of notations. We take the square root of a symmetric and positive definite X to be any matrix Y such that Y*Y'=X. Note that this is not the usual definition because we do not require Y to be symmetric and positive definite. */ void SBVAR_symmetric_linear::SetPriorSimulationInfo(void) { if (flat_prior) throw dw_exception("flat prior not allowed if simulating from prior"); PriorSimulate_SqrtVariance.resize(n_vars); TDenseMatrix X; for (int i=n_vars-1; i >= 0; i--) { TDenseMatrix S(dim_b[i]+dim_g[i],dim_b[i]+dim_g[i]); S.Insert(0,0,TransposeMultiply(U[i],prior_YY*U[i])); S.Insert(dim_b[i],0,X=-TransposeMultiply(V[i],prior_XY*U[i])); S.Insert(0,dim_b[i],Transpose(X)); S.Insert(dim_b[i],dim_b[i],TransposeMultiply(V[i],prior_XX*V[i])); PriorSimulate_SqrtVariance[i]=Inverse(Cholesky(S,CHOLESKY_UPPER_TRIANGULAR),SOLVE_UPPER_TRIANGULAR); } prior_simulation_info_set=true; }
// Determinant of a sequential matrix in "safe product" form, computed from
// a Cholesky factorisation that overwrites A.
// On success rho is F(1) and Log(Re(d_i)) / (n/2) is added to kappa for
// every diagonal entry d_i of the factor.  If the factorisation throws
// NonHPDMatrixException, both rho and kappa are set to zero instead.
// Throws std::logic_error for a non-square A.
inline SafeProduct<F>
SafeHPDDeterminantWithOverwrite( UpperOrLower uplo, Matrix<F>& A )
{
#ifndef RELEASE
    PushCallStack("internal::SafeHPDDeterminantWithOverwrite");
#endif
    if( A.Height() != A.Width() )
        throw std::logic_error
        ("Cannot compute determinant of nonsquare matrix");
    typedef typename Base<F>::type R;
    const int n = A.Height();
    const R scale = R(n)/R(2);
    SafeProduct<F> det( n );

    try
    {
        Cholesky( uplo, A );

        Matrix<F> diag;
        A.GetDiagonal( diag );

        det.rho = F(1);
        for( int k=0; k<n; ++k )
        {
            const R entry = RealPart(diag.Get(k,0));
            det.kappa += Log(entry)/scale;
        }
    }
    catch( NonHPDMatrixException& )
    {
        // Factorisation failed: report a zero determinant.
        det.rho = 0;
        det.kappa = 0;
    }
#ifndef RELEASE
    PopCallStack();
#endif
    return det;
}
// Draws one N-dimensional sample into `random`:
//     random = mu + cholCov * z,   z = two values from normalrand(seed)
// where cholCov is filled by Cholesky(cholCov, Sigma, Cov, N).
// Only N == 2 is implemented; for any other N the output is left untouched.
// Always returns 0.
int MultivariateRandomOld(float* random, float* mu, __constant float* Cov, float Sigma, int N, __private int* seed)
{
	float randvalues[2];
	float cholCov[4];

	if (N == 2)
	{
		randvalues[0] = normalrand(seed);
		randvalues[1] = normalrand(seed);

		Cholesky(cholCov, Sigma, Cov, N);

		// output component r combines the r-th group of N factor entries
		for (int r = 0; r < 2; r++)
		{
			random[r] = mu[r] + cholCov[0 + r * N] * randvalues[0] + cholCov[1 + r * N] * randvalues[1];
		}
	}
	// N == 3, N == 4 and everything else: nothing is generated

	return 0;
}
//--------------------------------------------------------------------------- //--------------------------------------------------------------------------- TGLM::TGLM(my_string* passedParams, int numPassedParams){ if(numPassedParams<3) throw TException("The INTERNALGLM program requires at least three parameters!", _FATAL_ERROR); //the first argument is the name of the matrix file my_string matricesFilename=passedParams[0]; //the second argument is the output filename outputFilename=passedParams[1]; //read file with matrix definitions ifstream is; is.open(matricesFilename.c_str()); if(!is) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' could not be opend!", _FATAL_ERROR); my_string temp, buf; buf.read_line(is); buf.trim_blanks(); if(buf!="C") throw TException("The file with the definitions of the matrices '"+matricesFilename+"' does not start with matrix C (tag missing?)!", _FATAL_ERROR); //read matrix C as a vector of arrays vector<double> firstLine; vector<double*> Ctemp; //read first line buf.read_line(is); buf.trim_blanks(); while(!buf.empty()){ temp=buf.extract_sub_str_before_ws(); temp.trim_blanks(); firstLine.push_back(temp.toDouble()); } numParams=firstLine.size(); Ctemp.push_back(new double[numParams]); for(int i=0;i<numParams;++i) Ctemp[0][i]=firstLine[i]; //read all the other lines buf.read_line(is); while(!buf.contains("c0")){ buf.trim_blanks(); if(!buf.empty()){ Ctemp.push_back(new double[numParams]); int i=0; while(!buf.empty()){ if(i==numParams) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains unequal number of values on the different lines specifying the matrix C!", _FATAL_ERROR); temp=buf.extract_sub_str_before_ws(); temp.trim_blanks(); Ctemp[Ctemp.size()-1][i]=temp.toDouble(); ++i; } if(i<numParams) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains unequal number of values on the different lines specifying the matrix 
C!", _FATAL_ERROR); } buf.read_line(is); } numStats=Ctemp.size(); C.ReSize(numStats, numParams); for(int i=0; i<numStats; ++i){ for(int j=0; j<numParams; ++j){ C.element(i,j)=Ctemp[i][j]; } } //read the c0 vector c0.ReSize(numStats); buf.read_line(is); buf.trim_blanks(); int i=0; while(!buf.empty()){ if(i==numStats) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains too many values for c0!", _FATAL_ERROR); temp=buf.extract_sub_str_before_ws(); temp.trim_blanks(); c0.element(i)=temp.toDouble(); ++i; } if(i<numStats) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains too few values for c0!", _FATAL_ERROR); //read variances buf.read_line(is); buf.trim_blanks(); if(buf!="Sigma") throw TException("The file with the definitions of the matrices '"+matricesFilename+"' does not contain the matrix Sigma (tag missing?)!", _FATAL_ERROR); Sigma.ReSize(numStats); for(int line=0; line<numStats;++line){ buf.read_line(is); buf.trim_blanks(); if(buf.empty()) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains too few rows for Sigma!", _FATAL_ERROR); i=0; while(!buf.empty()){ if(i==numStats) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains too many values for Sigma on line "+line+"!", _FATAL_ERROR); temp=buf.extract_sub_str_before_ws(); temp.trim_blanks(); Sigma.element(line, i)=temp.toDouble(); ++i; } if(i<numStats) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains too few values for Sigma on line "+line+"!", _FATAL_ERROR); } buf.read_line(is); buf.trim_blanks(); if(!buf.empty()) throw TException("The file with the definitions of the matrices '"+matricesFilename+"' contains too few rows for Sigma!!", _FATAL_ERROR); is.close(); //DONE reading matrix file.... 
//check the number of passed parameters if(numPassedParams<(2+numParams)) throw TException("Too few parameters passed to the INTERNALGLM program!", _FATAL_ERROR); if(numPassedParams>(2+numParams)) throw TException("Too many parameters passed to the INTERNALGLM program!", _FATAL_ERROR); //prepare some matrices try{ A=Cholesky(Sigma); } catch (...){ throw TException("INTERNALGLM program: problem solving the Cholesky decomposition of Sigma!", _FATAL_ERROR); } e.ReSize(numStats); P.ReSize(numParams); s.ReSize(numStats); }
// Evaluates log_MVN_fn for the sub-vector of record i_original flagged in
// item_by_rnorm, conditional on the remaining components (when there are
// any), and optionally draws that sub-vector.
//
//   Data          - used only for get_compact_vector().
//   i_original    - 1-based record index into z_in and Y_in.
//   item_by_rnorm - indicator vector of the components to evaluate/draw;
//                   when its sum is below 1 the result is 0.
//   tilde_y_i     - current full record; supplies the conditioning values
//                   and is the starting point for y_q.
//   y_q           - output, written only when is_q is true: tilde_y_i with
//                   the flagged components replaced by the new draw.
//   is_q          - true: draw the sub-vector with rMVN_fn; false: take it
//                   from row i_original of Y_in.
//   LSigma_1_i    - not written by the current code (its assignment is
//                   commented out below).
//   s_q           - indicator vector; its compact complement selects the
//                   components that are conditioned on.
// Returns the value of log_MVN_fn at the sub-vector, or 0.
//
// NOTE(review): the source this was recovered from had lost its line
// breaks, so the boundary between code and trailing comments was inferred.
// In particular "LSigma_cond = Cholesky(Sigma_cond_symm);" is taken to be
// live code because LSigma_cond is read right after the branch - confirm
// against the original file.
double CParam::calculate_log_cond_norm(CData &Data, int i_original, ColumnVector &item_by_rnorm, ColumnVector &tilde_y_i, ColumnVector &y_q, bool is_q, LowerTriangularMatrix &LSigma_1_i, ColumnVector &s_q) { // MODIFIED 2015/02/16

  double log_cond_norm;

  if ( item_by_rnorm.sum() >= 1 ) {
    // Mean and covariance of the flagged ("1") components for the mixture
    // component this record is assigned to.
    ColumnVector mu_z_i = Mu.column(z_in(i_original));
    ColumnVector s_1_compact = Data.get_compact_vector(item_by_rnorm);
    ColumnVector Mu_1 = subvector(mu_z_i,s_1_compact);
    Matrix Sigma_1 = Submatrix_elem_2(SIGMA[z_in(i_original)-1],s_1_compact,s_1_compact);

    // ADDED 2015/01/27

    // s_0_compact = 1 - s_q_compact marks the components conditioned on.
    ColumnVector s_q_compact = Data.get_compact_vector(s_q) ; // MODIFIED 2015/02/16
    ColumnVector VectorOne = s_q_compact ; VectorOne = 1 ; // MODIFIED 2015/02/16
    ColumnVector s_0_compact = VectorOne - s_q_compact ; // MODIFIED 2015/02/16
    int sum_s_0_comp = s_0_compact.sum() ;

    LowerTriangularMatrix LSigma_cond ;
    ColumnVector Mu_cond ;

    if ( sum_s_0_comp>0 ){
      // Conditional case: there is at least one component to condition on.
      ColumnVector Mu_0 = subvector(mu_z_i,s_0_compact); // (s_1_compact.sum()) vector
      Matrix Sigma_0 = Submatrix_elem_2(SIGMA[z_in(i_original)-1],s_0_compact,s_0_compact);
      Matrix Sigma_10 = Submatrix_elem_2(SIGMA[z_in(i_original)-1],s_1_compact,s_0_compact);
      ColumnVector y_tilde_compact = Data.get_compact_vector(tilde_y_i) ;
      ColumnVector y_tilde_0 = subvector(y_tilde_compact,s_0_compact) ;

      // (LSigma_0.i()).t()*LSigma_0.i() is the inverse of Sigma_0 built from
      // its Cholesky factor.
      SymmetricMatrix Sigma_0_symm ; Sigma_0_symm << Sigma_0 ;
      LowerTriangularMatrix LSigma_0 = Cholesky(Sigma_0_symm) ;
      Mu_cond = Mu_1 + Sigma_10 * (LSigma_0.i()).t()*LSigma_0.i() * ( y_tilde_0-Mu_0 ) ;
      Matrix Sigma_cond = Sigma_1 - Sigma_10 * (LSigma_0.i()).t()*LSigma_0.i() * Sigma_10.t() ;
      SymmetricMatrix Sigma_cond_symm ; Sigma_cond_symm << Sigma_cond ;

      // Eigen-decompose the conditional covariance and floor every
      // eigenvalue at 1e-9; the matrix is rebuilt only if a floor was hit.
      int sum_s_1_comp = s_1_compact.sum() ;
      DiagonalMatrix D(sum_s_1_comp) ;
      Matrix V(sum_s_1_comp,sum_s_1_comp) ;
      Jacobi(Sigma_cond_symm,D,V) ;
      int is_zero_exist = 0 ;
      for (int i_var=1; i_var<=sum_s_1_comp; i_var++){
        if ( D(i_var) < 1e-9 ){
          D(i_var) = 1e-9 ;
          is_zero_exist = 1 ;
        }
      } // for (int i_var=1; i_var<=sum_s_1_comp; i_var++)
      if ( is_zero_exist == 1 ){
        Sigma_cond_symm << V * D * V.t() ;
        if ( msg_level >= 1 ) {
          Rprintf( "   Warning: When generating y_j from conditional normal(Mu_-j,Sigma_-j), Sigma_-j is non-positive definite because of computation precision. The eigenvalues D(j,j) smaller than 1e-9 is replaced with 1e-9, and let Sigma_-j = V D V.t().\n");
        }
      }
      //
      LSigma_cond = Cholesky(Sigma_cond_symm);
      // y_part = rMVN_fn(Mu_cond,LSigma_cond);
      // log_cond_norm = log_MVN_fn(y_part,Mu_cond,LSigma_cond) ;

    } else {
      // Nothing to condition on: use the marginal mean and covariance.
      Mu_cond = Mu_1 ;
      SymmetricMatrix Sigma_1_symm = Submatrix_elem(SIGMA[z_in(i_original)-1],s_1_compact);
      LSigma_cond = Cholesky(Sigma_1_symm) ;

      // SymmetricMatrix Sigma_1_symm ; Sigma_1_symm << Sigma_1 ;
      // LowerTriangularMatrix LSigma_1 = Cholesky_Sigma_star_symm(Sigma_1_symm);
      // y_part = rMVN_fn(Mu_1,LSigma_1);
      // log_cond_norm = log_MVN_fn(y_part,Mu_1,LSigma_1) ;

    } // if ( sum_s_0_comp>0 ) else ...

    // ADDED 2015/01/26

    // log_MVN_fn is called with the inverse of the Cholesky factor.
    LowerTriangularMatrix LSigma_cond_i = LSigma_cond.i() ;

    // LowerTriangularMatrix LSigma_1 = Cholesky(Sigma_1);
    // LSigma_1_i = LSigma_1.i();
    ColumnVector y_part;
    if (is_q) {
      y_part = rMVN_fn(Mu_cond,LSigma_cond);
    } else {
      ColumnVector y_i = (Y_in.row(i_original)).t();
      y_part = subvector(y_i,item_by_rnorm);
    }
    log_cond_norm = log_MVN_fn(y_part,Mu_cond,LSigma_cond_i);

    if (is_q) {
      // Scatter the drawn values back into the positions flagged in
      // item_by_rnorm, keeping tilde_y_i everywhere else.
      y_q = tilde_y_i;
      for ( int temp_j = 1,temp_count1 = 0; temp_j<=n_var; temp_j++ ){
        if ( item_by_rnorm(temp_j)==1 ){
          y_q(temp_j) = y_part(++temp_count1);
        }
      }
    } // if (is_q)

  } else {
    // No component flagged: the contribution is zero and y_q is a copy.
    log_cond_norm = 0;
    if (is_q) { y_q = tilde_y_i;}
  } // if ( item_by_rnorm.sum() > = 1 ) else ..

  return log_cond_norm;
}
// Cholesky-factors a [STAR,STAR] distributed matrix by forwarding its
// underlying local matrix (A.Matrix()) to the sequential Cholesky routine
// with the same triangle selector.
inline void
LocalCholesky( UpperOrLower uplo, DistMatrix<F,STAR,STAR>& A )
{
    DEBUG_ONLY(CallStackEntry cse("LocalCholesky"))
    auto& localBlock = A.Matrix();
    Cholesky( uplo, localBlock );
}
// Runs Cholesky(A) and returns true once it has completed.
// Nothing is caught here, so this function can never return false: a
// failure can only surface as whatever Cholesky() itself does on bad input
// (presumably an exception - confirm against its definition).
bool is_positive(const matrix& A)
{
    Cholesky(A);                    // result intentionally unused
    const bool factorised = true;   // reached only if the call returned
    return factorised;
}
// Iteratively updates A in place and reports how many iterations of each
// kind were performed.
// NOTE(review): the name suggests the QDWH (QR-based dynamically weighted
// Halley) iteration; confirm against the caller.
//
//   A         - m x n matrix with m >= n (LogicError otherwise); overwritten.
//   sMinUpper - scalar used to seed the running quantity L as
//               sMinUpper / sqrt(n).
//   ctrl      - supplies the iteration cap (maxIts) and the column-pivoting
//               flag forwarded to the QR step.
// Returns a QDWHInfo whose numIts, numQRIts and numCholIts counters have
// been incremented by this call.
//
// The loop stops early once the Frobenius norm of the change in A is at
// most the cube root of tol and |1-L| <= tol, where tol = 5*epsilon.
QDWHInfo
QDWHInner( Matrix<F>& A, Base<F> sMinUpper, const QDWHCtrl& ctrl )
{
    EL_DEBUG_CSE
    typedef Base<F> Real;
    typedef Complex<Real> Cpx;
    const Int m = A.Height();
    const Int n = A.Width();
    const Real oneThird = Real(1)/Real(3);
    if( m < n )
        LogicError("Height cannot be less than width");

    QDWHInfo info;

    QRCtrl<Base<F>> qrCtrl;
    qrCtrl.colPiv = ctrl.colPiv;

    const Real eps = limits::Epsilon<Real>();
    const Real tol = 5*eps;
    const Real cubeRootTol = Pow(tol,oneThird);
    Real L = sMinUpper / Sqrt(Real(n));

    Real frobNormADiff;
    Matrix<F> ALast, ATemp, C;
    // Q is the (m+n) x n workspace of the QR step; QT and QB view its top m
    // rows and the remaining rows respectively.
    Matrix<F> Q( m+n, n );
    auto QT = Q( IR(0,m  ), ALL );
    auto QB = Q( IR(m,END), ALL );
    while( info.numIts < ctrl.maxIts )
    {
        // Keep the previous iterate to measure the change afterwards.
        ALast = A;

        // Derive the coefficients a, b, c for this iteration from L.
        // Complex arithmetic is used for the intermediate roots; only the
        // real part of the result is kept.
        Real L2;
        Cpx dd, sqd;
        if( Abs(1-L) < tol )
        {
            // L is already 1 to within tolerance: use the limiting values.
            L2 = 1;
            dd = 0;
            sqd = 1;
        }
        else
        {
            L2 = L*L;
            dd = Pow( 4*(1-L2)/(L2*L2), oneThird );
            sqd = Sqrt( Real(1)+dd );
        }
        const Cpx arg = Real(8) - Real(4)*dd + Real(8)*(2-L2)/(L2*sqd);
        const Real a = (sqd + Sqrt(arg)/Real(2)).real();
        const Real b = (a-1)*(a-1)/4;
        const Real c = a+b-1;

        const Real alpha = a-b/c;
        const Real beta = b/c;

        // Advance L with the same rational map.
        L = L*(a+b*L2)/(1+c*L2);

        if( c > 100 )
        {
            //
            // The standard QR-based algorithm
            //
            // Q = [ sqrt(c)*A ; I ], made explicitly unitary, then
            // A := (alpha/sqrt(c)) * QT * QB^H + beta * A.
            QT = A;
            QT *= Sqrt(c);
            MakeIdentity( QB );
            qr::ExplicitUnitary( Q, true, qrCtrl );
            Gemm( NORMAL, ADJOINT, F(alpha/Sqrt(c)), QT, QB, F(beta), A );
            ++info.numQRIts;
        }
        else
        {
            //
            // Use faster Cholesky-based algorithm since A is well-conditioned
            //
            // C = I + c*A^H*A is factored; ATemp = A * inv(C) via two
            // triangular solves, then A := beta*A + alpha*ATemp.
            Identity( C, n, n );
            Herk( LOWER, ADJOINT, c, A, Real(1), C );
            Cholesky( LOWER, C );
            ATemp = A;
            Trsm( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), C, ATemp );
            Trsm( RIGHT, LOWER, NORMAL, NON_UNIT, F(1), C, ATemp );
            A *= beta;
            Axpy( alpha, ATemp, A );
            ++info.numCholIts;
        }

        ++info.numIts;
        // Convergence test on the size of the update and on L.
        ALast -= A;
        frobNormADiff = FrobeniusNorm( ALast );
        if( frobNormADiff <= cubeRootTol && Abs(1-L) <= tol )
            break;
    }
    return info;
}
/// TMLE step
///
/// All arguments are pointers (scalars are passed by address).  Grid arrays
/// are indexed as [i + nv*j] with i the v-index and j the x-index.
///   a_delta            - offset used when deciding which grid cells of Q2
///                        are updated (cells with v < x + delta are skipped)
///   a_tau              - not read in this function
///   a_n, a_xx, a_vv    - number of observations and their x / v coordinates
///   a_lambda           - per-x-index weight applied to the gradient
///   a_xGrid, a_vGrid   - grid coordinates; a_nx, a_nv their lengths
///   a_T1, a_T2         - grid arrays combined with Q2 to form the gradient
///   a_Q2               - grid array, updated and re-normalised in place
///   a_normGrad         - output, degree+1 values: empirical mean of the
///                        gradient terms divided by the root of their
///                        empirical variance
///   a_logL             - output: mean log of Q2 at the observations plus
///                        the final criterion value
///   a_epsOpt           - in/out, degree+1 coefficients being optimised
///   a_degree           - polynomial degree of the fluctuation in x
/// NOTE(review): bigN, alpha, beta, mu, ABS, critFun, Interp, Norm, LGauss,
/// UGauss and Cholesky are defined elsewhere; their exact semantics are not
/// visible here.
void tmleStep(double *a_delta,double *a_tau, int *a_n, double *a_xx, double *a_vv,double *a_lambda,
              double *a_xGrid, double *a_vGrid, int *a_nx, int *a_nv,double *a_T1,double *a_T2,
              double *a_Q2,double *a_normGrad, double *a_logL, double *a_epsOpt, int *a_degree){

    double slice1[*a_nx],slice2[*a_nx];
    double hv=(a_vGrid[*a_nv-1]-a_vGrid[0])/(*a_nv);

    /// Compute the gradient on the grid
    /// For every x-index j, slice1/slice2 accumulate T1*Q2 and T2*Q2 over
    /// the v-index with end-point weight 1, weight 4 on odd and weight 2 on
    /// even interior indices, scaled by hv/3 (the weight pattern of a
    /// composite Simpson rule).
    for(int j=0;j<*a_nx;j++)
    {
        slice1[j]=a_T1[*a_nv*j]*a_Q2[*a_nv*j]+a_T1[*a_nv-1+*a_nv*j]*a_Q2[*a_nv-1+*a_nv*j];
        slice2[j]=a_T2[*a_nv*j]*a_Q2[*a_nv*j]+a_T2[*a_nv-1+*a_nv*j]*a_Q2[*a_nv-1+*a_nv*j];
        for(int k=1;k<*a_nv/2;k++)
        {
            slice1[j]+=2*a_T1[2*k+*a_nv*j]*a_Q2[2*k+*a_nv*j]+4*a_T1[2*k-1+*a_nv*j]*a_Q2[2*k-1+*a_nv*j];
            slice2[j]+=2*a_T2[2*k+*a_nv*j]*a_Q2[2*k+*a_nv*j]+4*a_T2[2*k-1+*a_nv*j]*a_Q2[2*k-1+*a_nv*j];
        }
        slice1[j]*=hv/3.0;
        slice2[j]*=hv/3.0;
    }

    /// Compute the Gradient
    /// grad = lambda[j] * (T1/slice1 - T2/slice2); cells whose slice is
    /// numerically zero get a zero gradient instead of a division.
    double grad[*a_nx*(*a_nv)];
    for(int i=0;i<*a_nv;i++)
    {
        for(int j=0;j<*a_nx;j++)
        {
            if(ABS(slice1[j])<sqrt(DBL_EPSILON)||ABS(slice2[j])<sqrt(DBL_EPSILON))
            {
                grad[i+*a_nv*j]=0.0;
            }else
            {
                grad[i+*a_nv*j]=(a_T1[i+*a_nv*j]/slice1[j]-a_T2[i+*a_nv*j]/slice2[j]);
            }
            grad[i+*a_nv*j]= a_lambda[j]*grad[i+*a_nv*j];
        }
    }

    /// Evaluate the Gradient @ the observed data
    double gradObs[*a_n];
    Interp(a_xx,a_vv,a_n,gradObs,a_xGrid,a_vGrid,a_nx,a_nv,grad);

    /// Solve the Optimization Problem and find the epsilon
    double gradf[*a_degree+1];
    double delta[*a_degree+1];
    double Hess[(*a_degree+1)*(*a_degree+1)];
    double decrement = 10*bigN;
    double decrementOld=0.0;
    double eps[*a_degree+1];
    // double fOld=0.0;
    //while (MIN(decrement/2,ABS(decrement-decrementOld)) >= sqrt(DBL_EPSILON))
    double m = (double)*a_nv * (double)*a_nv;
    double barr = bigN;
    printf("m = %lf : barr = %lf\n",m,barr);
    // critFun returns the criterion value and fills gradf and Hess.
    double fNew = critFun(a_epsOpt, a_xx, gradObs, *a_n, *a_degree, gradf, Hess, grad, a_xGrid, *a_nv, *a_nx,barr);
    int Iter = 0;
    // Outer loop: barr is multiplied by mu after each inner solve until
    // m/barr drops below sqrt(DBL_EPSILON).
    while(1){
        Iter++;
        int iter = 0;
        // Inner loop: Newton-type steps with a backtracking line search.
        while (1)
        {
            iter++;
            printf("[%d](%d) ",Iter,iter);
            printf("f(theta)=%g | ",fNew);
            /* solve the system */
            Cholesky(Hess,*a_degree+1);
            LGauss(Hess, gradf, *a_degree+1); ///1
            // decrement is the squared norm of gradf after the first
            // triangular solve.
            decrement = 0.0;
            for(int k=0;k<=*a_degree;k++)
            {
                decrement += gradf[k]*gradf[k];
            }
            UGauss(Hess, gradf, *a_degree+1); ///2
            printf("%lf \n",decrement);
            // Stop once the decrement is negligible, but never on the very
            // first inner iteration.
            if(decrement/2 < sqrt(DBL_EPSILON) && iter > 1) break;
            double fOld = fNew;
            double t = 1;
            for(int k=0;k<=*a_degree;k++) delta[k] = gradf[k];
            // Backtracking: shrink t by beta until the criterion has
            // decreased enough.  t is already shrunk when the condition is
            // tested, hence the t/beta in the printout below.
            do{
                for(int k=0;k<=*a_degree;k++) eps[k] = a_epsOpt[k] - t*delta[k];
                fNew = critFun(eps, a_xx, gradObs, *a_n, *a_degree, gradf, Hess, grad, a_xGrid, *a_nv, *a_nx, barr);
                t *= beta;
                //printf("*");
            } while (fNew > fOld-alpha*t*decrement);
            for(int k=0;k<=*a_degree;k++) a_epsOpt[k] = eps[k];
            printf("[t=%g | f=%g]\n",t/beta,fNew);
        }
        printf("%lf\n",m/barr);
        if(m/barr < sqrt(DBL_EPSILON)) break;
        barr *= mu;
        //break;
    }
    printf("\n");
    // NOTE(review): a_epsOpt[1] is read unconditionally although the array
    // holds only degree+1 entries - out of range when *a_degree == 0.
    printf("eps: %lf %lf | ",a_epsOpt[0],a_epsOpt[1]);
    // Re-evaluate the criterion with barr = 0 and flip its sign.
    barr = 0.0;
    double fMax = -critFun(a_epsOpt, a_xx, gradObs, *a_n, *a_degree, gradf, Hess, grad, a_xGrid, *a_nv, *a_nx,barr);
    printf("f(eps)=%lf\n\n",fMax);
    ///a_epsOpt=eps;

    /// Compute Q2 @ observed data + compute empirical Mean and Variance
    double empMean[*a_degree+1];
    double empVar[*a_degree+1];
    double sumsq[*a_degree+1];
    double offSet = 0.0;
    int ii = 1;
    double q2Obs;
    for(int k=0;k<=*a_degree;k++){
        empMean[k] = 0.0;
        empVar[k] = 0.0;
        sumsq[k] = 0.0;
        double x0 = pow(a_xx[0],(double)k);
        for(int i=0;i<*a_n;i++)
        {
            // The log of Q2 at each observation is accumulated once, on
            // the k == 0 pass.
            if(k==0){
                Interp(&a_xx[i],&a_vv[i],&ii,&q2Obs,a_xGrid,a_vGrid,a_nx,a_nv,a_Q2);
                offSet += log(q2Obs);
            }
            // NOTE(review): the guard indexes a_xGrid with the degree k,
            // not with a grid or observation index - confirm it is intended.
            if(ABS(a_xGrid[k])>DBL_EPSILON)
            {
                // Sums are taken relative to the first observation's term.
                double xi = pow(a_xx[i],(double)k);
                empMean[k] += (xi*gradObs[i] - x0*gradObs[0]);
                sumsq[k] += (xi*gradObs[i] - x0*gradObs[0])*(xi*gradObs[i] - x0*gradObs[0]);
            }
        }
        empVar[k] = ((sumsq[k] - empMean[k]*empMean[k]/(*a_n))/(*a_n-1))/(*a_n);
        empMean[k] /= *a_n;
        empMean[k] += gradObs[0];
        a_normGrad[k]=empMean[k]/sqrt(empVar[k]);
    }
    offSet/=*a_n;
    *a_logL=offSet + fMax;

    /// Update Q2
    /// Q2 *= 1 + grad * sum_k epsOpt[k] * x^k, only for cells with
    /// v >= x + delta; the polynomial is left at 0 when |x| <= DBL_EPSILON.
    for(int i=0;i<*a_nv;i++)
    {
        for(int j=0;j<*a_nx;j++)
        {
            if((a_vGrid[i] < a_xGrid[j] + *a_delta)) continue;
            double coef=0.0;
            if(ABS(a_xGrid[j])>DBL_EPSILON)
            {
                for(int k=0; k<=*a_degree;k++)
                {
                    ///printf("%d ",k);
                    coef += a_epsOpt[k]*pow(a_xGrid[j],(double)k);
                }
            }
            a_Q2[i+*a_nv*j]*=(1.0+grad[i+*a_nv*j]*coef);
        }
    }

    /// Normalize Q2
    double normC;
    Norm(&normC,a_xGrid,a_vGrid,a_nx,a_nv, a_Q2);
    for(int i=0;i<*a_nv;i++)
    {
        for(int j=0;j<*a_nx;j++)
        {
            a_Q2[i+*a_nv*j]/=normC;
        }
    }
}
void Ridge ( Orientation orientation, const Matrix<Field>& A, const Matrix<Field>& B, Base<Field> gamma, Matrix<Field>& X, RidgeAlg alg ) { EL_DEBUG_CSE const bool normal = ( orientation==NORMAL ); const Int m = ( normal ? A.Height() : A.Width() ); const Int n = ( normal ? A.Width() : A.Height() ); if( orientation == TRANSPOSE && IsComplex<Field>::value ) LogicError("Transpose version of complex Ridge not yet supported"); if( m >= n ) { Matrix<Field> Z; if( alg == RIDGE_CHOLESKY ) { if( orientation == NORMAL ) Herk( LOWER, ADJOINT, Base<Field>(1), A, Z ); else Herk( LOWER, NORMAL, Base<Field>(1), A, Z ); ShiftDiagonal( Z, Field(gamma*gamma) ); Cholesky( LOWER, Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else if( alg == RIDGE_QR ) { Zeros( Z, m+n, n ); auto ZT = Z( IR(0,m), IR(0,n) ); auto ZB = Z( IR(m,m+n), IR(0,n) ); if( orientation == NORMAL ) ZT = A; else Adjoint( A, ZT ); FillDiagonal( ZB, Field(gamma) ); // NOTE: This QR factorization could exploit the upper-triangular // structure of the diagonal matrix ZB qr::ExplicitTriang( Z ); if( orientation == NORMAL ) Gemm( ADJOINT, NORMAL, Field(1), A, B, X ); else Gemm( NORMAL, NORMAL, Field(1), A, B, X ); cholesky::SolveAfter( LOWER, NORMAL, Z, X ); } else { Matrix<Field> U, V; Matrix<Base<Field>> s; if( orientation == NORMAL ) { SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = false; SVD( A, U, s, V, ctrl ); } else { Matrix<Field> AAdj; Adjoint( A, AAdj ); SVDCtrl<Base<Field>> ctrl; ctrl.overwrite = true; SVD( AAdj, U, s, V, ctrl ); } auto sigmaMap = [=]( const Base<Field>& sigma ) { return sigma / (sigma*sigma + gamma*gamma); }; EntrywiseMap( s, MakeFunction(sigmaMap) ); Gemm( ADJOINT, NORMAL, Field(1), U, B, X ); DiagonalScale( LEFT, NORMAL, s, X ); U = X; Gemm( NORMAL, NORMAL, Field(1), V, U, X ); } } else { LogicError("This case not yet supported"); } }
// E-step. Calculate Log Probs for each point to belong to each living class // will delete a class if covariance matrix is singular // also counts number of living classes void KK::EStep() { int p, c, cc, i; int nSkipped; float LogRootDet; // log of square root of covariance determinant float Mahal; // Mahalanobis distance of point from cluster center Array<float> Chol(nDims2); // to store choleski decomposition Array<float> Vec2Mean(nDims); // stores data point minus class mean Array<float> Root(nDims); // stores result of Chol*Root = Vec float *OptPtrLogP; int *OptPtrClass = Class.m_Data; int *OptPtrOldClass = OldClass.m_Data; nSkipped = 0; // start with cluster 0 - uniform distribution over space // because we have normalized all dims to 0...1, density will be 1. for (p=0; p<nPoints; p++) LogP[p*MaxPossibleClusters + 0] = (float)-log(Weight[0]); for (cc=1; cc<nClustersAlive; cc++) { c = AliveIndex[cc]; // calculate cholesky decomposition for class c if (Cholesky(Cov.m_Data+c*nDims2, Chol.m_Data, nDims)) { // If Cholesky returns 1, it means the matrix is not positive definite. // So kill the class. Output("Deleting class %d: covariance matrix is singular\n", c); ClassAlive[c] = 0; continue; } // LogRootDet is given by log of product of diagonal elements LogRootDet = 0; for(i=0; i<nDims; i++) LogRootDet += (float)log(Chol[i*nDims + i]); for (p=0; p<nPoints; p++) { // optimize for speed ... 
OptPtrLogP = LogP.m_Data + (p*MaxPossibleClusters); // to save time -- only recalculate if the last one was close if ( !FullStep // Class[p] == OldClass[p] // && LogP[p*MaxPossibleClusters+c] - LogP[p*MaxPossibleClusters+Class[p]] > DistThresh && OptPtrClass[p] == OptPtrOldClass[p] && OptPtrLogP[c] - OptPtrLogP[OptPtrClass[p]] > DistThresh ) { nSkipped++; continue; } // Compute Mahalanobis distance Mahal = 0; // calculate data minus class mean for(i=0; i<nDims; i++) Vec2Mean[i] = Data[p*nDims + i] - Mean[c*nDims + i]; // calculate Root vector - by Chol*Root = Vec2Mean TriSolve(Chol.m_Data, Vec2Mean.m_Data, Root.m_Data, nDims); // add half of Root vector squared to log p for(i=0; i<nDims; i++) Mahal += Root[i]*Root[i]; // Score is given by Mahal/2 + log RootDet - log weight // LogP[p*MaxPossibleClusters + c] = Mahal/2 OptPtrLogP[c] = Mahal/2 + LogRootDet - log(Weight[c]) + (float)log(2*M_PI)*nDims/2; /* if (Debug) { if (p==0) { Output("Cholesky\n"); MatPrint(stdout, Chol.m_Data, nDims, nDims); Output("root vector:\n"); MatPrint(stdout, Root.m_Data, 1, nDims); Output("First point's score = %.3g + %.3g - %.3g = %.3g\n", Mahal/2, LogRootDet , log(Weight[c]), LogP[p*MaxPossibleClusters + c]); } } */ } } // Output("Skipped %d ", nSkipped); }
int main(int argc, char **argv) { #define test_A(i,j) test_A[(size_t)(j)*N+(i)] #define test_A2(i,j) test_A2[(size_t)(j)*N+(i)] int N,NB,w,LDA,BB; size_t memsize; //bytes int iam, nprocs, mydevice; int ICTXT, nprow, npcol, myprow, mypcol; int i_one = 1, i_zero = 0, i_negone = -1; double d_one = 1.0, d_zero = 0.0, d_negone = -1.0; int IASEED = 100; /* printf("N=?\n"); scanf("%ld",&N); printf("NB=?\n"); scanf("%d", &NB); printf("width of Y panel=?\n"); scanf("%ld",&w); */ if(argc < 4){ printf("invalid arguments N NB memsize(M)\n"); exit(1); } N = atoi(argv[1]); NB = atoi(argv[2]); memsize = (size_t)atoi(argv[3])*1024*1024; BB = (N + NB - 1) / NB; w = memsize/sizeof(double)/BB/NB/NB - 1; assert(w > 0); LDA = N + 0; //padding int do_io = (N <= NSIZE); double llttime; double gflops; nprow = npcol = 1; blacs_pinfo_(&iam, &nprocs); blacs_get_(&i_negone, &i_zero, &ICTXT); blacs_gridinit_(&ICTXT, "R", &nprow, &npcol); blacs_gridinfo_(&ICTXT, &nprow, &npcol, &myprow, &mypcol); #ifdef USE_MIC #ifdef __INTEL_OFFLOAD printf("offload compilation enabled\ninitialize each MIC\n"); offload_init(&iam, &mydevice); #pragma offload target(mic:0) { mkl_peak_mem_usage(MKL_PEAK_MEM_ENABLE); } #else if(isroot) printf("offload compilation not enabled\n"); exit(0); #endif #else #ifdef USE_CUBLASV2 { cublasStatus_t cuStatus; for(int r = 0; r < OOC_NTHREADS; r++){ cuStatus = cublasCreate(&worker_handle[r]); assert(cuStatus == CUBLAS_STATUS_SUCCESS); } } #else cublasInit(); #endif #endif double *test_A = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for chol #ifdef VERIFY double *test_A2 = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for verify #endif /*Initialize A */ int i,j; printf("Initialize A ... 
"); fflush(stdout); llttime = MPI_Wtime(); pdmatgen(&ICTXT, "Symm", "Diag", &N, &N, &NB, &NB, test_A, &LDA, &i_zero, &i_zero, &IASEED, &i_zero, &N, &i_zero, &N, &myprow, &mypcol, &nprow, &npcol); llttime = MPI_Wtime() - llttime; printf("time %lf\n", llttime); /*print test_A*/ if(do_io){ printf("Original A=\n\n"); matprint(test_A, N, LDA, 'A'); } /*Use directed unblocked Cholesky factorization*/ /* t1 = clock(); Test_dpotrf(test_A2,N); t2 = clock(); printf ("time for unblocked Cholesky factorization on host %f \n", ((float) (t2 - t1)) / CLOCKS_PER_SEC); */ /*print test_A*/ /* if(do_io){ printf("Unblocked result:\n\n"); matprint(test_A2,N,'L'); } */ /*Use tile algorithm*/ Quark *quark = QUARK_New(OOC_NTHREADS); QUARK_DOT_DAG_Enable(quark, 0); #ifdef USE_MIC // mklmem(NB); printf("QUARK MIC affinity binding\n"); QUARK_bind(quark); printf("offload warm up\n"); warmup(quark); #endif QUARK_DOT_DAG_Enable(quark, quark_getenv_int("QUARK_DOT_DAG_ENABLE", 0)); printf("LLT start %lf\n", MPI_Wtime()); llttime = Cholesky(quark,test_A,N,NB,LDA,memsize); printf("LLT end %lf\n", MPI_Wtime()); QUARK_Delete(quark); #ifdef USE_MIC offload_destroy(); #else #ifdef USE_CUBLASV2 { cublasStatus_t cuStatus; for(int r = 0; r < OOC_NTHREADS; r++){ cuStatus = cublasDestroy(worker_handle[r]); assert(cuStatus == CUBLAS_STATUS_SUCCESS); } } #else cublasShutdown(); #endif #endif gflops = (double) N; gflops = gflops/3.0 + 0.5; gflops = gflops*(double)(N)*(double)(N); gflops = gflops/llttime/1024.0/1024.0/1024.0; printf ("N NB memsize(MB) quark_pthreads time Gflops\n%d %d %lf %d %lf %lf\n", N, NB, (double)memsize/1024/1024, OOC_NTHREADS, llttime, gflops); #ifdef USE_MIC #pragma offload target(mic:0) { memsize = mkl_peak_mem_usage(MKL_PEAK_MEM_RESET); } printf("mkl_peak_mem_usage %lf MB\n", (double)memsize/1024.0/1024.0); #endif /*Update and print L*/ if(do_io){ printf("L:\n\n"); matprint(test_A,N,LDA,'L'); } #ifdef VERIFY printf("Verify... 
"); llttime = MPI_Wtime(); /* * ------------------------ * check difference betwen * test_A and test_A2 * ------------------------ */ /* { double maxerr = 0; double maxerr2 = 0; for (j = 0; j < N; j++) { for (i = j; i < N; i++) { double err = (test_A (i, j) - test_A2 (i, j)); err = ABS (err); maxerr = MAX (err, maxerr); maxerr2 = maxerr2 + err * err; }; }; maxerr2 = sqrt (ABS (maxerr2)); printf ("max difference between test_A and test_A2 %lf \n", maxerr); printf ("L2 difference between test_A and test_A2 %lf \n", maxerr2); }; */ /* * ------------------ * over-write test_A2 * ------------------ */ pdmatgen(&ICTXT, "Symm", "Diag", &N, &N, &NB, &NB, test_A2, &LDA, &i_zero, &i_zero, &IASEED, &i_zero, &N, &i_zero, &N, &myprow, &mypcol, &nprow, &npcol); /* * --------------------------------------- * after solve, test_A2 should be identity * --------------------------------------- */ // test_A = chol(B) = L; // test_A2 = B // solve L*L'*X = B // if L is correct, X is identity */ { int uplo = 'L'; const char *uplo_char = ((uplo == (int) 'U') || (uplo == (int) 'u')) ? "U" : "L"; int info = 0; int nrhs = N; int LDA = N; int ldb = N; dpotrs(uplo_char, &N, &nrhs, test_A, &LDA, test_A2, &ldb, &info); assert (info == 0); } { double maxerr = 0; double maxerr2 = 0; for (j = 0; j < N; j++) { for (i = 0; i < N; i++) { double eyeij = (i == j) ? 1.0 : 0.0; double err = (test_A2 (i, j) - eyeij); err = ABS (err); maxerr = MAX (maxerr, err); maxerr2 = maxerr2 + err * err; }; }; maxerr2 = sqrt (ABS (maxerr2)); printf("time %lf\n", MPI_Wtime() - llttime); printf ("max error %lf \n", maxerr); printf ("max L2 error %lf \n", maxerr2); } #endif free(test_A);test_A=NULL; #ifdef VERIFY free(test_A2);test_A2=NULL; #endif blacs_gridexit_(&ICTXT); blacs_exit_(&i_zero); return 0; #undef test_A #undef test_A2 }
gauss_t *GetGauss(FILE *fp) { char line[MLL],*cp; int i,j; gauss_t *mp; if (verbose) fprintf(stderr,"Reading Gaussian...\n"); nonull(mp=malloc(sizeof(gauss_t))); mp->dim=1; mp->type=FULL; mp->prior_prob=0.0; while (1) { if (fgets(line,MLL,fp)==NULL) { mp->type=UNKNOWN; break; } if ((cp=strtok(line,IFS))==NULL) continue; if (strcmp(cp,"GAUSS")!=0) continue; if ((cp=strtok(NULL,IFS))==NULL) continue; nonull(mp->label=malloc((strlen(cp)+1)*sizeof(char))); strcpy(mp->label,cp); if ((cp=strtok(NULL,IFS))==NULL) break; if ((mp->dim=atoi(cp))<1) { free(mp->label); continue; } if ((cp=strtok(NULL,IFS))==NULL) break; if (strncmp(cp,"Diagonal",1)==0) mp->type=DIAG; if ((cp=strtok(NULL,IFS))!=NULL) mp->prior_prob=atof(cp); if (verbose) { fprintf(stderr,"label=%s dim=%d ",mp->label,mp->dim); if (mp->type==DIAG) fprintf(stderr,"type=Diag\n"); else fprintf(stderr,"type=Full\n"); } break; } if (mp->type==UNKNOWN) { if (verbose) fprintf(stderr,"no more gaussians.\n"); return NULL; } if (verbose) fprintf(stderr,"mean...\n"); nonull(mp->mean=malloc(mp->dim*sizeof(double))); nonull(GetVector(fp,mp->dim,mp->mean)); if (verbose) fprintf(stderr,"covariance matrix...\n"); if (mp->type==DIAG) { mp->covar=mp->Cholesky=NULL; nonull(mp->dcovar=malloc(mp->dim*sizeof(double))); nonull(GetVector(fp,mp->dim,mp->dcovar)); nonull(mp->dCholesky=malloc(mp->dim*sizeof(double))); for (i=0; i<mp->dim; i++) mp->dCholesky[i]=sqrt(mp->dcovar[i]); } else { mp->dcovar=mp->dCholesky=NULL; nonull(mp->covar=malloc(mp->dim*sizeof(double *))); nonull(mp->Cholesky=malloc(mp->dim*sizeof(double *))); for (i=0; i<mp->dim; i++) { nonull(mp->covar[i]=malloc((i+1)*sizeof(double))); nonull(GetVector(fp,i+1,mp->covar[i])); nonull(mp->Cholesky[i]=malloc((i+1)*sizeof(double))); for (j=0; j<=i; j++) mp->Cholesky[i][j]=mp->covar[i][j]; } Cholesky(mp->dim,mp->Cholesky); } if (verbose) fprintf(stderr,"end GAUSS.\n"); return mp; }