// Exercises nt2::chol with the lower_ option in several expression contexts:
// plain assignment, assignment into a sub-block of a larger table, use inside
// an arithmetic expression, and a matrix whose only non-zero entry is (1,1).
NT2_TEST_CASE_TPL(chol_lower2, NT2_REAL_TYPES)
{
  typedef nt2::table<T> table_t;

  // 4x4 input: all ones plus 10 on the diagonal.
  table_t a;
  table_t b = nt2::ones(4, 4, nt2::meta::as_<T>())
            + T(10) * nt2::eye(4, 4, nt2::meta::as_<T>());

  // Factor assigned directly to a table.
  a = nt2::chol(b, nt2::lower_);
  // NT2_DISPLAY(chol(b, nt2::lower_));
  NT2_DISPLAY(a);

  // Factor written into the leading 4x4 block of a 4x9 table of ones.
  table_t u = nt2::ones(4, 9, nt2::meta::as_<T>());
  NT2_DISPLAY(u);
  u(nt2::_(1, 4), nt2::_(1, 4)) = chol(b, nt2::lower_);
  NT2_DISPLAY(u);

  // Factor used as an operand of a larger expression.
  table_t x;
  x = chol(b, nt2::lower_) + b;
  NT2_DISPLAY(x);

  // Zero matrix with a single 1 at (1,1).
  b = nt2::zeros(4, 4, nt2::meta::as_<T>());
  b(1, 1) = 1;
  a = nt2::chol(b, nt2::lower_);
  NT2_DISPLAY(a);
}
// assume that A, which is triangular, is stored in recursive L; that means that the square block is stored in recursive backwards N void chol( double *A, int n ) { // base case if( n <= nmin ) { // probably we want to copy into full, since there doesn't seem to be a blocked packed cholesky in lapack; but the easy version for now int info = 0; //char L = 'L'; //dpotrf_( &L, &size, Afull, &size, &info); //dpptrf_( &L, &n, A, &info); //A[0] = sqrt(A[0]); // this uses the unpacked, but blocked version. double *temp = (double*) malloc( n*n*sizeof(double) ); double *Ap = A; for( int c = 0; c < n; c++ ) for( int r = c; r < n; r++ ) temp[c*n+r] = *(Ap++); char L = 'L', N = 'N'; double none = -1., one = 1.; dpotrf_( &L, &n, temp, &n, &info); Ap = A; for( int c = 0; c < n; c++ ) for( int r = c; r < n; r++ ) *(Ap++) = temp[c*n+r]; free(temp); return; } int nhalf = n/2; double *A11 = A; double *A21 = A+nhalf*(nhalf+1)/2; double *A22 = A21+nhalf*nhalf; chol(A11,nhalf); trsm(A21,A11,nhalf); syrk(A22,A21,nhalf); chol(A22,nhalf); }
// Conversion from a moment Gaussian. canonical_gaussian_param& operator=(const moment_gaussian_param<T>& mg) { Eigen::LLT<mat_type>chol(mg.cov); if (chol.info() != Eigen::Success) { throw numerical_error( "canonical_gaussian: Cannot invert the covariance matrix. " "Are you passing in a non-singular moment Gaussian distribution?" ); } mat_type sol_xy = chol.solve(mg.coef); std::size_t m = mg.head_size(); std::size_t n = mg.tail_size(); resize(m + n); eta.segment(0, m) = chol.solve(mg.mean); eta.segment(m, n).noalias() = -sol_xy.transpose() * mg.mean; lambda.block(0, 0, m, m) = chol.solve(mat_type::Identity(m, m)); lambda.block(0, m, m, n) = -sol_xy; lambda.block(m, 0, n, m) = -sol_xy.transpose(); lambda.block(m, m, n, n).noalias() = mg.coef.transpose() * sol_xy; lm = mg.lm - T(0.5) * (eta.segment(0, m).dot(mg.mean) + logdet(chol) + m * std::log(two_pi<T>())); return *this; }
/*
 * Benchmark driver for chol().
 *
 * Usage: <program> <matrix_file> <n> <event>
 *   matrix_file : file passed to load_matrix()
 *   n           : matrix order
 *   event       : 5 selects wall-clock timing via dclock(); any other value
 *                 is handed to papi_init() and the counter value is printed.
 *
 * Prints the measurement on stdout and chol()'s return code on stderr.
 */
int main(int argc, char** argv) {
  double *A;
  int n, ret, event;
  double startTime = 0.0;
  double endTime;
  long long value;

  /* argv[1..3] are all required; refuse to read past the argument vector. */
  if (argc < 4) {
    fprintf(stderr, "Usage: %s <matrix_file> <n> <event>\n",
            argc > 0 ? argv[0] : "chol");
    return 1;
  }

  n = atoi(argv[2]);
  A = load_matrix(argv[1], n);
  event = atoi(argv[3]);

  if (event != 5) {
    papi_init(event);
    papi_start();
  } else {
    startTime = dclock();
  }

  ret = chol(A, n);

  if (event != 5) {
    value = papi_stop();
    printf("%lld\n", value);
  } else {
    endTime = dclock();
    printf("%lf\n", endTime - startTime);
  }

  fprintf(stderr, "RET:%d\n", ret);
  check(A,n);
  free(A);
  return 0;
}
// Sigma points around a reference point.
// Inputs:
//   x: reference point
//   P: covariance
//   c: coefficient
// Output:
//   X: sigma points, laid out as [x, Y+A, Y-A] where every column of Y is x
//      and A = c * chol(P)'.
fmat BFilterUKF::sigmas(fvec x, fmat P, float c)
{
    fmat cholP = chol(P);
    fmat A = c*cholP.t();

    // Y = x(:,ones(1,numel(x)));
    // Fill from the parameter x (the reference point passed in), not from the
    // member x1, so the result is correct for any caller-supplied point.
    fmat Y = zeros<fmat>(x.n_rows, x.n_elem);
    for (unsigned int j = 0; j < x.n_elem; ++j)
    {
        for (unsigned int i = 0; i < x.n_rows; ++i)
        {
            Y(i,j) = x(i);
        }
    }

    // X = [x Y+A Y-A];
    fmat X = fmat(x);
    X.insert_cols(X.n_cols,Y+A);
    X.insert_cols(X.n_cols,Y-A);

    return X;
}
/// \brief Create Sigma point to represent a normal distribution void UnscentedKalmanFilter::sigmas(vnl_vector<double> x, vnl_matrix<double> P, double c, vnl_matrix<double> &X) { // Copy matrix P to A vnl_matrix<double> A = P; // Compute the Cholesky decomposition of P vnl_cholesky chol(A,vnl_cholesky::verbose); A = chol.upper_triangle(); // Apply the weight c A = c*A.transpose(); // Create matrix Y with copies of reference point x vnl_matrix<double> Y(N,N); for( int i = 0; i<N; i++ ) Y.set_column(i,x); // Add and subtract A from Y vnl_matrix<double> YpA = Y + A; vnl_matrix<double> YmA = Y - A; // Set columns of X with Sigma points X.set_column(0,x); for( int i = 0; i<N; i++ ) { X.set_column(i+1,YpA.get_column(i)); X.set_column(i+1+N,YmA.get_column(i)); } }
// comms is an array of communicators of length P. Only the I and J entries will be valid. They are the communicators along the given row/column of X. int cholesky( double *L, int h, int bs, int n, int I, int J, int P, MPI_Comm *comms ) { int info1; int info2 = 0; int bs2 = bs*bs; int sbs2 = bs*bs; if( I != J ) sbs2 = 2*sbs2; // compute the cholesky factorization of the first block info1 = chol( L, bs, n, I, J, P, comms ); if( h > 1 ) { // trsm of the first block column double *X = L+bs2; tstrsm( X, L, h-1, bs, I, J, P, comms ); // syrk of the rest of the matrix double *Lnew = X+(h-1)*sbs2; tssyrk( X, Lnew, h-1, bs, I, J, P, comms ); // cholesky of the rest of the matrix info2 = cholesky( Lnew, h-1, bs, n-P*bs, I, J, P, comms ); } if( info1 ) return info1; return info2; }
int main( int argc, char **argv ) { int size = 256; // generate a symmetric, positive definite matrix double *M = (double *) malloc( size*size*sizeof(double) ); fill( M, size*size); double *Afull = (double *) malloc( size*size*sizeof(double) ); char T = 'T', N = 'n'; double one = 1., zero = 0.; dgemm_( &T, &N, &size, &size, &size, &one, M, &size, M, &size, &zero, Afull, &size ); double *A = (double *) malloc( size*(size+1)/2*sizeof(double) ); double *Acopy = (double *) malloc( size*(size+1)/2*sizeof(double) ); for( int r = 0; r < size; r++ ) for( int c = 0; c <= r; c++ ) { setEntry(A, size, r, c, Afull[r*size+c]); setEntry(Acopy, size, r, c, Afull[r*size+c], size); } //printMatrix(Afull,size); double startTime = read_timer(); chol(A,size); double endTime = read_timer(); printf("Time %f Gflop/s %f\n", endTime-startTime, size*size*size/3./(endTime-startTime)/1.e9); int info = 0; char L = 'L'; startTime = read_timer(); dpptrf_( &L, &size, Acopy, &info); endTime = read_timer(); printf("dpptrf Time %f Gflop/s %f\n", endTime-startTime, size*size*size/3./(endTime-startTime)/1.e9); printf("info is %d, size is %d\n", info, size ); startTime = read_timer(); dpotrf_( &L, &size, Afull, &size, &info); endTime = read_timer(); printf("dpotrf Time %f Gflop/s %f\n", endTime-startTime, size*size*size/3./(endTime-startTime)/1.e9); //printMatrix(Afull,size); double maxDev = 0.; /* for( int r = 0; r < size; r++ ) for( int c = 0; c <= r; c++ ) { maxDev = max(maxDev,fabs(Afull[r*size+c]-getEntry(A,size,r,c))); Afull[r*size+c] = getEntry(A,size,r,c); } */ for( int r = 0; r < size; r++ ) for( int c = 0; c < size; c++ ) { maxDev = max(maxDev,fabs(getEntry(Acopy,size,r,c,size)-getEntry(A,size,r,c))); // Afull[r*size+c] = getEntry(Acopy,size,r,c,size); } //printMatrix(Afull,size); printf("Max deviation: %f\n", maxDev); //for( int r = 0; r < size; r++ ) // for( int c = 0; c < size; c++ ) { // Afull[r*size+c] = getEntry(A,size,r,c); // } //printMatrix(Afull,size); return 0; }
// sigma denotes cov matrix rather than precision matrix double multivariate_normal_sigma_logp(const arma::vec& x, const arma::vec& mu, const arma::mat& sigma) { const double log_2pi = log(2 * arma::math::pi()); arma::mat R(arma::zeros<arma::mat>(sigma.n_cols,sigma.n_cols)); // non-positive definite test via chol if(chol(R,sigma) == false) { return -std::numeric_limits<double>::infinity(); } // otherwise calc logp return -(x.n_elem * log_2pi + log_approx(arma::det(sigma)) + mahalanobis(x,mu,sigma))/2; }
//[[Rcpp::export]] vec breg(vec const& y, mat const& X, vec const& betabar, mat const& A) { // Keunwoo Kim 06/20/2014 // Purpose: draw from posterior for linear regression, sigmasq=1.0 // Output: draw from posterior // Model: y = Xbeta + e e ~ N(0,I) // Prior: beta ~ N(betabar,A^-1) int k = betabar.size(); mat RA = chol(A); mat W = join_cols(X, RA); //same as rbind(X,RA) vec z = join_cols(y, RA*betabar); mat IR = solve(trimatu(chol(trans(W)*W)), eye(k,k)); //trimatu interprets the matrix as upper triangular and makes solve more efficient return ((IR*trans(IR))*(trans(W)*z) + IR*vec(rnorm(k))); }
// Constructs the model from a prior degrees-of-freedom value and a prior
// variance estimate, then reports an error unless the Cholesky factorization
// of sumsq() says the matrix is positive definite.
WM::WishartModel(double pri_df, const SpdMatrix &PriVarEst)
    : ParamPolicy(new UnivParams(pri_df),
                  new SpdParams(PriVarEst*pri_df)),
      DataPolicy(new WS(PriVarEst.nrow())),
      PriorPolicy()
{
  Chol sumsq_factor(sumsq());
  if (sumsq_factor.is_pos_def()) {
    return;
  }
  report_error("Sum of squares matrix must be positive definite in "
               "WishartModel constructor");
}
// Status-returning form of chol: stores the factor of X in `out` and returns
// false when the resulting matrix has no elements, true otherwise.
inline
bool
chol
  (
  Mat<typename T1::elem_type>& out,
  const Base<typename T1::elem_type,T1>& X,
  const typename arma_blas_type_only<typename T1::elem_type>::result* junk = 0
  )
  {
  arma_extra_debug_sigprint();
  arma_ignore(junk);

  out = chol(X);

  return out.n_elem != 0;
  }
/**
 * Draw one sample from a multivariate normal distribution.
 *
 * Standard normal variates are generated into s, multiplied by the factor
 * produced by chol() via BLAS dtrmv ("L", "N", "N"), and shifted by the mean.
 *
 * @param d the dimension
 * @param m the mean vector
 * @param v the positive-definite covariance matrix
 * @param s the vector to store the simulation
 *
 */
static void rmvnorm(int d, double *m, double *v, double *s){
  int unit_stride = 1;
  double *factor = Calloc(d * d, double) ;

  /* d independent standard normal draws */
  for (int k = 0; k < d; k++) {
    s[k] = rnorm(0, 1) ;
  }

  /* cholesky factor of v */
  chol(d, v, factor) ;

  /* scale by the factor, then shift by the mean */
  F77_CALL(dtrmv)("L", "N", "N", &d, factor, &d, s, &unit_stride) ;
  for (int k = 0; k < d; k++) {
    s[k] += m[k] ;
  }

  Free(factor) ;
}
int main( int argc, char **argv ) { initCommunication( &argc, &argv ); // make up a simple test int size = read_int( argc, argv, "-s", 8 ); int r = read_int( argc, argv, "-r", 2 ); int P; MPI_Comm_size( MPI_COMM_WORLD, &P ); initSizes( P, r, size ); if( getRank() == 0 ) { if( P > (1<<r) ) printf("Need more recursive steps for this many processors\n"); if( P > (size/(1<<r))*(size/(1<<r)+1)/2) printf("Need a bigger matrix/fewer recursive steps for this many processors\n"); printf("-s %d -r %d -n %d\n", size, r, P); } int sizeSq = getSizeSq(r,P); int sizeTri = getSizeTri(r,P); double *X = (double*) malloc( sizeSq*sizeof(double) ); srand48(getRank()); fill(X,sizeSq); double *A = (double*) malloc( sizeTri*sizeof(double) ); if( getRank() == 0 ) printf("Generating a symmetric positive definite test matrix\n"); initTimers(); MPI_Barrier( MPI_COMM_WORLD ); double st2 = read_timer(); syrk( A, X, size, P, r, 0. ); MPI_Barrier( MPI_COMM_WORLD ); double et2 = read_timer(); if( getRank() == 0 ) printf("Generation time: %f\n", et2-st2); initTimers(); free(X); for( int i = 0; i < sizeTri; i++ ) A[i] = -A[i]; if( getRank() == 0 ) printf("Starting benchmark\n"); MPI_Barrier( MPI_COMM_WORLD ); double startTime = read_timer(); chol( A, size, P, r ); MPI_Barrier( MPI_COMM_WORLD ); double endTime = read_timer(); if( getRank() == 0 ) printf("Time: %f Gflop/s %f\n", endTime-startTime, size*1.*size*size/3./(endTime-startTime)/1.e9); free(A); printCounters(size); MPI_Finalize(); }
/*
 * Factors a random matrix and prints a checksum of the result.
 *
 * argv[1] : matrix order
 * argv[2] : optional PAPI event; when absent (-1) wall-clock time is measured
 *
 * The measurement (seconds or counter value) is written to stderr.
 * Returns -1 when no matrix order is given.
 */
int main(int argc, char *argv[])
{
    unsigned n;
    int evt;
    double *A;
    int i, j;
    double checksum = 0;
    double startTime, endTime;
    long long counter;

    if (argc < 2)
    {
        return -1;
    }

    n = atoi(argv[1]);
    evt = -1;
    if (argc > 2)
    {
        evt = atoi(argv[2]);
    }

    A = randomMatrix(n);
    assert(A != NULL);

    /* start measuring */
    if (evt != -1)
    {
        papi_init(evt);
        papi_start();
    }
    else
    {
        startTime = dclock();
    }

    if (chol(A, n) == 0)
    {
        /* sum the entries with column index >= row index */
        for (i = 0; i < n; i++)
        {
            for (j = i; j < n; j++)
            {
                checksum += A[IDX(i, j, n)];
            }
        }
        printf("Checksum: %f \n", checksum);
    }
    else
    {
        fprintf(stderr, "Error: matrix is either not symmetric or not positive definite.\n");
    }

    /* stop measuring and report */
    if (evt != -1)
    {
        counter = papi_stop();
        fprintf(stderr, "%lld\n", counter);
    }
    else
    {
        endTime = dclock();
        fprintf(stderr, "%f\n", endTime - startTime);
    }

    free(A);
    return 0;
}
// Measurement update step of the unscented Kalman filter.
//
// Runs the unscented transformation of the measurements, forms the filter
// gain K = P12 * inv(P2) through the Cholesky factor of P2, then updates the
// state estimate (particles.samples) and the covariance P.
void BFilterUKF::update(fvec measurement){
    unsigned int numMeasurements = measurement.n_cols;
    utMeasurement(X1, Wm, Wc, numMeasurements, process->H); //unscented transformation of measurments

    fmat P12=X2*Wc.diag()*Z2.t(); //transformed cross-covariance

    // chol() returns the upper factor R with R.t()*R == P2.
    fmat R = chol(P2);

    // Filter gain. The MATLAB original is K = P12/R/R', i.e. matrix right
    // division. In Armadillo operator/ between matrices is ELEMENT-WISE, so
    // the gain is obtained with two triangular solves instead:
    //   K = P12 * inv(R) * inv(R.t())  <=>  K.t() = inv(R) * inv(R.t()) * P12.t()
    fmat Kt = solve(trimatu(R), solve(trimatl(R.t()), P12.t()));
    fmat K = Kt.t();
    // K=P12*P2.i();

    particles.samples=x1+K*(measurement-z1); //state update
    P=P1-K*P12.t(); //covariance update
}
//------------------------------------------------------------------------------ mat MfLowRankApproximation::cMatrix(const vec &g, const mat &h, double dx) { int n = h.n_rows; int nSpatial = g.n_rows; mat S = zeros(n, n); for(int i=0; i<n; i++) { for(int j=0; j<n; j++) { S(i,j) = 0; for(int m=0; m<nSpatial; m++) { S(i,j) += g(m)*h(m,i)*h(m,j); } } } return chol(S);; }
int main(int argc, char *argv[]) { if(argc!=3){ std::cerr<<"Usage:\n cat pedigree | "<<argv[0]<<" V_p h2 > result\n"; return 1; } std::vector<parent>ped; {//Read the pedigree int pa, ma; while(std::cin>>pa>>ma) ped.push_back(parent(pa,ma)); } long dim(ped.size()); long len(dim*dim); double A[len], D[dim]; amatrix(ped, A); //the A matrix chol(A, D, dim); //its Cholesky factor double vp(atof(argv[1])), h2(atof(argv[2])); double sdg(std::sqrt(vp*h2)), sde(std::sqrt(vp*(1-h2))); double rg[dim], td; long i, j, k(dim), w{1}; while(k){//format control ++w; k/=10; } // baking the random generator std::random_device rdv; int seeds[LEN]; {for(auto&x:seeds) x = rdv();} std::seed_seq seq(seeds, seeds + LEN); std::mt19937 rng(seq); Gauss gg(0, sdg), ee(0, sde); std::cout<<std::fixed; std::cout.precision(3); for(auto&p:rg) p=gg(rng); for(i=0; i<dim; ++i){ td = 0.; for(j=0; j<=i; ++j) td+=A[j*dim+i]*rg[j]; std::cout<<std::setw(w)<<ped[i].pa<<std::setw(w)<<ped[i].ma<<' '; std::cout<<' '<<td<<' '<<td+ee(rng)<<'\n'; } return 0; }
//--------------------------------------------------------- Cub2D& NDG2D::CubatureVolumeMesh2D(int Corder) //--------------------------------------------------------- { // function cub = CubatureVolumeMesh2D(Corder) // purpose: build cubature nodes, weights and geometric factors for all elements // // Note: m_cub is member of Globals2D // set up cubature nodes Cubature2D(Corder, m_cub); // evaluate generalized Vandermonde of Lagrange interpolant functions at cubature nodes InterpMatrix2D(m_cub); // evaluate local derivatives of Lagrange interpolants at cubature nodes Dmatrices2D(this->N, m_cub); // evaluate the geometric factors at the cubature nodes GeometricFactors2D(m_cub); // custom mass matrix per element DMat mmk; DMat_Diag D; DVec d; m_cub.mmCHOL.resize(Np*Np, K); m_cub.mm .resize(Np*Np, K); for (int k=1; k<=K; ++k) { d=m_cub.J(All,k); d*=m_cub.w; D.diag(d); // weighted diagonal mmk = m_cub.VT * D * m_cub.V; // mass matrix for element k m_cub.mm(All,k) = mmk; // store mass matrix m_cub.mmCHOL(All,k) = chol(mmk); // store Cholesky factorization } // incorporate weights and Jacobian m_cub.W = outer(m_cub.w, ones(K)); m_cub.W.mult_element(m_cub.J); // compute coordinates of cubature nodes m_cub.x = m_cub.V * this->x; m_cub.y = m_cub.V * this->y; return m_cub; }
/*
 * Counter driver for chol().
 *
 * Usage: <program> <matrix_file> <n> <event>
 *   matrix_file : file passed to load_matrix()
 *   n           : matrix order
 *   event       : value handed to startCount()
 *
 * Returns 1 without doing any work when fewer than three arguments are given.
 */
int main(int argc, char** argv) {
  double *A;
  int n, ret, event;
  double startTime;
  double endTime;

  /* argv[1..3] are all required; do not read past the argument vector. */
  if (argc < 4) {
    return 1;
  }

  n = atoi(argv[2]);
  A = load_matrix(argv[1], n);
  event = atoi(argv[3]);

  init();
  startCount(event);
  startTime = dclock();
  ret = chol(A, n);
  stopAndPrint();
  endTime = dclock();
  // printf("%lf\n", endTime - startTime);

  /* timing and return code are collected but currently not reported */
  (void) startTime;
  (void) endTime;
  (void) ret;

  free(A);
  return 0;
}
// Draws one random sample from the mixture: a component is selected by
// comparing a uniform draw against the running sum of the weights, then a
// standard normal vector is multiplied by the transposed Cholesky factor of
// that component's covariance and shifted by its mean.
arma::vec GMM<FittingType>::Random() const
{
  // Determine which Gaussian it will be coming from.
  const double threshold = math::Random();

  double cumulative = 0;
  size_t g = 0;
  while (g < gaussians)
  {
    cumulative += weights(g);
    if (threshold <= cumulative)
      break;
    ++g;
  }

  // Fall back to the first component when no running sum reached the draw.
  const size_t chosen = (g < gaussians) ? g : 0;

  return trans(chol(covariances[chosen])) *
      arma::randn<arma::vec>(dimensionality) + means[chosen];
}
/*
 * Smoke test: factors a 2000 x 2000 matrix that is zero everywhere except
 * for 2 on the diagonal. The return value of chol() is not inspected.
 */
int main()
{
    const int n = 2000;
    int d;
    double *A = (double*)calloc(n*n, sizeof(double));

    assert(A != NULL);

    /* set the diagonal */
    for (d = 0; d < n; d++)
    {
        A[IDX(d, d, n)] = 2;
    }

    chol(A,n);
    // if (!chol(A, n));
    // else
    //     fprintf(stderr, "Error: matrix is either not symmetric or not positive definite.\n");

    free(A);
    return 0;
}
// [[Rcpp::export]] List rwishart(double nu, mat const& V){ // Wayne Taylor 4/7/2015 // Function to draw from Wishart (nu,V) and IW // W ~ W(nu,V) // E[W]=nuV // WI=W^-1 // E[WI]=V^-1/(nu-m-1) // T has sqrt chisqs on diagonal and normals below diagonal int m = V.n_rows; mat T = zeros(m,m); for(int i = 0; i < m; i++) { T(i,i) = sqrt(rchisq(1,nu-i)[0]); //rchisq returns a vectorized object, so using [0] allows for the conversion to double } for(int j = 0; j < m; j++) { for(int i = j+1; i < m; i++) { T(i,j) = rnorm(1)[0]; //rnorm returns a NumericVector, so using [0] allows for conversion to double }} mat C = trans(T)*chol(V); mat CI = solve(trimatu(C),eye(m,m)); //trimatu interprets the matrix as upper triangular and makes solve more efficient // C is the upper triangular root of Wishart therefore, W=C'C // this is the LU decomposition Inv(W) = CICI' Note: this is // the UL decomp not LU! // W is Wishart draw, IW is W^-1 return List::create( Named("W") = trans(C) * C, Named("IW") = CI * trans(CI), Named("C") = C, Named("CI") = CI); }
int main(int argc, char** argv) { // size of the image int n = 300; // number of unknows (=number of pixels) int m = n*n; // Assembly: // list of non-zeros coefficients std::vector<T> coefficients; // the right hand side-vector resulting from the constraints Eigen::VectorXd b(m); buildProblem(coefficients, b, n); SpMat A(m,m); A.setFromTriplets(coefficients.begin(), coefficients.end()); // Solving: // performs a Cholesky factorization of A Eigen::SimplicialCholesky<SpMat> chol(A); // use the factorization to solve for the given right hand side Eigen::VectorXd x = chol.solve(b); // Export the result to a file: saveAsBitmap(x, n, argv[1]); return 0; }
// Symbolic Gaussian likelihood of a residual v under covariance this->R.
//
// With Sf = chol(R) upper triangular (Sf.t()*Sf == R), the quadratic form is
//   v' * inv(R) * v = || inv(Sf).t() * v ||^2,
// so the residual is whitened with the TRANSPOSE of inv(Sf).
// Returns exp(-0.5 * v' inv(R) v) / C with C = (2*pi)^(k/2) * prod(diag(Sf)).
AD::SXMatrix CasadiSystem::gauss_likelihood(const AD::SXMatrix& v) {
    mat Sf = chol(this->R);
    mat Sf_inv = inv(Sf);
    int r_size = this->R.n_cols;

    // r_size/2.0: the exponent is k/2 as a real number; integer division
    // would truncate it for odd dimensions.
    double C = pow(2*M_PI, r_size/2.0)*prod(diagvec(Sf));

    // Copy inv(Sf) transposed into a CasADi matrix.
    AD::SXMatrix Sf_inv_t_casadi(r_size,r_size);
    for(int i=0; i < r_size; ++i) {
        for(int j=0; j < r_size; ++j) {
            Sf_inv_t_casadi(i,j) = Sf_inv(j,i);
        }
    }

    AD::SXMatrix M = mul(Sf_inv_t_casadi, v);
    AD::SXMatrix E_exp_sum = exp(-0.5*mul(trans(M),M));
    AD::SXMatrix w = E_exp_sum / C;

    // no need to normalize here because normalized later on anyways
    return w;
}
/*
 * Demo: runs chol() on a fixed 4x4 matrix and prints the input view followed
 * by the matrix L that chol() fills in.
 */
int main(int argc, char *argv[]) {
  double a_data[] = {  4,10, 4, 2,
                      10,61,28,53,
                       4,28,38,41,
                       2,53,41,78 };
  int i,j;

  (void) argc;
  (void) argv;

  /* Only L is needed; the previously allocated, never-used U is gone. */
  gsl_matrix *L = gsl_matrix_calloc(4, 4);
  gsl_matrix_view m = gsl_matrix_view_array(a_data, 4, 4);

  chol(&m.matrix, L, 4);

  /* matrix passed to chol() */
  for(i=0; i<4; i++) {
    for(j=0; j<4; j++) {
      printf("% 10f ", gsl_matrix_get(&m.matrix, i, j));
    }
    printf("\n");
  }
  printf("\n");

  /* matrix produced by chol() */
  for(i=0; i<4; i++) {
    for(j=0; j<4; j++) {
      printf("% 10f ", gsl_matrix_get(L, i, j));
    }
    printf("\n");
  }

  /* release the matrix allocated with gsl_matrix_calloc */
  gsl_matrix_free(L);
  return 0;
}
// Status-returning form of chol with a selectable layout: stores the factor
// of X in `out`; returns false when chol(X, layout) throws std::runtime_error
// and true otherwise.
inline
bool
chol
  (
  Mat<typename T1::elem_type>& out,
  const Base<typename T1::elem_type,T1>& X,
  const char* layout = "upper",
  const typename arma_blas_type_only<typename T1::elem_type>::result* junk = 0
  )
  {
  arma_extra_debug_sigprint();
  arma_ignore(junk);

  bool status = true;

  try
    {
    out = chol(X, layout);
    }
  catch(std::runtime_error&)
    {
    status = false;
    }

  return status;
  }
/*
 * Benchmark of three Cholesky implementations (chol, speed_chol, simd_chol)
 * on the same input matrix. For each one it prints the execution time and
 * MFLOPS of a single run, then calls measure() for event types 0..3 on a
 * freshly loaded matrix. Finally the matrices A and C are compared.
 *
 * Exit codes: 0 on success, 1 when the comparison reports a difference,
 * 2 when a factorization fails.
 */
int main(){
    //processor specs:
    // http://www.cpu-world.com/CPUs/Core_i7/Intel-Core%20i7-3630QM%20Mobile%20processor.html
    // useful article
    // http://www.codeproject.com/Articles/874396/Crunching-Numbers-with-AVX-and-AVX
    double *A, *B, *C;
    int n, result;
    int event_type;
    char matrix_file[64];
    double start_time, end_time, time;
    double gflops_prefix;

    n = 1000;
    /* n^3/3 floating point operations, expressed in millions */
    gflops_prefix = ((double)n * n * n * 1.0e-6) / 3.0;

    /* bounded formatting: never writes past matrix_file */
    snprintf(matrix_file, sizeof(matrix_file), "input/matrix_%dx%d.txt", n, n);
    A = load_matrix(matrix_file, n);
    B = load_matrix(matrix_file, n);
    C = load_matrix(matrix_file, n);

    fprintf(stdout, "\n====================================================\n");
    fprintf(stdout, "\nMatrix size: %d\n\n", n);

    fprintf(stdout, "Standard algorithm\n");
    start_time = dclock();
    result = chol(A, n);
    end_time = dclock();
    if (result != 0) {
        fprintf(stderr, "Error: matrix is either not symmetric or not positive definite.\n");
        exit(2);
    } else {
        time = end_time - start_time;
        fprintf(stdout, "Execution time:\t\t\t\t %le\n", time);
        fprintf(stdout, "MFLOPS:\t\t\t\t\t %f\n", gflops_prefix / time);
    }
    for (event_type = 0; event_type < 4; event_type++){
        free(A); /* release the previous copy before loading a fresh one */
        A = load_matrix(matrix_file, n);
        measure(chol, A, n, event_type);
    }

    fprintf(stdout, "\nOptimized algorithm\n");
    start_time = dclock();
    result = speed_chol(B, n);
    end_time = dclock();
    if (result != 0) {
        fprintf(stderr, "Error: matrix is either not symmetric or not positive definite.\n");
        exit(2);
    } else {
        time = end_time - start_time;
        fprintf(stdout, "Execution time:\t\t\t\t %le\n", time);
        fprintf(stdout, "MFLOPS:\t\t\t\t\t %f\n", gflops_prefix / time);
    }
    for (event_type = 0; event_type < 4; event_type++){
        free(B);
        B = load_matrix(matrix_file, n);
        measure(speed_chol, B, n, event_type);
    }

    fprintf(stdout, "\nSIMD algorithm\n");
    start_time = dclock();
    result = simd_chol(C, n);
    end_time = dclock();
    if (result != 0) {
        fprintf(stderr, "Error: matrix is either not symmetric or not positive definite.\n");
        exit(2);
    } else {
        time = end_time - start_time;
        fprintf(stdout, "Execution time:\t\t\t\t %le\n", time);
        fprintf(stdout, "MFLOPS:\t\t\t\t\t %f\n", gflops_prefix / time);
    }
    for (event_type = 0; event_type < 4; event_type++){
        free(C);
        C = load_matrix(matrix_file, n);
        measure(simd_chol, C, n, event_type);
    }

    fprintf(stdout, "\n");
    fprintf(stdout, "====================================================\n");

    if (assert_matrix_equality(A, C, n)){
        printf("Algorithms differ in results!\n");
        free(A);
        free(B);
        free(C);
        exit(1);
    }

    free(A);
    free(B);
    free(C);
    return 0;
}
// [[Rcpp::export]]
List rhierLinearModel_rcpp_loop(List const& regdata, mat const& Z, mat const& Deltabar, mat const& A, double nu,
                          mat const& V, double nu_e, vec const& ssq, vec tau, mat Delta, mat Vbeta, int R, int keep, int nprint){

// Keunwoo Kim 09/16/2014

// Purpose: run hiearchical regression model

// Arguments:
//   Data list of regdata,Z
//     regdata is a list of lists each list with members y, X
//        e.g. regdata[[i]]=list(y=y,X=X)
//     X has nvar columns
//     Z is nreg=length(regdata) x nz
//
//   Prior list of prior hyperparameters
//     Deltabar,A, nu.e,ssq,nu,V
//          note: ssq is a nreg x 1 vector!
//
//   Mcmc
//     list of Mcmc parameters
//     R is number of draws
//     keep is thining parameter -- keep every keepth draw
//     nprint - print estimated time remaining on every nprint'th draw

// Output:
//   list of
//   betadraw -- nreg x nvar x R/keep array of individual regression betas
//   taudraw -- R/keep x nreg  array of error variances for each regression
//   Deltadraw -- R/keep x nz x nvar array of Delta draws
//   Vbetadraw -- R/keep x nvar*nvar array of Vbeta draws

// Model:
// nreg regression equations
//        y_i = X_ibeta_i + epsilon_i
//        epsilon_i ~ N(0,tau_i)
//             nvar X vars in each equation

// Prior:
//        tau_i ~ nu.e*ssq_i/chisq(nu.e)  tau_i is the variance of epsilon_i
//        beta_i ~ N(ZDelta[i,],V_beta)
//               Note:  ZDelta is the matrix Z * Delta; [i,] refers to ith row of this product!
//          vec(Delta) | V_beta ~ N(vec(Deltabar),Vbeta (x) A^-1)
//          V_beta ~ IW(nu,V)  or V_beta^-1 ~ W(nu,V^-1)
//              Delta, Deltabar are nz x nvar
//              A is nz x nz
//              Vbeta is nvar x nvar
//
//          NOTE: if you don't have any z vars, set Z=iota (nreg x 1)

// Update Note:
//        (Keunwoo Kim 04/07/2015)
//        Changed "rmultireg" to return List object, which is the original function.
//        Efficiency is almost same as when the output is a struct object.
//        Nothing different from "rmultireg1" in the previous R version.

  int nreg = regdata.size();
  int nvar = V.n_cols;
  int nz = Z.n_cols;

  // convert List to std::vector of struct
  std::vector<moments> regdata_vector;
  for (int unit=0; unit<nreg; unit++){
    List unitData = regdata[unit];
    moments unitMoments;
    unitMoments.y = as<vec>(unitData["y"]);
    unitMoments.X = as<mat>(unitData["X"]);
    unitMoments.XpX = as<mat>(unitData["XpX"]);
    unitMoments.Xpy = as<vec>(unitData["Xpy"]);
    regdata_vector.push_back(unitMoments);
  }

  // storage for the current betas and the kept draws
  mat betas(nreg, nvar);
  mat Vbetadraw(R/keep, nvar*nvar);
  mat Deltadraw(R/keep, nz*nvar);
  mat taudraw(R/keep, nreg);
  cube betadraw(nreg, nvar, R/keep);

  if (nprint>0) startMcmcTimer();

  //start main iteration loop
  for (int rep=0; rep<R; rep++){

    // compute the inverse of Vbeta
    mat ucholinv = solve(trimatu(chol(Vbeta)), eye(nvar,nvar)); //trimatu interprets the matrix as upper triangular and makes solve more efficient
    mat Abeta = ucholinv*trans(ucholinv);

    mat betabar = Z*Delta;
    mat Abetabar = Abeta*trans(betabar);

    //loop over all regressions
    for (int unit=0; unit<nreg; unit++){
      unireg draw = runiregG(regdata_vector[unit].y, regdata_vector[unit].X,
                             regdata_vector[unit].XpX, regdata_vector[unit].Xpy,
                             tau[unit], Abeta, Abetabar(span::all,unit),
                             nu_e, ssq[unit]);
      betas(unit,span::all) = trans(draw.beta);
      tau[unit] = draw.sigmasq;
    }

    //draw Vbeta, Delta | {beta_i}
    List rmregout = rmultireg(betas,Z,Deltabar,A,nu,V);
    Vbeta = as<mat>(rmregout["Sigma"]); //conversion from Rcpp to Armadillo requires explict declaration of variable type using as<>
    Delta = as<mat>(rmregout["B"]);

    //print time to completion and draw # every nprint'th draw
    if (nprint>0 && (rep+1)%nprint==0) infoMcmcTimer(rep, R);

    // store every keep'th draw
    if((rep+1)%keep==0){
      const int slot = (rep+1)/keep - 1;
      Vbetadraw(slot, span::all) = trans(vectorise(Vbeta));
      Deltadraw(slot, span::all) = trans(vectorise(Delta));
      taudraw(slot, span::all) = trans(tau);
      betadraw.slice(slot) = betas;
    }
  }

  if (nprint>0) endMcmcTimer();

  return List::create(
    Named("Vbetadraw") = Vbetadraw,
    Named("Deltadraw") = Deltadraw,
    Named("betadraw") = betadraw,
    Named("taudraw") = taudraw);
}
int main(int argc, char **argv) { if(argc != 3) { fprintf(stderr, "Wrong number of arguments. Provide rounding mode and event number [0-50].\n"); fprintf(stderr, "Possible rounding modes:\n\t0 - to nearest\n\t1 - upward\n\t2 - downward\n\t3 - toward zero\n"); fprintf(stderr, "Execution: program_name <rounding_mode> <event_number>\n"); exit(-1); } int choosen = atoi(argv[1]); int rounding_mode; if(choosen == 0) { rounding_mode = FE_TONEAREST; } else if(choosen == 1) { rounding_mode = FE_UPWARD; } else if(choosen == 2) { rounding_mode = FE_DOWNWARD; } else if(choosen == 3) { rounding_mode = FE_TOWARDZERO; } else { fprintf(stderr, "Incorrect rounding mode. Should be one of [0, 1, 2, 3]\n"); exit(-2); } const int EVENTS_SIZE = 50; int option = atoi(argv[2]); if (option < 0 || option >= EVENTS_SIZE) { fprintf(stderr, "Incorrect option chosen.\n"); exit(-2); } int check = fesetround(rounding_mode); if(check != 0) { fprintf(stderr, "Unable to set rounding mode.\n"); exit(-3); } double *A; int i, j, n, ret; n = 3; A = calloc(n * n, sizeof(double)); assert(A != NULL); A[IDX(0, 0, n)] = 4.0; A[IDX(0, 1, n)] = 12.0; A[IDX(0, 2, n)] = -16.0; A[IDX(1, 0, n)] = 12.0; A[IDX(1, 1, n)] = 37.0; A[IDX(1, 2, n)] = -43.0; A[IDX(2, 0, n)] = -16.0; A[IDX(2, 1, n)] = -43.0; A[IDX(2, 2, n)] = 98.0; /* init lib */ int events[] = {PAPI_L1_DCM, PAPI_L1_ICM, PAPI_L2_DCM, PAPI_L2_ICM, PAPI_L1_TCM, PAPI_L2_TCM, PAPI_L3_TCM, PAPI_TLB_DM, PAPI_TLB_IM, PAPI_L1_LDM, PAPI_L1_STM, PAPI_L2_STM, PAPI_STL_ICY, PAPI_BR_UCN, PAPI_BR_CN, PAPI_BR_TKN, PAPI_BR_NTK, PAPI_BR_MSP, PAPI_BR_PRC, PAPI_TOT_INS, PAPI_FP_INS, PAPI_LD_INS, PAPI_SR_INS, PAPI_BR_INS, PAPI_TOT_CYC, PAPI_L2_DCH, PAPI_L2_DCA, PAPI_L3_DCA, PAPI_L2_DCR, PAPI_L3_DCR, PAPI_L2_DCW, PAPI_L3_DCW, PAPI_L2_ICH, PAPI_L2_ICA, PAPI_L3_ICA, PAPI_L2_ICR, PAPI_L3_ICR, PAPI_L2_TCA, PAPI_L3_TCA, PAPI_L2_TCR, PAPI_L3_TCR, PAPI_L2_TCW, PAPI_L3_TCW, PAPI_FDV_INS, PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS, PAPI_VEC_SP, PAPI_VEC_DP, PAPI_REF_CYC}; char 
*event_names[] = {"PAPI_L1_DCM", "PAPI_L1_ICM", "PAPI_L2_DCM", "PAPI_L2_ICM", "PAPI_L1_TCM", "PAPI_L2_TCM", "PAPI_L3_TCM", "PAPI_TLB_DM", "PAPI_TLB_IM", "PAPI_L1_LDM", "PAPI_L1_STM", "PAPI_L2_STM", "PAPI_STL_ICY", "PAPI_BR_UCN", "PAPI_BR_CN", "PAPI_BR_TKN", "PAPI_BR_NTK", "PAPI_BR_MSP", "PAPI_BR_PRC", "PAPI_TOT_INS", "PAPI_FP_INS", "PAPI_LD_INS", "PAPI_SR_INS", "PAPI_BR_INS", "PAPI_TOT_CYC", "PAPI_L2_DCH", "PAPI_L2_DCA", "PAPI_L3_DCA", "PAPI_L2_DCR", "PAPI_L3_DCR", "PAPI_L2_DCW", "PAPI_L3_DCW", "PAPI_L2_ICH", "PAPI_L2_ICA", "PAPI_L3_ICA", "PAPI_L2_ICR", "PAPI_L3_ICR", "PAPI_L2_TCA", "PAPI_L3_TCA", "PAPI_L2_TCR", "PAPI_L3_TCR", "PAPI_L2_TCW", "PAPI_L3_TCW", "PAPI_FDV_INS", "PAPI_FP_OPS", "PAPI_SP_OPS", "PAPI_DP_OPS", "PAPI_VEC_SP", "PAPI_VEC_DP", "PAPI_REF_CYC"}; long long values[1] = {0}; int eventSet = PAPI_NULL; int papi_err; bool papi_supported = true; if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) { fprintf(stderr, "PAPI is unsupported.\n"); papi_supported = false; } // if (PAPI_num_counters() < EVENTS_SIZE) { // fprintf(stderr, "PAPI is unsupported.\n"); // papi_supported = false; // } if ((papi_err = PAPI_create_eventset(&eventSet)) != PAPI_OK) { fprintf(stderr, "Could not create event set: %s\n", PAPI_strerror(papi_err)); } if ((papi_err = PAPI_add_event(eventSet, events[option])) != PAPI_OK) { fprintf(stderr, "Could not add event %d: %s\n", i, PAPI_strerror(papi_err)); } /* start counters */ if (papi_supported) { if ((papi_err = PAPI_start(eventSet)) != PAPI_OK) { fprintf(stderr, "Could not start counters: %s\n", PAPI_strerror(papi_err)); } } check = chol(A, n); /* stop conuters */ if (papi_supported) { if ((papi_err = PAPI_stop(eventSet, values)) != PAPI_OK) { fprintf(stderr, "Could not get values: %s\n", PAPI_strerror(papi_err)); } printf("Performance counters for factorization stage: \n"); printf("%s: %lld\n", event_names[option], values[0]); } if (check != 0) { fprintf(stderr, "Error: matrix is either not symmetric or not positive 
definite.\n"); } else { fprintf(stdout, "Tri(L) = \n"); for (i = 0; i < n; i++) { for (j = 0; j <= i; j++) printf("%2.8lf\t", A[IDX(i, j, n)]); printf("\n"); } } free(A); return 0; }