/// Benchmark driver: loads a mesh, smooths it, and reports quality and timing.
///
/// Expects exactly one command-line argument (the mesh file). Prints the
/// mean/min quality before and after `smooth(mesh, 200)`, a pass/fail verdict
/// (pass requires mean > 0.90 and min > 0.55) and the wall-clock time spent
/// inside smooth().
///
/// @return EXIT_FAILURE on a usage error; EXIT_SUCCESS otherwise (also when
///         the quality check prints "Test failed").
int main(int argc, char **argv){
  if(argc!=2){
    std::cerr << "Usage: " << argv[0] << " mesh_file" << std::endl;
    // Stop here: previously execution fell through and argv[1] (a null
    // pointer when no argument is given) was passed to the Mesh constructor.
    return EXIT_FAILURE;
  }

  Mesh *mesh = new Mesh(argv[1]);

  Quality q = mesh->get_mesh_quality();

  std::cout << "Initial quality:\n"
            << "Quality mean:  " << q.mean << std::endl
            << "Quality min:   " << q.min << std::endl;

  // Only the smoothing call itself is timed.
  double time = get_wtime();
  smooth(mesh, 200);
  double time_smooth = get_wtime() - time;

  q = mesh->get_mesh_quality();

  std::cout<<"After smoothing:\n"
           << "Quality mean:  " << q.mean << std::endl
           << "Quality min:   " << q.min << std::endl;

  if((q.mean>0.90)&&(q.min>0.55))
    std::cout << "Test passed"<< std::endl;
  else
    std::cout << "Test failed"<< std::endl;

  std::cout<<"BENCHMARK: " << time_smooth << "s" << std::endl;

  delete mesh;

  return EXIT_SUCCESS;
}
/// Runs fib(n, 0) once, prints the result and the elapsed wall-clock time to
/// std::cout, and returns the computed value.
int fib0 ( int n )
{
  const double t_begin = get_wtime();
  const int result = fib( n,0 );
  const double t_end = get_wtime();

  std::cout << "Fibonacci result for " << n << " is " << result << std::endl;
  std::cout << "Computation time: " << t_end - t_begin << " seconds." << std::endl;

  return result;
}
void GPUNB_send( int nj, double mj[], double xj[][3], double vj[][3]){ time_send -= get_wtime(); nbody = nj; // std::cout << "gpu send: " << nbody << " " << nbodymax << std::endl; assert(nbody <= nbodymax); #pragma omp parallel for for(int j=0; j<nj; j++){ jp_host[j] = Jparticle(mj[j], xj[j], vj[j]); } time_send += get_wtime(); }
/// MPI entry point for the hard special leaves (128-bit x).
/// The master process runs S2_hard_mpi_master() and returns its result;
/// every other process runs S2_hard_mpi_slave() and returns 0.
/// The result and elapsed time are reported through print().
///
int128_t S2_hard_mpi(int128_t x,
                     int64_t y,
                     int64_t z,
                     int64_t c,
                     int128_t s2_hard_approx,
                     int threads)
{
  print("");
  print("=== S2_hard_mpi(x, y) ===");
  print("Computation of the hard special leaves");
  print(x, y, c, threads);

  int128_t result = 0;
  double start_time = get_wtime();

  if (!is_mpi_master_proc())
  {
    // The 16-bit factor table uses less memory, so use it
    // whenever y is small enough to fit.
    if (y > FactorTable<uint16_t>::max())
      S2_hard_mpi_slave<uint32_t>((intfast128_t) x, y, z, c, (intfast128_t) s2_hard_approx, threads);
    else
      S2_hard_mpi_slave<uint16_t>((intfast128_t) x, y, z, c, (intfast128_t) s2_hard_approx, threads);
  }
  else
    result = S2_hard_mpi_master(x, y, z, c, s2_hard_approx, threads);

  print("S2_hard", result, start_time);
  return result;
}
/// Computes the easy special leaves for 128-bit x.
/// Generates the primes <= y (as uint32_t when y fits, else int64_t),
/// forwards to S2_easy::S2_easy() and reports result and elapsed
/// time through print().
///
int128_t S2_easy(int128_t x,
                 int64_t y,
                 int64_t z,
                 int64_t c,
                 int threads)
{
  print("");
  print("=== S2_easy(x, y) ===");
  print("Computation of the easy special leaves");
  print(x, y, c, threads);

  double start_time = get_wtime();
  int128_t result;

  // 32-bit primes use less memory; fall back to 64-bit only if needed
  if (y > std::numeric_limits<uint32_t>::max())
  {
    vector<int64_t> primes = generate_primes<int64_t>(y);
    result = S2_easy::S2_easy((intfast128_t) x, y, z, c, primes, threads);
  }
  else
  {
    vector<uint32_t> primes = generate_primes<uint32_t>(y);
    result = S2_easy::S2_easy((intfast128_t) x, y, z, c, primes, threads);
  }

  print("S2_easy", result, start_time);
  return result;
}
/// Computes the easy special leaves for 128-bit x.
/// When built with HAVE_MPI and more than one MPI process is running,
/// the work is delegated to S2_easy_mpi(). Otherwise the primes <= y are
/// generated (uint32_t when y fits, else int64_t) and passed to
/// S2_easy_OpenMP(); result and elapsed time are reported via print().
///
int128_t S2_easy(int128_t x,
                 int64_t y,
                 int64_t z,
                 int64_t c,
                 int threads)
{
#ifdef HAVE_MPI
  if (mpi_num_procs() > 1)
    return S2_easy_mpi(x, y, z, c, threads);
#endif

  print("");
  print("=== S2_easy(x, y) ===");
  print("Computation of the easy special leaves");
  print(x, y, c, threads);

  double start_time = get_wtime();
  int128_t result;

  // 32-bit primes use less memory; fall back to 64-bit only if needed
  if (y > numeric_limits<uint32_t>::max())
  {
    vector<int64_t> primes = generate_primes<int64_t>(y);
    result = S2_easy_OpenMP((intfast128_t) x, y, z, c, primes, threads);
  }
  else
  {
    vector<uint32_t> primes = generate_primes<uint32_t>(y);
    result = S2_easy_OpenMP((intfast128_t) x, y, z, c, primes, threads);
  }

  print("S2_easy", result, start_time);
  return result;
}
int64_t S2_hard(int64_t x, int64_t y, int64_t z, int64_t c, int64_t s2_hard_approx, int threads) { #ifdef HAVE_MPI if (mpi_num_procs() > 1) return S2_hard_mpi(x, y, z, c, s2_hard_approx, threads); #endif print(""); print("=== S2_hard(x, y) ==="); print("Computation of the hard special leaves"); print(x, y, c, threads); double time = get_wtime(); FactorTable<uint16_t> factors(y, threads); int64_t max_prime = z / isqrt(y); vector<int32_t> primes = generate_primes(max_prime); int64_t s2_hard = S2_hard_OpenMP_master((intfast64_t) x, y, z, c, (intfast64_t) s2_hard_approx, primes, factors, threads); print("S2_hard", s2_hard, time); return s2_hard; }
/// P3(x, a) counts the numbers <= x that have exactly 3 /// prime factors each exceeding the a-th prime. /// Space complexity: O(pi(sqrt(x))). /// int64_t P3(int64_t x, int64_t a, int threads) { print(""); print("=== P3(x, a) ==="); print("Computation of the 3rd partial sieve function"); double time = get_wtime(); vector<int32_t> primes = generate_primes(isqrt(x)); int64_t y = iroot<3>(x); int64_t pi_y = pi_bsearch(primes, y); int64_t sum = 0; threads = ideal_num_threads(threads, pi_y, 100); #pragma omp parallel for num_threads(threads) schedule(dynamic) reduction(+: sum) for (int64_t i = a + 1; i <= pi_y; i++) { int64_t xi = x / primes[i]; int64_t bi = pi_bsearch(primes, isqrt(xi)); for (int64_t j = i; j <= bi; j++) sum += pi_bsearch(primes, xi / primes[j]) - (j - 1); } print("P3", sum, time); return sum; }
void print(const string& res_str, maxint_t res, double time) { if (print_status()) { cout << "\r" << string(50,' ') << "\r"; cout << "Status: 100%" << endl; cout << res_str << " = " << res << endl; print_seconds(get_wtime() - time); } }
/// MPI entry point for the 2nd partial sieve function (128-bit x).
/// Prints a header, runs P2_mpi_master() and reports the result
/// together with the elapsed time through print().
///
int128_t P2_mpi(int128_t x, int64_t y, int threads)
{
  print("");
  print("=== P2_mpi(x, y) ===");
  print("Computation of the 2nd partial sieve function");
  print(x, y, threads);

  const double start_time = get_wtime();
  const int128_t result = P2_mpi_master(x, y, threads);
  print("P2", result, start_time);

  return result;
}
// Evaluates irr_simd_firr() for each of the ni particles listed in addr[].
// Every addr[] entry is reduced by one before being used as an index.
// Besides the per-particle outputs, this updates the global counters
// num_inter (summed list[].nnb), time_grav (elapsed wall time),
// num_fcall (one per call) and num_steps (ni per call).
static void irr_simd_firr_vec(
		const double ti,
		const int ni,
		const int addr[],
		_out_ double accout[][3],
		_out_ double jrkout[][3],
		_out_ int nnbid[])
{
	// printf("NI %d TIME %f FIRST %d",ni,ti,addr[0]);
	const double t_begin = get_wtime();

	// Both lanes of vec_tnow carry the current time ti.
	::vec_tnow = (v2df){ti, ti};

	int pair_count = 0;
#pragma omp parallel for reduction(+: pair_count) schedule(guided)
	for(int n=0; n<ni; n++){
		const int idx = addr[n]-1;
		irr_simd_firr(idx, accout[n], jrkout[n], nnbid[n]);
		pair_count += list[idx].nnb;
	}
	::num_inter += pair_count;

	const double t_end = get_wtime();
	::time_grav += t_end-t_begin;
	::num_fcall++;
	::num_steps += ni;
}
/// Partial sieve function (a.k.a. Legendre-sum). /// phi(x, a) counts the numbers <= x that are not divisible /// by any of the first a primes. /// int64_t phi(int64_t x, int64_t a, int threads) { if (x < 1) return 0; if (a > x) return 1; if (a < 1) return x; print(""); print("=== phi(x, a) ==="); print("Count the numbers <= x coprime to the first a primes"); double time = get_wtime(); int64_t sum = 0; if (is_phi_tiny(a)) sum = phi_tiny(x, a); else { vector<int32_t> primes = generate_n_primes(a); if (primes.at(a) >= x) sum = 1; else { // use a large pi(x) lookup table for speed int64_t sqrtx = isqrt(x); PiTable pi(max(sqrtx, primes[a])); PhiCache cache(primes, pi); int64_t pi_sqrtx = min(pi[sqrtx], a); sum = x - a + pi_sqrtx; int64_t p14 = ipow((int64_t) 10, 14); int64_t thread_threshold = p14 / primes[a]; threads = ideal_num_threads(threads, x, thread_threshold); // this loop scales only up to about 8 CPU cores threads = min(8, threads); #pragma omp parallel for schedule(dynamic, 16) \ num_threads(threads) firstprivate(cache) reduction(+: sum) for (int64_t a2 = 0; a2 < pi_sqrtx; a2++) sum += cache.phi<-1>(x / primes[a2 + 1], a2); } } print("phi", sum, time); return sum; }
/// Rewrites the in-place status line ("Status: N%, Load balance: M%")
/// on stdout. After the first print, calls arriving less than 0.01
/// seconds after the previous print are ignored.
///
void S2Status::print(maxint_t n, maxint_t limit, double rsd)
{
  const double now = get_wtime();
  const bool printed_before = old_ >= 0;

  // Throttle the output
  if (printed_before && (now - time_) < 0.01)
    return;

  time_ = now;

  const int percent = skewed_percent(n, limit);
  // 100 - rsd, rounded and limited to [0, 100]
  const int load_balance = (int) in_between(0, 100 - rsd + 0.5, 100);
  old_ = percent;

  ostringstream line;
  line << "\r" << string(40,' ')
       << "\rStatus: " << percent << "%, "
       << "Load balance: " << load_balance << "%";

  cout << line.str() << flush;
}
/// Computes the trivial special leaves for 128-bit x.
/// Prints a header, runs S2_trivial_OpenMP() and reports the result
/// together with the elapsed time through print().
///
int128_t S2_trivial(int128_t x,
                    int64_t y,
                    int64_t z,
                    int64_t c,
                    int threads)
{
  print("");
  print("=== S2_trivial(x, y) ===");
  print("Computation of the trivial special leaves");
  print(x, y, c, threads);

  const double start_time = get_wtime();
  const int128_t result = S2_trivial_OpenMP(x, y, z, c, threads);
  print("S2_trivial", result, start_time);

  return result;
}
/// Computes the easy special leaves for 64-bit x.
/// Generates the primes <= y, forwards to S2_easy::S2_easy() and
/// reports result and elapsed time through print().
///
int64_t S2_easy(int64_t x,
                int64_t y,
                int64_t z,
                int64_t c,
                int threads)
{
  print("");
  print("=== S2_easy(x, y) ===");
  print("Computation of the easy special leaves");
  print(x, y, c, threads);

  double start_time = get_wtime();

  vector<int32_t> primes = generate_primes(y);
  int64_t result = S2_easy::S2_easy((intfast64_t) x, y, z, c, primes, threads);

  print("S2_easy", result, start_time);
  return result;
}
void test(const int nrep) { REAL mat[N][N] __attribute__((aligned(64))); REAL in [N][N] __attribute__((aligned(64))); REAL out[N][N] __attribute__((aligned(64))); REAL fdum; for (int j = 0; j < N; j++) for (int i = j; i < N; i++) mat[i][j] = mat[j][i] = drand48(); for (int j = 0; j < N; j++) for (int i = 0; i < N; i++) { REAL res = 0.0; for (int k = 0; k < N; k++) res += mat[j][k] * mat[k][i]; in[j][i] = res; } for (int j = 0; j < N; j++) for (int i = j; i < N; i++) assert(in[i][j] == in[j][i]); double t0 = get_wtime(); #pragma omp parallel for firstprivate(in) private(out) for (int r = 0; r < nrep; r++) assert(inverse_cholesky(in, out) > 0); const double dt_custom = get_wtime() - t0; fprintf(stderr, " custom inverse= %g sec \n", dt_custom); t0 = get_wtime(); #pragma omp parallel for firstprivate(in) private(out, fdum) for (int r = 0; r < nrep; r++) assert(my_inverse(in, out, fdum) == false); const double dt_lapack = get_wtime() - t0; fprintf(stderr, " LAPACK inverse= %g sec (ratio= %g)\n", dt_lapack, dt_lapack/dt_custom); #if 0 /* does not work from MKL */ t0 = get_wtime(); for (int r = 0; r < nrep; r++) assert(my_inverse_gold(in, out, fdum) == false); fprintf(stderr, " DSYEVD inverse= %g sec \n", get_wtime() - t0); #endif };
int64_t S2_easy(int64_t x, int64_t y, int64_t z, int64_t c, int threads) { #ifdef HAVE_MPI if (mpi_num_procs() > 1) return S2_easy_mpi(x, y, z, c, threads); #endif print(""); print("=== S2_easy(x, y) ==="); print("Computation of the easy special leaves"); print(x, y, c, threads); double time = get_wtime(); vector<int32_t> primes = generate_primes(y); int64_t s2_easy = S2_easy_OpenMP((intfast64_t) x, y, z, c, primes, threads); print("S2_easy", s2_easy, time); return s2_easy; }
/// Refinement test driver.
///
/// With VTK support: imports ../data/box10x10x10.vtu, builds a metric field
/// from psi = x^4 + y^4 + z^4, runs two refine(sqrt(2)) sweeps, writes
/// ../data/test_refine_3d and checks that the surface area is 6 and the
/// volume is 1 to within DBL_EPSILON (relative). Passing "-v" as the first
/// argument additionally verifies the mesh and prints timing and quality
/// statistics. Without VTK support only a notice is printed to stderr.
int main(int argc, char **argv)
{
#ifdef HAVE_MPI
    int required_thread_support=MPI_THREAD_SINGLE;
    int provided_thread_support;
    MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support);
    assert(required_thread_support==provided_thread_support);
#endif

    const bool verbose = (argc>1) && (std::string(argv[1])=="-v");

#ifdef HAVE_VTK
    Mesh<double> *mesh=VTKTools<double>::import_vtu("../data/box10x10x10.vtu");
    mesh->create_boundary();

    MetricField<double,3> metric_field(*mesh);

    const size_t NNodes = mesh->get_number_nodes();

    // Analytic field used to drive the metric.
    std::vector<double> psi(NNodes);
    for(size_t node=0; node<NNodes; node++) {
        psi[node] = pow(mesh->get_coords(node)[0], 4)
                  + pow(mesh->get_coords(node)[1], 4)
                  + pow(mesh->get_coords(node)[2], 4);
    }

    metric_field.add_field(&(psi[0]), 0.001);
    metric_field.update_mesh();

    Refine<double,3> adapt(*mesh);

    // Only the refinement sweeps are timed.
    const double tic = get_wtime();
    for(int sweep=0; sweep<2; sweep++) {
        adapt.refine(sqrt(2.0));
    }
    const double toc = get_wtime();

    if(verbose) {
        mesh->verify();
    }

    mesh->defragment();

    VTKTools<double>::export_vtu("../data/test_refine_3d", mesh);

    const double qmean = mesh->get_qmean();
    const double qmin = mesh->get_qmin();

    const int nelements = mesh->get_number_elements();

    if(verbose) {
        std::cout<<"Refine loop time:     "<<toc-tic<<std::endl
                 <<"Number elements:      "<<nelements<<std::endl
                 <<"Quality mean:         "<<qmean<<std::endl
                 <<"Quality min:          "<<qmin<<std::endl;
    }

    const long double area = mesh->calculate_area();
    const long double volume = mesh->calculate_volume();

    const long double ideal_area(6), ideal_volume(1);

    std::cout<<"Checking area == 6: ";
    const long double area_error = std::abs(area-ideal_area)/std::max(area, ideal_area);
    if(area_error<DBL_EPSILON) {
        std::cout<<"pass"<<std::endl;
    } else {
        std::cout<<"fail (area="<<area<<")"<<std::endl;
    }

    std::cout<<"Checking volume == 1: ";
    const long double volume_error = std::abs(volume-ideal_volume)/std::max(volume, ideal_volume);
    if(volume_error<DBL_EPSILON) {
        std::cout<<"pass"<<std::endl;
    } else {
        std::cout<<"fail (volume="<<volume<<")"<<std::endl;
    }

    delete mesh;
#else
    std::cerr<<"Pragmatic was configured without VTK"<<std::endl;
#endif

#ifdef HAVE_MPI
    MPI_Finalize();
#endif

    return 0;
}
// Computes acceleration, jerk, potential and a neighbour list for ni target
// particles against the nbody source particles stored in jp_host.
//
// Targets are processed four at a time, one per SIMD lane of a v4sf.
//
// Inputs (indexed by target i):
//   h2d[i]  - squared neighbour radius used in the neighbour test
//   dtr[i]  - time offset used to extrapolate the relative position
//   xid/vid - position and velocity
//   m_flag  - when non-zero, the neighbour radius is scaled by the source mass
// Outputs (indexed by target i):
//   acc, jrk, pot - accumulated sums; sources flagged as neighbours are
//                   excluded from these sums
//   listbase      - one record of lmax ints per target: the first int is the
//                   neighbour count, followed by the source indices. If the
//                   count exceeds nbmax, the first int is set to the negated
//                   count and no indices are written.
//
// Side effects: accumulates elapsed wall time into time_grav, adds ni*nbody
// to numInter, increments ::icall and adds ni to ::ini. Uses nblist[tid][lane]
// as per-thread scratch storage.
void GPUNB_regf(
		int ni,
		double h2d[],
		double dtr[],
		double xid[][3],
		double vid[][3],
		double acc[][3],
		double jrk[][3],
		double pot[],
		int lmax,
		int nbmax,
		int *listbase,
		int m_flag){
	// std::cout << " Call GPUNB_regf " << ni << std::endl;
	time_grav -= get_wtime();
	numInter += ni * nbody;
	::icall++;
	::ini +=ni;
#pragma omp parallel for
	for(int i=0; i<ni; i+=4){
		int tid = omp_get_thread_num();
		// Reset this thread's four per-lane neighbour lists.
		nblist[tid][0].clear();
		nblist[tid][1].clear();
		nblist[tid][2].clear();
		nblist[tid][3].clear();
		// Number of lanes holding real targets (less than 4 only in the last group).
		int nii = std::min(4, ni-i);
		// NOTE(review): all four lanes i+0..i+3 are loaded even when nii < 4, so
		// the input arrays are read past index ni-1 when ni is not a multiple of
		// 4 - presumably callers over-allocate; confirm.
		v4sf xi = {xid[i+0][0], xid[i+1][0], xid[i+2][0], xid[i+3][0]};
		v4sf yi = {xid[i+0][1], xid[i+1][1], xid[i+2][1], xid[i+3][1]};
		v4sf zi = {xid[i+0][2], xid[i+1][2], xid[i+2][2], xid[i+3][2]};
		v4sf vxi = {vid[i+0][0], vid[i+1][0], vid[i+2][0], vid[i+3][0]};
		v4sf vyi = {vid[i+0][1], vid[i+1][1], vid[i+2][1], vid[i+3][1]};
		v4sf vzi = {vid[i+0][2], vid[i+1][2], vid[i+2][2], vid[i+3][2]};
		v4sf h2i = {h2d[i+0], h2d[i+1], h2d[i+2], h2d[i+3]};
		// h2mask[nii] keeps the first nii lanes and zeroes the rest, so the
		// neighbour radius of the unused lanes becomes 0.
		static const v4sf h2mask[5] = {
			{0.0, 0.0, 0.0, 0.0},
			{1.0, 0.0, 0.0, 0.0},
			{1.0, 1.0, 0.0, 0.0},
			{1.0, 1.0, 1.0, 0.0},
			{1.0, 1.0, 1.0, 1.0},
		};
		h2i *= h2mask[nii];
		v4sf dtri = {dtr[i+0], dtr[i+1], dtr[i+2], dtr[i+3]};
		// Per-lane accumulators: acceleration, jerk and potential.
		v4sf Ax = {0.f, 0.f, 0.f, 0.f};
		v4sf Ay = {0.f, 0.f, 0.f, 0.f};
		v4sf Az = {0.f, 0.f, 0.f, 0.f};
		v4sf Jx = {0.f, 0.f, 0.f, 0.f};
		v4sf Jy = {0.f, 0.f, 0.f, 0.f};
		v4sf Jz = {0.f, 0.f, 0.f, 0.f};
		v4sf poti = {0.f, 0.f, 0.f, 0.f};
		// Each source particle is read as two consecutive v4sf values.
		v4sf *jpp = (v4sf *)jp_host;
		for(int j=0; j<nbody; j++, jpp+=2){
			v4sf jp0 = jpp[0];
			v4sf jp1 = jpp[1];
			// Broadcast one element of the source record to all four lanes:
			// jp0 supplies x, y, z, m (elements 0..3), jp1 supplies vx, vy, vz.
			v4sf xj = __builtin_ia32_shufps(jp0, jp0, 0x00);
			v4sf yj = __builtin_ia32_shufps(jp0, jp0, 0x55);
			v4sf zj = __builtin_ia32_shufps(jp0, jp0, 0xaa);
			v4sf mj = __builtin_ia32_shufps(jp0, jp0, 0xff);
			v4sf vxj = __builtin_ia32_shufps(jp1, jp1, 0x00);
			v4sf vyj = __builtin_ia32_shufps(jp1, jp1, 0x55);
			v4sf vzj = __builtin_ia32_shufps(jp1, jp1, 0xaa);
			// Relative position and velocity (source minus target).
			v4sf dx = xj - xi;
			v4sf dy = yj - yi;
			v4sf dz = zj - zi;
			v4sf dvx = vxj - vxi;
			v4sf dvy = vyj - vyi;
			v4sf dvz = vzj - vzi;
			// Relative position extrapolated linearly by dtri.
			v4sf dxp = dx + dtri * dvx;
			v4sf dyp = dy + dtri * dvy;
			v4sf dzp = dz + dtri * dvz;
			v4sf r2 = dx*dx + dy*dy + dz*dz;
			v4sf rv = dx*dvx + dy*dvy + dz*dvz;
			v4sf r2p = dxp*dxp + dyp*dyp + dzp*dzp;
			// Neighbour test: the smaller of the current and extrapolated squared
			// distances must be below h2i (or below mj*h2i when m_flag is set).
			v4sf mask;
			// v4sf mask = (v4sf)__builtin_ia32_cmpltps(r2, h2i);
			if(m_flag) {
				v4sf mh2i = mj * h2i;
				mask = (v4sf)__builtin_ia32_cmpltps( __builtin_ia32_minps(r2,r2p), mh2i);
			} else {
				mask = (v4sf)__builtin_ia32_cmpltps( __builtin_ia32_minps(r2,r2p), h2i);
			}
			// One bit per lane; record j in the list of every lane that matched.
			int bits = __builtin_ia32_movmskps(mask);
			// mj = __builtin_ia32_andnps(mask, mj);
			if(bits){
				if (bits&1) nblist[tid][0].push_back(j);
				if (bits&2) nblist[tid][1].push_back(j);
				if (bits&4) nblist[tid][2].push_back(j);
				if (bits&8) nblist[tid][3].push_back(j);
			}
			v4sf rinv1 = v4sf_rsqrt(r2);
			// (~mask) & rinv1: zero 1/r in neighbour lanes so that neighbours
			// contribute nothing to the sums below.
			rinv1 = __builtin_ia32_andnps(mask, rinv1);
			// v4sf rinv1 = __builtin_ia32_rsqrtps(r2);
			v4sf rinv2 = rinv1 * rinv1;
			rinv1 *= mj;                 // m / r
			poti += rinv1;
			v4sf rinv3 = rinv1 * rinv2;  // m / r^3
			rv *= (v4sf){-3.f, -3.f, -3.f, -3.f} * rinv2;  // -3 (r.v) / r^2
			Ax += rinv3 * dx;
			Ay += rinv3 * dy;
			Az += rinv3 * dz;
			Jx += rinv3 * (dvx + rv * dx);
			Jy += rinv3 * (dvy + rv * dy);
			Jz += rinv3 * (dvz + rv * dz);
		} // for(j)
		// The union exposes the SIMD accumulators as float arrays so that the
		// individual lanes can be read out per target.
		union {
			struct{
				v4sf Ax, Ay, Az, Jx, Jy, Jz, Pot;
			};
			struct{
				float acc[3][4], jrk[3][4], pot[4];
			};
		} u;
		u.Ax = Ax;
		u.Ay = Ay;
		u.Az = Az;
		u.Jx = Jx;
		u.Jy = Jy;
		u.Jz = Jz;
		u.Pot = poti;
		// Write results only for the nii valid lanes.
		for(int ii=0; ii<nii; ii++){
			for(int k=0; k<3; k++){
				acc[i+ii][k] = u.acc[k][ii];
				jrk[i+ii][k] = u.jrk[k][ii];
			}
			pot[i+ii] = u.pot[ii];
			int nnb = nblist[tid][ii].size();
			// Record for target i+ii: [count, index0, index1, ...].
			int *nnbp = listbase + lmax * (i+ii);
			int *nblistp = nnbp + 1;
			if(nnb > nbmax){
				// Too many neighbours: store the negated count and skip the list.
				*nnbp = -nnb;
			}else{
				*nnbp = nnb;
				for(int k=0; k<nnb; k++){
					nblistp[k] = nblist[tid][ii][k];
				}
			}
		}
	}
	// printf("gpu: %e %e %e %d\n", xid[0][0], acc[0][0], jrk[0][0], *listbase);
	// Disabled debug path: dumps the forces to "Force.sse" and exits.
#if 0
	if(ni > 0){
		FILE *fp = fopen("Force.sse", "w");
		assert(fp);
		for(int i=0; i<ni; i++){
			int nnb = listbase[i*lmax];
			fprintf(fp, "%d %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %d\n",
					i, acc[i][0], acc[i][1], acc[i][2],
					jrk[i][0], jrk[i][1], jrk[i][2], nnb);
		}
		fprintf(fp, "\n");
		fclose(fp);
		exit(1);
	}
#endif
	time_grav += get_wtime();
}
int main(int argc, char * argv []) { int n = 1000; srand48(120); // srand48(123); if (argc > 1) srand48(atoi(argv[1])); if (argc > 2) n = atoi(argv[2]); fprintf(stderr, "n= %d\n", n); MSW lp(1.0); #if 1 for (int i = 0; i < n; i++) { const real lx = 0.75; const real ly = 0.75; const real lz = 0.75; const real nx = (1.0-2.0*drand48())*lx; const real ny = (1.0-2.0*drand48())*ly; const real nz = (1.0-2.0*drand48())*lz; const real x = -nx; const real y = -ny; const real z = -nz; lp.push(HalfSpace(vec3(nx,ny,nz), vec3(x, y, z))); } #else lp.push(HalfSpace(vec3(-1.0, -1.0, 0.0), vec3(0.3, 0.3, 0.00))); lp.push(HalfSpace(vec3(+1.0, 0.0, 0.0), vec3(0.25, 0.5, 0.5))); lp.push(HalfSpace(vec3(-1.0, 0.0, 0.0), vec3(0.75, 0.5, 0.5))); lp.push(HalfSpace(vec3(0.0, +1.0, 0.0), vec3(0.5, 0.25, 0.5))); lp.push(HalfSpace(vec3(0.0, -1.0, 0.0), vec3(0.5, 0.75, 0.5))); lp.push(HalfSpace(vec3(0.0, 0.0, +1.0), vec3(0.5, 0.5, 0.25))); lp.push(HalfSpace(vec3(0.0, 0.0, -1.0), vec3(0.5, 0.5, 0.75))); // lp.push(HalfSpace(vec3(0.0, 0.0, +1.0), vec3(0.5, 0.5, 0.40))); // lp.push(HalfSpace(vec3(0.0, 0.0, -1.0), vec3(0.5, 0.5, 0.30))); // lp.push(HalfSpace(vec3(0.0, +1.0, 0.0), vec3(0.5, 0.3, 0.30))); #endif fprintf(stderr, " nspace= %d\n", lp.nspace()); const int nrep = 1000; vec3 cvec(-1.0, +1.0, 0.0); #if 0 vec3 pos = lp.solve(cvec, false); fprintf(stderr, " pos= %g %g %g \n", pos.x, pos.y, pos.z); #else cvec = vec3(1.0 - 2.0*drand48(), 1.0 - 2.0*drand48(), 1.0 - 2.0*drand48()); vec3 pos = lp.solve(cvec, false); { const double t0 = get_wtime(); fprintf(stderr, " pos= %g %g %g \n", pos.x, pos.y, pos.z); for (int i = 0; i < nrep; i++) { vec3 pos1 = lp.solve(cvec, false); if ((pos1 - pos).norm2() > 1.0e-20*pos.norm2()) fprintf(stderr, " pos= %g %g %g \n", pos1.x, pos1.y, pos1.z); } const double dt = get_wtime() - t0; fprintf(stderr, " norm done in %g sec\n", dt/nrep); } { const double t0 = get_wtime(); for (int i = 0; i < nrep; i++) { vec3 pos1 = lp.solve(cvec, true); if ((pos1 - pos).norm2() > 
1.0e-20*pos.norm2()) fprintf(stderr, " rpos= %g %g %g \n", pos1.x, pos1.y, pos1.z); } const double dt = get_wtime() - t0; fprintf(stderr, " rand done in %g sec\n", dt/nrep); } { nflops = 0; int nrep = 100000; std::vector<vec3> vecList(nrep); const double t0 = get_wtime(); for (int i = 0; i < nrep; i++) { const vec3 cvec(1.0 - 2.0*drand48(), 1.0 - 2.0*drand48(), 1.0 - 2.0*drand48()); vecList[i] = lp.solve(cvec, false); } const double dt = get_wtime() - t0; fprintf(stderr, " test done in %g sec [%g sec per element]\n", dt, dt/nrep); fprintf(stderr, " performance: %g GFLOP %g GFLOP/s \n", nflops*1.0/1e9, nflops*1.0/dt/1e9); } #endif return 0; }
int main(int argc, char **argv) { int rank=0; int required_thread_support=MPI_THREAD_SINGLE; int provided_thread_support; MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support); assert(required_thread_support==provided_thread_support); MPI_Comm_rank(MPI_COMM_WORLD, &rank); bool verbose = false; if(argc>1) { verbose = std::string(argv[1])=="-v"; } #ifdef HAVE_VTK Mesh<double> *mesh=VTKTools<double>::import_vtu("../data/box200x200.vtu"); mesh->create_boundary(); MetricField<double,2> metric_field(*mesh); size_t NNodes = mesh->get_number_nodes(); for(size_t i=0; i<NNodes; i++) { double m[] = {0.5, 0.0, 0.5}; metric_field.set_metric(m, i); } metric_field.update_mesh(); Coarsen<double,2> adapt(*mesh); double L_up = sqrt(2.0); double L_low = L_up*0.5; double tic = get_wtime(); adapt.coarsen(L_low, L_up); double toc = get_wtime(); mesh->defragment(); int nelements = mesh->get_number_elements(); long double perimeter = mesh->calculate_perimeter(); long double area = mesh->calculate_area(); if(verbose) { if(rank==0) std::cout<<"Coarsen loop time: "<<toc-tic<<std::endl <<"Number elements: "<<nelements<<std::endl <<"Perimeter: "<<perimeter<<std::endl; } VTKTools<double>::export_vtu("../data/test_coarsen_2d", mesh); delete mesh; if(rank==0) { std::cout<<"Expecting 2 elements: "; if(nelements==2) std::cout<<"pass"<<std::endl; else std::cout<<"fail"<<std::endl; long double perimeter_exact(4); std::cout<<"Expecting perimeter = "<<perimeter_exact<<": "<<perimeter<<" ("<<std::abs(perimeter-perimeter_exact)/std::max(perimeter, perimeter_exact)<<") "; if(std::abs(perimeter-perimeter_exact)/std::max(perimeter, perimeter_exact)<DBL_EPSILON) std::cout<<"pass"<<std::endl; else std::cout<<"fail"<<std::endl; long double area_exact = 1; std::cout<<"Expecting area = "<<area_exact<<": "; if(std::abs(area-area_exact)/std::max(area, area_exact)<DBL_EPSILON) std::cout<<"pass"<<std::endl; else std::cout<<"fail"<<std::endl; } #else std::cerr<<"Pragmatic was configured 
without VTK"<<std::endl; #endif MPI_Finalize(); return 0; }
/// Hessian / metric-field test driver.
///
/// With VTK support: imports ../data/box200x200.vtu, evaluates
/// psi = (x+0.1)^2 + (y+0.1)^2 at every node, times
/// MetricField::add_field(psi, 1.0), and compares the resulting metric
/// against the components (2, 0, 2) via per-component RMS error. Prints
/// "pass" when the largest RMS is at most 0.01, "fail" otherwise, and writes
/// ../data/test_hessian_2d. Without VTK support only a notice is printed.
int main(int argc, char **argv)
{
#ifdef HAVE_MPI
    int required_thread_support=MPI_THREAD_SINGLE;
    int provided_thread_support;
    MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support);
    assert(required_thread_support==provided_thread_support);
#endif

#ifdef HAVE_VTK
    Mesh<double> *mesh=VTKTools<double>::import_vtu("../data/box200x200.vtu");
    mesh->create_boundary();

    const size_t NNodes = mesh->get_number_nodes();

    // Set up field - use first touch policy
    std::vector<double> psi(NNodes);
    #pragma omp parallel
    {
        #pragma omp for schedule(static)
        for(size_t node=0; node<NNodes; node++) {
            psi[node] = pow(mesh->get_coords(node)[0]+0.1, 2)
                      + pow(mesh->get_coords(node)[1]+0.1, 2);
        }
    }

    MetricField<double,2> metric_field(*mesh);

    // Only add_field() is timed.
    const double tic = get_wtime();
    metric_field.add_field(&(psi[0]), 1.0);
    const double toc = get_wtime();

    metric_field.update_mesh();

    std::vector<double> metric(NNodes*3);
    metric_field.get_metric(&(metric[0]));

    // Sum of squared deviations from the reference components (2, 0, 2).
    double rms[] = {0., 0., 0.};
    for(size_t node=0; node<NNodes; node++) {
        const size_t base = node*3;
        rms[0] += pow(2.0-metric[base  ], 2);
        rms[1] += pow(    metric[base+1], 2);
        rms[2] += pow(2.0-metric[base+2], 2);
    }

    double max_rms = 0;
    for(size_t comp=0; comp<3; comp++) {
        rms[comp] = sqrt(rms[comp]/NNodes);
        max_rms = std::max(max_rms, rms[comp]);
    }

    std::string vtu_filename("../data/test_hessian_2d");
    VTKTools<double>::export_vtu(vtu_filename.c_str(), mesh, &(psi[0]));

    std::cout<<"Hessian :: loop time = "<<toc-tic<<std::endl
             <<"RMS = "<<rms[0]<<", "<<rms[1]<<", "<<rms[2]<<std::endl;
    if(max_rms>0.01) {
        std::cout<<"fail\n";
    } else {
        std::cout<<"pass\n";
    }

    delete mesh;
#else
    std::cerr<<"Pragmatic was configured without VTK"<<std::endl;
#endif

#ifdef HAVE_MPI
    MPI_Finalize();
#endif

    return 0;
}