示例#1
0
/// Smoothing benchmark driver.
///
/// Loads the mesh named by the single command-line argument, prints its mean
/// and minimum quality, runs smooth() on it, prints the quality again and
/// reports "Test passed" when the smoothed mesh has mean quality > 0.90 and
/// minimum quality > 0.55 ("Test failed" otherwise). The wall-clock time spent
/// inside smooth() is printed on a final "BENCHMARK:" line.
///
/// @param argc must be exactly 2
/// @param argv argv[1] is the mesh file handed to the Mesh constructor
/// @return EXIT_FAILURE when the argument count is wrong; EXIT_SUCCESS
///         otherwise (also when the quality check reports "Test failed")
int main(int argc, char **argv){
  if(argc!=2){
    // argv[0] may be a null pointer when argc is 0, so never stream it blindly.
    std::cerr << "Usage: " << (argc>0 ? argv[0] : "<program>") << " mesh_file" << std::endl;
    // Without this early exit the code below would construct a Mesh from
    // argv[1], which is a null pointer (argc < 2) or not the intended argument.
    return EXIT_FAILURE;
  }

  Mesh *mesh = new Mesh(argv[1]);

  Quality q = mesh->get_mesh_quality();

  std::cout << "Initial quality:\n"
            << "Quality mean:  " << q.mean << std::endl
            << "Quality min:   " << q.min << std::endl;

  // Only the smooth() call itself is timed.
  // NOTE(review): the second argument (200) is presumably an iteration count - confirm.
  double time = get_wtime();
  smooth(mesh, 200);
  double time_smooth = get_wtime() - time;

  q = mesh->get_mesh_quality();

  std::cout<<"After smoothing:\n"
           << "Quality mean:  " << q.mean << std::endl
           << "Quality min:   " << q.min << std::endl;

  if((q.mean>0.90)&&(q.min>0.55))
    std::cout << "Test passed"<< std::endl;
  else
    std::cout << "Test failed"<< std::endl;

  std::cout<<"BENCHMARK: " << time_smooth << "s" << std::endl;

  delete mesh;

  return EXIT_SUCCESS;
}
/// Evaluates fib(n, 0), measuring the wall-clock time of that single call.
/// Prints the result and the elapsed time to std::cout and returns the result.
int fib0 ( int n )
{
   const double t_begin = get_wtime();
   const int result = fib( n,0 );
   const double t_end = get_wtime();

   std::cout << "Fibonacci result for " << n << " is " << result << std::endl;
   std::cout << "Computation time:  " << t_end - t_begin << " seconds." << std::endl;

   return result;
}
示例#3
0
/// Stores nj source particles in the host-side buffer jp_host.
///
/// Entry k of jp_host is built as Jparticle(mj[k], xj[k], vj[k]). The global
/// particle count nbody is set to nj (asserted not to exceed nbodymax) and the
/// wall-clock time spent in this function is accumulated into time_send.
void GPUNB_send(
		int nj,
		double mj[],
		double xj[][3],
		double vj[][3]){
	// time_send gains (exit time - entry time) via the -= / += pair.
	time_send -= get_wtime();

	nbody = nj;
	assert(nbody <= nbodymax);

#pragma omp parallel for
	for(int k=0; k<nj; ++k){
		jp_host[k] = Jparticle(mj[k], xj[k], vj[k]);
	}

	time_send += get_wtime();
}
示例#4
0
/// MPI entry point for the computation of the hard special leaves.
///
/// Prints the section header and the parameters, then dispatches on the role
/// of the calling process: the MPI master process runs S2_hard_mpi_master()
/// and returns its result, every other process runs S2_hard_mpi_slave() and
/// returns 0. The result and the start time are handed to print() at the end.
int128_t S2_hard_mpi(int128_t x,
                     int64_t y,
                     int64_t z,
                     int64_t c,
                     int128_t s2_hard_approx,
                     int threads)
{
  print("");
  print("=== S2_hard_mpi(x, y) ===");
  print("Computation of the hard special leaves");
  print(x, y, c, threads);

  int128_t result = 0;
  double start = get_wtime();

  if (!is_mpi_master_proc())
  {
    // prefer the uint16_t instantiation whenever y fits: it uses less memory
    bool fits_16bit = (y <= FactorTable<uint16_t>::max());

    if (fits_16bit)
      S2_hard_mpi_slave<uint16_t>((intfast128_t) x, y, z, c, (intfast128_t) s2_hard_approx, threads);
    else
      S2_hard_mpi_slave<uint32_t>((intfast128_t) x, y, z, c, (intfast128_t) s2_hard_approx, threads);
  }
  else
    result = S2_hard_mpi_master(x, y, z, c, s2_hard_approx, threads);

  print("S2_hard", result, start);
  return result;
}
示例#5
0
/// 128-bit front end for the computation of the easy special leaves.
///
/// Prints the section header and parameters, generates the primes <= y and
/// forwards to S2_easy::S2_easy(). The primes are stored as uint32_t when y
/// fits into 32 bits (uses less memory) and as int64_t otherwise. The result
/// and the start time are passed to print() before returning.
int128_t S2_easy(int128_t x,
                 int64_t y,
                 int64_t z,
                 int64_t c,
                 int threads)
{
  print("");
  print("=== S2_easy(x, y) ===");
  print("Computation of the easy special leaves");
  print(x, y, c, threads);

  double start = get_wtime();
  int128_t result;

  if (y > std::numeric_limits<uint32_t>::max())
  {
    // y needs more than 32 bits: fall back to 64-bit primes
    vector<int64_t> primes = generate_primes<int64_t>(y);
    result = S2_easy::S2_easy((intfast128_t) x, y, z, c, primes, threads);
  }
  else
  {
    // 32-bit primes use less memory
    vector<uint32_t> primes = generate_primes<uint32_t>(y);
    result = S2_easy::S2_easy((intfast128_t) x, y, z, c, primes, threads);
  }

  print("S2_easy", result, start);
  return result;
}
示例#6
0
/// 128-bit front end for the computation of the easy special leaves.
///
/// When built with HAVE_MPI and more than one MPI process is running, the
/// work is delegated to S2_easy_mpi(). Otherwise the section header and
/// parameters are printed, the primes <= y are generated (as uint32_t when
/// y fits into 32 bits, as int64_t otherwise) and S2_easy_OpenMP() computes
/// the result, which is passed to print() together with the start time.
int128_t S2_easy(int128_t x,
                 int64_t y,
                 int64_t z,
                 int64_t c,
                 int threads)
{
#ifdef HAVE_MPI
  if (mpi_num_procs() > 1)
    return S2_easy_mpi(x, y, z, c, threads);
#endif

  print("");
  print("=== S2_easy(x, y) ===");
  print("Computation of the easy special leaves");
  print(x, y, c, threads);

  double start = get_wtime();
  int128_t result;

  if (y > numeric_limits<uint32_t>::max())
  {
    // y needs more than 32 bits: fall back to 64-bit primes
    vector<int64_t> primes = generate_primes<int64_t>(y);
    result = S2_easy_OpenMP((intfast128_t) x, y, z, c, primes, threads);
  }
  else
  {
    // 32-bit primes use less memory
    vector<uint32_t> primes = generate_primes<uint32_t>(y);
    result = S2_easy_OpenMP((intfast128_t) x, y, z, c, primes, threads);
  }

  print("S2_easy", result, start);
  return result;
}
示例#7
0
/// 64-bit front end for the computation of the hard special leaves.
///
/// When built with HAVE_MPI and more than one MPI process is running, the
/// work is delegated to S2_hard_mpi(). Otherwise the section header and
/// parameters are printed, a FactorTable<uint16_t> for y and the primes up
/// to z / isqrt(y) are built, and S2_hard_OpenMP_master() computes the
/// result, which is passed to print() together with the start time.
int64_t S2_hard(int64_t x,
                int64_t y,
                int64_t z,
                int64_t c,
                int64_t s2_hard_approx,
                int threads)
{
#ifdef HAVE_MPI
  if (mpi_num_procs() > 1)
    return S2_hard_mpi(x, y, z, c, s2_hard_approx, threads);
#endif

  print("");
  print("=== S2_hard(x, y) ===");
  print("Computation of the hard special leaves");
  print(x, y, c, threads);

  double start = get_wtime();

  // lookup structures; their construction is part of the timed section
  FactorTable<uint16_t> factors(y, threads);
  int64_t prime_limit = z / isqrt(y);
  vector<int32_t> primes = generate_primes(prime_limit);

  int64_t result = S2_hard_OpenMP_master((intfast64_t) x,
                                         y,
                                         z,
                                         c,
                                         (intfast64_t) s2_hard_approx,
                                         primes,
                                         factors,
                                         threads);

  print("S2_hard", result, start);
  return result;
}
示例#8
0
/// 3rd partial sieve function.
/// P3(x, a) is the count of numbers <= x with exactly 3 prime
/// factors, all of them larger than the a-th prime.
/// Space complexity: O(pi(sqrt(x))).
///
int64_t P3(int64_t x, int64_t a, int threads)
{
  print("");
  print("=== P3(x, a) ===");
  print("Computation of the 3rd partial sieve function");

  double start = get_wtime();
  vector<int32_t> primes = generate_primes(isqrt(x));

  // the outer loop runs over the primes up to the cube root of x
  int64_t root3 = iroot<3>(x);
  int64_t pi_root3 = pi_bsearch(primes, root3);
  int64_t total = 0;

  threads = ideal_num_threads(threads, pi_root3, 100);

  #pragma omp parallel for num_threads(threads) schedule(dynamic) reduction(+: total)
  for (int64_t i = a + 1; i <= pi_root3; i++)
  {
    int64_t quotient = x / primes[i];
    int64_t last = pi_bsearch(primes, isqrt(quotient));
    int64_t partial = 0;

    for (int64_t j = i; j <= last; j++)
    {
      int64_t count = pi_bsearch(primes, quotient / primes[j]);
      partial += count - (j - 1);
    }

    total += partial;
  }

  print("P3", total, start);
  return total;
}
示例#9
0
/// Prints a final "Status: 100%" line, the named result and the number of
/// seconds elapsed since `time`. Does nothing unless print_status() is true.
void print(const string& res_str, maxint_t res, double time)
{
  if (!print_status())
    return;

  // overwrite the current status line with blanks, then return to column 0
  cout << "\r" << string(50,' ') << "\r";
  cout << "Status: 100%" << endl;
  cout << res_str << " = " << res << endl;

  double elapsed = get_wtime() - time;
  print_seconds(elapsed);
}
示例#10
0
/// MPI front end for the 2nd partial sieve function: prints the section
/// header and parameters, runs P2_mpi_master() and passes the result and
/// the start time to print() before returning the result.
int128_t P2_mpi(int128_t x, int64_t y, int threads)
{
  print("");
  print("=== P2_mpi(x, y) ===");
  print("Computation of the 2nd partial sieve function");
  print(x, y, threads);

  double start = get_wtime();
  int128_t result = P2_mpi_master(x, y, threads);
  print("P2", result, start);

  return result;
}
示例#11
0
/// Calls irr_simd_firr() once for each of the ni entries of addr, in parallel.
///
/// The global ::vec_tnow is set to {ti, ti} before the loop. For entry i the
/// index addr[i]-1 is passed on together with accout[i], jrkout[i] and
/// nnbid[i]. The nnb fields of list[addr[i]-1] are summed into ::num_inter,
/// the elapsed wall-clock time into ::time_grav, and the call/step counters
/// ::num_fcall and ::num_steps are advanced by 1 and ni respectively.
/// NOTE(review): addr presumably holds 1-based indices - confirm with caller.
static void irr_simd_firr_vec(
        const double ti,
		const int ni,
		const int addr[],
		_out_ double accout[][3],
		_out_ double jrkout[][3],
        _out_ int nnbid[])
{
	const double t_begin = get_wtime();
	::vec_tnow = (v2df){ti, ti};

	int inter_count = 0;
#pragma omp parallel for reduction(+: inter_count) schedule(guided)
	for(int k=0; k<ni; k++){
		const int idx = addr[k]-1;
		irr_simd_firr(idx, accout[k], jrkout[k], nnbid[k]);
		inter_count += list[idx].nnb;
	}
	::num_inter += inter_count;

	const double t_end = get_wtime();
	::time_grav += t_end-t_begin;
	::num_fcall++;
	::num_steps += ni;
}
示例#12
0
/// Partial sieve function (a.k.a. Legendre-sum).
/// phi(x, a) is the count of the numbers <= x that none of
/// the first a primes divides. The trivial cases x < 1 (-> 0),
/// a > x (-> 1) and a < 1 (-> x) return before anything is printed.
///
int64_t phi(int64_t x, int64_t a, int threads)
{
  if (x < 1) return 0;
  if (a > x) return 1;
  if (a < 1) return x;

  print("");
  print("=== phi(x, a) ===");
  print("Count the numbers <= x coprime to the first a primes");

  double start = get_wtime();
  int64_t result = 0;

  if (!is_phi_tiny(a))
  {
    vector<int32_t> primes = generate_n_primes(a);

    if (primes.at(a) < x)
    {
      // use a large pi(x) lookup table for speed
      int64_t sqrtx = isqrt(x);
      PiTable pi(max(sqrtx, primes[a]));
      PhiCache cache(primes, pi);

      int64_t pi_sqrtx = min(pi[sqrtx], a);
      result = x - a + pi_sqrtx;

      int64_t p14 = ipow((int64_t) 10, 14);
      int64_t thread_threshold = p14 / primes[a];
      threads = ideal_num_threads(threads, x, thread_threshold);

      // this loop scales only up to about 8 CPU cores
      threads = min(8, threads);

      // every thread works on its own copy of the cache
      #pragma omp parallel for schedule(dynamic, 16) \
          num_threads(threads) firstprivate(cache) reduction(+: result)
      for (int64_t k = 0; k < pi_sqrtx; k++)
        result += cache.phi<-1>(x / primes[k + 1], k);
    }
    else
      result = 1;
  }
  else
    result = phi_tiny(x, a);

  print("phi", result, start);
  return result;
}
示例#13
0
/// Rewrites the status line with the current progress percentage and
/// the load balance (100 - rsd, rounded and clamped to [0, 100]).
/// Once a status has been printed (old_ >= 0) further calls are ignored
/// until at least 0.01 seconds have passed since the last update.
void S2Status::print(maxint_t n, maxint_t limit, double rsd)
{
  double now = get_wtime();
  bool too_soon = (now - time_) < 0.01;

  if (old_ >= 0 && too_soon)
    return;

  time_ = now;
  int percent = skewed_percent(n, limit);
  int load_balance = (int) in_between(0, 100 - rsd + 0.5, 100);
  old_ = percent;

  // assemble the whole line first so that it is written in one piece
  ostringstream line;
  line << "\r" << string(40,' ')
       << "\rStatus: " << percent << "%, "
       << "Load balance: " << load_balance << "%";

  cout << line.str() << flush;
}
示例#14
0
/// 128-bit front end for the computation of the trivial special leaves:
/// prints the section header and parameters, runs S2_trivial_OpenMP() and
/// passes the result and the start time to print() before returning it.
int128_t S2_trivial(int128_t x,
                    int64_t y,
                    int64_t z,
                    int64_t c,
                    int threads)
{
  print("");
  print("=== S2_trivial(x, y) ===");
  print("Computation of the trivial special leaves");
  print(x, y, c, threads);

  double start = get_wtime();
  int128_t result = S2_trivial_OpenMP(x, y, z, c, threads);
  print("S2_trivial", result, start);

  return result;
}
示例#15
0
/// 64-bit front end for the computation of the easy special leaves:
/// prints the section header and parameters, generates the primes <= y,
/// runs S2_easy::S2_easy() and passes the result and the start time to
/// print() before returning the result.
int64_t S2_easy(int64_t x,
                int64_t y,
                int64_t z,
                int64_t c,
                int threads)
{
  print("");
  print("=== S2_easy(x, y) ===");
  print("Computation of the easy special leaves");
  print(x, y, c, threads);

  double start = get_wtime();

  vector<int32_t> primes = generate_primes(y);
  int64_t result = S2_easy::S2_easy((intfast64_t) x, y, z, c, primes, threads);

  print("S2_easy", result, start);
  return result;
}
示例#16
0
/// Benchmarks two matrix-inversion routines on an N x N test matrix.
///
/// A random symmetric matrix `mat` is built, `in` is set to mat * mat and
/// checked for exact symmetry. Then inverse_cholesky() and my_inverse() are
/// each called nrep times inside an OpenMP parallel loop; the wall-clock time
/// of each loop and their ratio are written to stderr.
///
/// The benchmarked calls are made outside of assert() on purpose: an
/// expression inside assert() is not evaluated at all when NDEBUG is defined,
/// which would turn the timed loops into no-ops.
///
/// @param nrep number of repetitions of each inversion
void test(const int nrep)
{
  REAL mat[N][N] __attribute__((aligned(64)));
  REAL in [N][N] __attribute__((aligned(64)));
  REAL out[N][N] __attribute__((aligned(64)));
  REAL fdum;

  // random symmetric matrix: fill one triangle and mirror it
  for (int j = 0; j < N; j++)
    for (int i = j; i < N; i++)
      mat[i][j] = mat[j][i] = drand48();

  // in = mat * mat
  for (int j = 0; j < N; j++)
    for (int i = 0; i < N; i++)
    {
      REAL res = 0.0;
      for (int k = 0; k < N; k++)
        res += mat[j][k] * mat[k][i];
      in[j][i] = res;
    }

  // sanity check only (no side effects, so it may live inside assert)
  for (int j = 0; j < N; j++)
    for (int i = j; i < N; i++)
      assert(in[i][j] == in[j][i]);



  // every thread gets its own copy of the input and its own output buffer
  double t0 = get_wtime();
#pragma omp parallel for firstprivate(in) private(out)
  for (int r = 0; r < nrep; r++)
  {
    const bool ok = inverse_cholesky(in, out) > 0;
    assert(ok);
    (void) ok;  // silences the unused-variable warning under NDEBUG
  }
  const double dt_custom = get_wtime() - t0;
  fprintf(stderr, " custom inverse= %g sec \n", dt_custom);

  t0 = get_wtime();
#pragma omp parallel for firstprivate(in) private(out, fdum)
  for (int r = 0; r < nrep; r++)
  {
    const bool ok = (my_inverse(in, out, fdum) == false);
    assert(ok);
    (void) ok;
  }
  const double dt_lapack = get_wtime() - t0;
  fprintf(stderr, " LAPACK inverse= %g sec (ratio= %g)\n", dt_lapack, dt_lapack/dt_custom);

#if 0   /* does not work from MKL */
  t0 = get_wtime();
  for (int r = 0; r < nrep; r++)
  {
    const bool ok = (my_inverse_gold(in, out, fdum) == false);
    assert(ok);
    (void) ok;
  }
  fprintf(stderr, " DSYEVD inverse= %g sec \n", get_wtime() - t0);
#endif
}
示例#17
0
int64_t S2_easy(int64_t x,
                int64_t y,
                int64_t z,
                int64_t c,
                int threads)
{
#ifdef HAVE_MPI
  if (mpi_num_procs() > 1)
    return S2_easy_mpi(x, y, z, c, threads);
#endif

  print("");
  print("=== S2_easy(x, y) ===");
  print("Computation of the easy special leaves");
  print(x, y, c, threads);

  double time = get_wtime();
  vector<int32_t> primes = generate_primes(y);
  int64_t s2_easy = S2_easy_OpenMP((intfast64_t) x, y, z, c, primes, threads);

  print("S2_easy", s2_easy, time);
  return s2_easy;
}
示例#18
0
/// Test driver for 3D mesh refinement.
///
/// With VTK support: imports ../data/box10x10x10.vtu, builds a metric from
/// the field x^4 + y^4 + z^4, applies Refine::refine(sqrt(2)) twice,
/// defragments and exports the mesh, and prints "pass"/"fail" for the checks
/// area == 6 and volume == 1 (relative error below DBL_EPSILON). Passing "-v"
/// as first argument additionally verifies the mesh and prints timing and
/// quality figures. Without VTK support only a notice is written to stderr.
/// Always returns 0.
int main(int argc, char **argv)
{
#ifdef HAVE_MPI
    int provided_thread_support;
    int required_thread_support=MPI_THREAD_SINGLE;
    MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support);
    assert(required_thread_support==provided_thread_support);
#endif

    // "-v" as the first argument switches on verbose output
    bool verbose = (argc>1) && (std::string(argv[1])=="-v");

#ifdef HAVE_VTK
    Mesh<double> *mesh=VTKTools<double>::import_vtu("../data/box10x10x10.vtu");
    mesh->create_boundary();

    MetricField<double,3> metric_field(*mesh);

    size_t NNodes = mesh->get_number_nodes();

    // nodal values of psi(x, y, z) = x^4 + y^4 + z^4
    std::vector<double> psi(NNodes);
    for(size_t node=0; node<NNodes; node++) {
        psi[node] =
            pow(mesh->get_coords(node)[0], 4) +
            pow(mesh->get_coords(node)[1], 4) +
            pow(mesh->get_coords(node)[2], 4);
    }

    metric_field.add_field(&(psi[0]), 0.001);
    metric_field.update_mesh();

    Refine<double,3> adapt(*mesh);

    // only the two refinement sweeps are timed
    double t_start = get_wtime();
    for(int sweep=0; sweep<2; sweep++) {
        adapt.refine(sqrt(2.0));
    }
    double t_stop = get_wtime();

    if(verbose) {
        mesh->verify();
    }

    mesh->defragment();

    VTKTools<double>::export_vtu("../data/test_refine_3d", mesh);

    double qmean = mesh->get_qmean();
    double qmin = mesh->get_qmin();
    int nelements = mesh->get_number_elements();

    if(verbose) {
        std::cout<<"Refine loop time:    "<<t_stop-t_start<<std::endl
                 <<"Number elements:     "<<nelements<<std::endl
                 <<"Quality mean:        "<<qmean<<std::endl
                 <<"Quality min:         "<<qmin<<std::endl;
    }

    long double area = mesh->calculate_area();
    long double volume = mesh->calculate_volume();

    long double ideal_area(6);
    long double ideal_volume(1);

    // both checks compare the relative error against DBL_EPSILON
    std::cout<<"Checking area == 6: ";
    long double area_error = std::abs(area-ideal_area)/std::max(area, ideal_area);
    if(area_error<DBL_EPSILON) {
        std::cout<<"pass"<<std::endl;
    } else {
        std::cout<<"fail (area="<<area<<")"<<std::endl;
    }

    std::cout<<"Checking volume == 1: ";
    long double volume_error = std::abs(volume-ideal_volume)/std::max(volume, ideal_volume);
    if(volume_error<DBL_EPSILON) {
        std::cout<<"pass"<<std::endl;
    } else {
        std::cout<<"fail (volume="<<volume<<")"<<std::endl;
    }

    delete mesh;
#else
    std::cerr<<"Pragmatic was configured without VTK"<<std::endl;
#endif

#ifdef HAVE_MPI
    MPI_Finalize();
#endif

    return 0;
}
示例#19
0
// Computes, for each of the ni target particles, the summed acceleration,
// jerk and potential contributions of the nbody source particles stored in
// jp_host, and builds a neighbour list per target particle.
//
// Target particles are processed four at a time, one per lane of a v4sf
// vector. A source particle j is a neighbour of target i when
// min(r2, r2p) < h2d[i] (or < mj * h2d[i] when m_flag is non-zero), where r2
// is the squared separation and r2p the squared separation after advancing
// the relative position by dtr[i] times the relative velocity. Neighbours are
// excluded from the accumulated sums and recorded in the list instead.
//
// Outputs:
//   acc[i][k], jrk[i][k], pot[i]  accumulated values for target i
//   listbase + lmax*i             first int: neighbour count, followed by the
//                                 neighbour indices; when the count exceeds
//                                 nbmax only the negated count is stored
//
// Globals touched: time_grav (elapsed wall-clock time is added), numInter,
// ::icall, ::ini and the per-thread scratch lists nblist[tid][0..3].
//
// NOTE(review): the last group reads xid/vid/h2d/dtr at i+1..i+3 even when
// those indices are >= ni; presumably callers pass arrays padded to a
// multiple of 4 - confirm.
void GPUNB_regf(
		int ni,
		double h2d[],
		double dtr[],
		double xid[][3],
		double vid[][3],
		double acc[][3],
		double jrk[][3],
		double pot[],
		int lmax,
		int nbmax,
		int *listbase,
        int m_flag){
	// std::cout << " Call GPUNB_regf " << ni << std::endl;
	// time_grav gains (exit time - entry time) via the -= / += pair
	time_grav -= get_wtime();
	numInter += ni * nbody;
    ::icall++;
    ::ini +=ni;
#pragma omp parallel for
	for(int i=0; i<ni; i+=4){
		// each thread collects neighbours in its own four scratch lists
		int tid = omp_get_thread_num();
		nblist[tid][0].clear();
		nblist[tid][1].clear();
		nblist[tid][2].clear();
		nblist[tid][3].clear();
		// number of valid lanes in this group (fewer than 4 only in the last group)
		int nii = std::min(4, ni-i);

		// pack position, velocity and h2 of the four targets, one per lane
		v4sf xi  = {xid[i+0][0], xid[i+1][0], xid[i+2][0], xid[i+3][0]}; 
		v4sf yi  = {xid[i+0][1], xid[i+1][1], xid[i+2][1], xid[i+3][1]}; 
		v4sf zi  = {xid[i+0][2], xid[i+1][2], xid[i+2][2], xid[i+3][2]}; 
		v4sf vxi = {vid[i+0][0], vid[i+1][0], vid[i+2][0], vid[i+3][0]}; 
		v4sf vyi = {vid[i+0][1], vid[i+1][1], vid[i+2][1], vid[i+3][1]}; 
		v4sf vzi = {vid[i+0][2], vid[i+1][2], vid[i+2][2], vid[i+3][2]}; 
		v4sf h2i = {h2d[i+0], h2d[i+1], h2d[i+2], h2d[i+3]}; 
		// h2mask[n] keeps the first n lanes and zeroes the rest
		static const v4sf h2mask[5] = {
			{0.0, 0.0, 0.0, 0.0},
			{1.0, 0.0, 0.0, 0.0},
			{1.0, 1.0, 0.0, 0.0},
			{1.0, 1.0, 1.0, 0.0},
			{1.0, 1.0, 1.0, 1.0},
		};
		// lanes beyond nii get h2 = 0, so the "< h2" test below never selects them
		h2i *= h2mask[nii];
		v4sf dtri = {dtr[i+0], dtr[i+1], dtr[i+2], dtr[i+3]}; 
		// accumulators: acceleration (A*), jerk (J*) and potential
		v4sf Ax = {0.f, 0.f, 0.f, 0.f};
		v4sf Ay = {0.f, 0.f, 0.f, 0.f};
		v4sf Az = {0.f, 0.f, 0.f, 0.f};
		v4sf Jx = {0.f, 0.f, 0.f, 0.f};
		v4sf Jy = {0.f, 0.f, 0.f, 0.f};
		v4sf Jz = {0.f, 0.f, 0.f, 0.f};
		v4sf poti = {0.f, 0.f, 0.f, 0.f};
		// jp_host is read as two v4sf per source particle (jpp advances by 2 per j)
		v4sf *jpp = (v4sf *)jp_host;
		for(int j=0; j<nbody; j++, jpp+=2){
			v4sf jp0 = jpp[0];
			v4sf jp1 = jpp[1];

			// broadcast one lane of the source particle to all four lanes:
			// jp0 lanes 0..3 -> x, y, z, mass; jp1 lanes 0..2 -> vx, vy, vz
			v4sf xj = __builtin_ia32_shufps(jp0, jp0, 0x00);
			v4sf yj = __builtin_ia32_shufps(jp0, jp0, 0x55);
			v4sf zj = __builtin_ia32_shufps(jp0, jp0, 0xaa);
			v4sf mj = __builtin_ia32_shufps(jp0, jp0, 0xff);
			v4sf vxj = __builtin_ia32_shufps(jp1, jp1, 0x00);
			v4sf vyj = __builtin_ia32_shufps(jp1, jp1, 0x55);
			v4sf vzj = __builtin_ia32_shufps(jp1, jp1, 0xaa);

			// relative position and velocity (source minus target)
			v4sf dx = xj - xi;
			v4sf dy = yj - yi;
			v4sf dz = zj - zi;
			v4sf dvx = vxj - vxi;
			v4sf dvy = vyj - vyi;
			v4sf dvz = vzj - vzi;

			// relative position advanced by dtr times the relative velocity
			v4sf dxp = dx + dtri * dvx;
			v4sf dyp = dy + dtri * dvy;
			v4sf dzp = dz + dtri * dvz;

			v4sf r2 = dx*dx + dy*dy + dz*dz;
			v4sf rv = dx*dvx + dy*dvy + dz*dvz;
			v4sf r2p = dxp*dxp + dyp*dyp + dzp*dzp;
            v4sf mask;
            //          v4sf mask = (v4sf)__builtin_ia32_cmpltps(r2, h2i);
            // neighbour test: min(r2, r2p) < h2, with h2 scaled by the source mass when m_flag is set
            if(m_flag) {
              v4sf mh2i = mj * h2i;
              mask = (v4sf)__builtin_ia32_cmpltps(
                       __builtin_ia32_minps(r2,r2p), mh2i);
            }
            else {
              mask = (v4sf)__builtin_ia32_cmpltps(
                       __builtin_ia32_minps(r2,r2p), h2i);
            }
			// one bit per lane: set when source j is a neighbour of that target
			int bits = __builtin_ia32_movmskps(mask);
			// mj = __builtin_ia32_andnps(mask, mj);
			if(bits){
				if (bits&1) nblist[tid][0].push_back(j);
				if (bits&2) nblist[tid][1].push_back(j);
				if (bits&4) nblist[tid][2].push_back(j);
				if (bits&8) nblist[tid][3].push_back(j);
			}

			// NOTE(review): v4sf_rsqrt is presumably 1/sqrt(r2) per lane - confirm
			v4sf rinv1 = v4sf_rsqrt(r2);
			// (~mask & rinv1): zero the lanes flagged as neighbours so they add nothing below
			rinv1 = __builtin_ia32_andnps(mask, rinv1);
			// v4sf rinv1 = __builtin_ia32_rsqrtps(r2);
			v4sf rinv2 = rinv1 * rinv1;
			rinv1 *= mj;
			poti += rinv1;
			v4sf rinv3 = rinv1 * rinv2;
			rv *= (v4sf){-3.f, -3.f, -3.f, -3.f} * rinv2;

			Ax += rinv3 * dx;
			Ay += rinv3 * dy;
			Az += rinv3 * dz;
			Jx += rinv3 * (dvx + rv * dx);
			Jy += rinv3 * (dvy + rv * dy);
			Jz += rinv3 * (dvz + rv * dz);
		} // for(j)
		// the union gives per-lane float access to the seven accumulators
		union {
			struct{
				v4sf Ax, Ay, Az, Jx, Jy, Jz, Pot;
			};
			struct{
				float acc[3][4], jrk[3][4], pot[4];
			};
		} u;
		u.Ax = Ax;
		u.Ay = Ay;
		u.Az = Az;
		u.Jx = Jx;
		u.Jy = Jy;
		u.Jz = Jz;
		u.Pot = poti;
		// write back only the nii valid lanes
		for(int ii=0; ii<nii; ii++){
			for(int k=0; k<3; k++){
				acc[i+ii][k] = u.acc[k][ii];
				jrk[i+ii][k] = u.jrk[k][ii];
			}
			pot[i+ii] = u.pot[ii];
			int nnb = nblist[tid][ii].size();
			// slot of this target: count at nnbp[0], neighbour indices from nnbp[1]
			int *nnbp = listbase + lmax * (i+ii);
			int *nblistp = nnbp + 1;
			if(nnb > nbmax){
				// too many neighbours: store the negated count and no indices
				*nnbp = -nnb;
			}else{
				*nnbp = nnb;
				for(int k=0; k<nnb; k++){
					nblistp[k] = nblist[tid][ii][k];
				}
			}
		}
	}
	// printf("gpu: %e %e %e %d\n", xid[0][0], acc[0][0], jrk[0][0], *listbase);
#if 0
	if(ni > 0){
		FILE *fp = fopen("Force.sse", "w");
		assert(fp);
		for(int i=0; i<ni; i++){
			int nnb =  listbase[i*lmax];
			fprintf(fp, "%d %9.2e %9.2e %9.2e %9.2e %9.2e %9.2e %d\n",
					i, acc[i][0], acc[i][1], acc[i][2], 
					   jrk[i][0], jrk[i][1], jrk[i][2], nnb);
		}
		fprintf(fp, "\n");
		fclose(fp);
		exit(1);
	}
#endif
	time_grav += get_wtime();
}
示例#20
0
// Benchmark driver for the MSW solver.
//
// Usage: prog [seed [n]]
//   seed  optional argument passed to srand48() (default seed is 120)
//   n     optional number of random half-spaces to push (default 1000)
//
// Builds an MSW object from n random half-spaces, then times repeated
// lp.solve() calls and writes all results to stderr. Always returns 0.
int main(int argc, char * argv [])
{
  int n = 1000;
  srand48(120);
//  srand48(123);

  // a seed given on the command line replaces the default one
  if (argc > 1)
    srand48(atoi(argv[1]));

  if (argc > 2)
    n = atoi(argv[2]);
  fprintf(stderr, "n= %d\n", n);

  MSW lp(1.0);

#if 1
  // random constraints: each normal component is drawn from [-0.75, 0.75] and
  // the point handed to HalfSpace is the negated normal
  for (int i = 0; i < n; i++)
  {
    const real lx = 0.75;
    const real ly = 0.75;
    const real lz = 0.75;
    const real nx = (1.0-2.0*drand48())*lx;
    const real ny = (1.0-2.0*drand48())*ly;
    const real nz = (1.0-2.0*drand48())*lz;
    const real x = -nx;
    const real y = -ny;
    const real z = -nz;
    lp.push(HalfSpace(vec3(nx,ny,nz), vec3(x, y, z)));
  }
#else
    // disabled alternative: a small fixed set of half-spaces
    lp.push(HalfSpace(vec3(-1.0, -1.0,  0.0), vec3(0.3, 0.3, 0.00)));
    lp.push(HalfSpace(vec3(+1.0, 0.0, 0.0), vec3(0.25, 0.5, 0.5)));
    lp.push(HalfSpace(vec3(-1.0, 0.0, 0.0), vec3(0.75, 0.5, 0.5)));
    lp.push(HalfSpace(vec3(0.0, +1.0, 0.0), vec3(0.5, 0.25, 0.5)));
    lp.push(HalfSpace(vec3(0.0, -1.0, 0.0), vec3(0.5, 0.75, 0.5)));
    lp.push(HalfSpace(vec3(0.0, 0.0, +1.0), vec3(0.5, 0.5, 0.25)));
    lp.push(HalfSpace(vec3(0.0, 0.0, -1.0), vec3(0.5, 0.5, 0.75)));
 //   lp.push(HalfSpace(vec3(0.0, 0.0, +1.0), vec3(0.5, 0.5, 0.40)));
//    lp.push(HalfSpace(vec3(0.0, 0.0, -1.0), vec3(0.5, 0.5, 0.30)));
//    lp.push(HalfSpace(vec3(0.0, +1.0, 0.0), vec3(0.5, 0.3, 0.30)));
#endif

  fprintf(stderr, " nspace= %d\n", lp.nspace());
  // repetition count of the first two timed loops
  const int nrep = 1000;


  vec3 cvec(-1.0, +1.0, 0.0);
#if 0
  vec3 pos = lp.solve(cvec, false);
  fprintf(stderr, " pos= %g %g %g \n", pos.x, pos.y, pos.z);
#else
  // random vector with components in [-1, 1]; `pos` is the reference solution
  cvec = vec3(1.0 - 2.0*drand48(), 1.0 - 2.0*drand48(), 1.0 - 2.0*drand48());
  vec3 pos = lp.solve(cvec, false);
  {
    // timed: nrep solves with the second argument false; any result that
    // deviates from the reference is printed (the initial fprintf of `pos`
    // is inside the timed region as well)
    const double t0 = get_wtime();
    fprintf(stderr, " pos= %g %g %g \n", pos.x, pos.y, pos.z);
    for (int i = 0; i < nrep; i++)
    {
      vec3 pos1 = lp.solve(cvec, false);
      if ((pos1 - pos).norm2() > 1.0e-20*pos.norm2())
        fprintf(stderr, " pos= %g %g %g \n", pos1.x, pos1.y, pos1.z);
    }
    const double dt = get_wtime() - t0;
    fprintf(stderr, " norm  done in %g sec\n", dt/nrep);
  }
  {
    // same loop with the second argument true
    // NOTE(review): the "rand" label suggests this flag enables randomisation
    // inside solve() - confirm against MSW::solve
    const double t0 = get_wtime();
    for (int i = 0; i < nrep; i++)
    {
      vec3 pos1 = lp.solve(cvec, true);
      if ((pos1 - pos).norm2() > 1.0e-20*pos.norm2())
        fprintf(stderr, " rpos= %g %g %g \n", pos1.x, pos1.y, pos1.z);
    }
    const double dt = get_wtime() - t0;
    fprintf(stderr, " rand  done in %g sec\n", dt/nrep);
  }
  
  {
    // throughput test: a fresh random vector for every solve; nflops is
    // reset here and reported below (it is defined outside this function)
    nflops = 0;
    // shadows the outer nrep (1000) for this block only
    int nrep = 100000;
    std::vector<vec3> vecList(nrep);
    const double t0 = get_wtime();
    for (int i = 0; i < nrep; i++)
    {
      const vec3 cvec(1.0 - 2.0*drand48(), 1.0 - 2.0*drand48(), 1.0 - 2.0*drand48());
      vecList[i] = lp.solve(cvec, false);
    }
    const double dt = get_wtime() - t0;
    fprintf(stderr, " test  done in %g sec [%g sec per element]\n", dt, dt/nrep);
    fprintf(stderr, " performance: %g GFLOP %g GFLOP/s \n", nflops*1.0/1e9, nflops*1.0/dt/1e9);
  }
#endif

  return 0;
}
/// Test driver for 2D mesh coarsening.
///
/// With VTK support: imports ../data/box200x200.vtu, assigns the metric
/// {0.5, 0.0, 0.5} to every node, coarsens the mesh with edge-length bounds
/// [sqrt(2)/2, sqrt(2)], defragments and exports it. Rank 0 then prints
/// "pass"/"fail" for three checks: exactly 2 elements, perimeter 4 and
/// area 1 (relative error below DBL_EPSILON). Passing "-v" as first argument
/// additionally prints timing, element count and perimeter on rank 0.
/// Without VTK support only a notice is written to stderr. Always returns 0.
int main(int argc, char **argv)
{
    int rank=0;
    int provided_thread_support;
    int required_thread_support=MPI_THREAD_SINGLE;
    MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support);
    assert(required_thread_support==provided_thread_support);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // "-v" as the first argument switches on verbose output
    bool verbose = (argc>1) && (std::string(argv[1])=="-v");

#ifdef HAVE_VTK
    Mesh<double> *mesh=VTKTools<double>::import_vtu("../data/box200x200.vtu");
    mesh->create_boundary();

    MetricField<double,2> metric_field(*mesh);

    // the same constant metric is assigned to every node
    size_t NNodes = mesh->get_number_nodes();
    for(size_t node=0; node<NNodes; node++) {
        double m[] = {0.5, 0.0, 0.5};
        metric_field.set_metric(m, node);
    }
    metric_field.update_mesh();

    Coarsen<double,2> adapt(*mesh);

    double L_up = sqrt(2.0);
    double L_low = L_up*0.5;

    // only the coarsening call is timed
    double t_start = get_wtime();
    adapt.coarsen(L_low, L_up);
    double t_stop = get_wtime();

    mesh->defragment();

    int nelements = mesh->get_number_elements();

    long double perimeter = mesh->calculate_perimeter();
    long double area = mesh->calculate_area();

    if(verbose && rank==0) {
        std::cout<<"Coarsen loop time:    "<<t_stop-t_start<<std::endl
                 <<"Number elements:      "<<nelements<<std::endl
                 <<"Perimeter:            "<<perimeter<<std::endl;
    }

    VTKTools<double>::export_vtu("../data/test_coarsen_2d", mesh);

    delete mesh;

    // the pass/fail report is printed by rank 0 only
    if(rank==0) {
        std::cout<<"Expecting 2 elements: ";
        std::cout<<(nelements==2 ? "pass" : "fail")<<std::endl;

        long double perimeter_exact(4);
        long double perimeter_error = std::abs(perimeter-perimeter_exact)/std::max(perimeter, perimeter_exact);
        std::cout<<"Expecting perimeter = "<<perimeter_exact<<": "<<perimeter<<" ("<<perimeter_error<<") ";
        std::cout<<(perimeter_error<DBL_EPSILON ? "pass" : "fail")<<std::endl;

        long double area_exact = 1;
        long double area_error = std::abs(area-area_exact)/std::max(area, area_exact);
        std::cout<<"Expecting area = "<<area_exact<<": ";
        std::cout<<(area_error<DBL_EPSILON ? "pass" : "fail")<<std::endl;
    }
#else
    std::cerr<<"Pragmatic was configured without VTK"<<std::endl;
#endif

    MPI_Finalize();

    return 0;
}
示例#22
0
/// Test driver for the 2D Hessian-based metric.
///
/// With VTK support: imports ../data/box200x200.vtu, samples the field
/// psi = (x+0.1)^2 + (y+0.1)^2 at every node, feeds it to
/// MetricField::add_field() (timed) and compares the resulting metric
/// components with (2, 0, 2). The RMS deviation per component is printed and
/// the test reports "fail" when the largest of them exceeds 0.01, "pass"
/// otherwise. Without VTK support only a notice is written to stderr.
/// Always returns 0.
int main(int argc, char **argv)
{
#ifdef HAVE_MPI
    int provided_thread_support;
    int required_thread_support=MPI_THREAD_SINGLE;
    MPI_Init_thread(&argc, &argv, required_thread_support, &provided_thread_support);
    assert(required_thread_support==provided_thread_support);
#endif

#ifdef HAVE_VTK
    Mesh<double> *mesh=VTKTools<double>::import_vtu("../data/box200x200.vtu");
    mesh->create_boundary();

    size_t NNodes = mesh->get_number_nodes();

    // Set up field - use first touch policy
    std::vector<double> psi(NNodes);
    #pragma omp parallel
    {
        #pragma omp for schedule(static)
        for(size_t node=0; node<NNodes; node++) {
            psi[node] = pow(mesh->get_coords(node)[0]+0.1, 2) + pow(mesh->get_coords(node)[1]+0.1, 2);
        }
    }

    MetricField<double,2> metric_field(*mesh);

    // only add_field() is timed
    double t_start = get_wtime();
    metric_field.add_field(&(psi[0]), 1.0);
    double t_stop = get_wtime();

    metric_field.update_mesh();

    // three metric components per node
    std::vector<double> metric(NNodes*3);
    metric_field.get_metric(&(metric[0]));

    // squared deviation from the expected components (2, 0, 2)
    double rms[] = {0., 0., 0.};
    for(size_t node=0; node<NNodes; node++) {
        size_t base = node*3;
        rms[0] += pow(2.0-metric[base  ], 2);
        rms[1] += pow(    metric[base+1], 2);
        rms[2] += pow(2.0-metric[base+2], 2);
    }

    double max_rms = 0;
    for(size_t c=0; c<3; c++) {
        rms[c] = sqrt(rms[c]/NNodes);
        max_rms = std::max(max_rms, rms[c]);
    }

    std::string vtu_filename("../data/test_hessian_2d");
    VTKTools<double>::export_vtu(vtu_filename.c_str(), mesh, &(psi[0]));

    std::cout<<"Hessian :: loop time = "<<t_stop-t_start<<std::endl
             <<"RMS = "<<rms[0]<<", "<<rms[1]<<", "<<rms[2]<<std::endl;

    if(max_rms>0.01) {
        std::cout<<"fail\n";
    } else {
        std::cout<<"pass\n";
    }

    delete mesh;
#else
    std::cerr<<"Pragmatic was configured without VTK"<<std::endl;
#endif

#ifdef HAVE_MPI
    MPI_Finalize();
#endif

    return 0;
}