예제 #1
0
static inline __m128 
exp2f4(__m128 x)
{
	__m128i ipart;
	__m128 fpart, expipart, expfpart;

	x = _mm_min_ps(x, _mm_load_ps(_one29_ps));
	x = _mm_max_ps(x, _mm_load_ps(_minusone27_ps));

	/* ipart = int(x - 0.5) */
	ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_load_ps(_half_ps)));

	/* fpart = x - ipart */
	fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));

	/* expipart = (float) (1 << ipart) */
	expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_load_si128((__m128i*)_one27)), 23));

	/* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
#if EXP_POLY_DEGREE == 5
	expfpart = POLY5(fpart, exp_p5_0, exp_p5_1, exp_p5_2, exp_p5_3, exp_p5_4, exp_p5_5);
#elif EXP_POLY_DEGREE == 4
	expfpart = POLY4(fpart, exp_p4_0, exp_p4_1, exp_p4_2, exp_p4_3, exp_p4_4);
#elif EXP_POLY_DEGREE == 3
	expfpart = POLY3(fpart, exp_p3_0, exp_p3_1, exp_p3_2, exp_p3_3);
#elif EXP_POLY_DEGREE == 2
	expfpart = POLY2(fpart, exp_p2_0, exp_p2_1, exp_p2_2);
#else
#error
#endif

	return _mm_mul_ps(expipart, expfpart);
}
예제 #2
0
static inline __m128 
log2f4(__m128 x)
{
	__m128i exp = _mm_load_si128((__m128i*)_exp_mask);
	__m128i mant = _mm_load_si128((__m128i*)_mantissa_mask);
	__m128 one = _mm_load_ps(_ones_ps);
	__m128i i = _mm_castps_si128(x);
	__m128 e = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, exp), 23), _mm_load_si128((__m128i*)_one27)));
	__m128 m = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mant)), one);
	__m128 p;

	/* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ */
#if LOG_POLY_DEGREE == 6
	p = POLY5( m, log_p5_0, log_p5_1, log_p5_2, log_p5_3, log_p5_4, log_p5_5);
#elif LOG_POLY_DEGREE == 5
	p = POLY4(m, log_p4_0, log_p4_1, log_p4_2, log_p4_3, log_p4_4);
#elif LOG_POLY_DEGREE == 4
	p = POLY3(m, log_p3_0, log_p3_1, log_p3_2, log_p3_3);
#elif LOG_POLY_DEGREE == 3
	p = POLY2(m, log_p2_0, log_p2_1, log_p2_2);
#else
#error
#endif

	/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
	p = _mm_mul_ps(p, _mm_sub_ps(m, one));

	return _mm_add_ps(p, e);
}
예제 #3
0
void MblkGeometry::buildSShellGridOnPatch(
   const hier::Patch& patch,
   const hier::Box& domain,
   const int xyz_id,
   const int level_number,
   const int block_number)
{

   bool xyz_allocated = patch.checkAllocated(xyz_id);
   if (!xyz_allocated) {
      TBOX_ERROR("xyz data not allocated" << std::endl);
      //patch.allocatePatchData(xyz_id);
   }

   boost::shared_ptr<pdat::NodeData<double> > xyz(
      BOOST_CAST<pdat::NodeData<double>, hier::PatchData>(
         patch.getPatchData(xyz_id)));

   TBOX_ASSERT(xyz);

   if (d_dim == tbox::Dimension(3)) {

      const hier::Index ifirst = patch.getBox().lower();
      const hier::Index ilast = patch.getBox().upper();
      hier::IntVector nghost_cells = xyz->getGhostCellWidth();

      //int imin = ifirst(0);
      //int imax = ilast(0)  + 1;
      //int jmin = ifirst(1);
      //int jmax = ilast(1)  + 1;
      //int kmin = ifirst(2);
      //int kmax = ilast(2)  + 1;
      //int nx   = imax - imin + 1;
      //int ny   = jmax - jmin + 1;
      //int nxny = nx*ny;

      int nd_imin = ifirst(0) - nghost_cells(0);
      int nd_imax = ilast(0) + 1 + nghost_cells(0);
      int nd_jmin = ifirst(1) - nghost_cells(1);
      int nd_jmax = ilast(1) + 1 + nghost_cells(1);
      int nd_kmin = ifirst(2) - nghost_cells(2);
      int nd_kmax = ilast(2) + 1 + nghost_cells(2);
      int nd_nx = nd_imax - nd_imin + 1;
      int nd_ny = nd_jmax - nd_jmin + 1;
      int nd_nxny = nd_nx * nd_ny;

      double* x = xyz->getPointer(0);
      double* y = xyz->getPointer(1);
      double* z = xyz->getPointer(2);

      bool found = false;

      int nrad = (domain.upper(0) - domain.lower(0) + 1);
      int nth = (domain.upper(1) - domain.lower(1) + 1);
      int nphi = (domain.upper(2) - domain.lower(2) + 1);

      /*
       * If its a solid shell, its a single block and dx = dr, dth, dphi
       */
      if (d_sshell_type == "SOLID") {

         d_dx[level_number][block_number][0] = (d_sshell_rmax - d_sshell_rmin) / (double)nrad;
         d_dx[level_number][block_number][1] =
            2.0 * tbox::MathUtilities<double>::Abs(d_sangle_thmin)
            / (double)nth;
         d_dx[level_number][block_number][2] =
            2.0 * tbox::MathUtilities<double>::Abs(d_sangle_thmin)
            / (double)nphi;

         //
         // step in a radial direction in x and set y and z appropriately
         // for a solid angle we go -th to th and -phi to phi
         //
         for (int k = nd_kmin; k <= nd_kmax; ++k) {
            for (int j = nd_jmin; j <= nd_jmax; ++j) {

               double theta = d_sangle_thmin + j * d_dx[level_number][block_number][1]; // dx used for dth
               double phi = d_sangle_thmin + k * d_dx[level_number][block_number][2];

               double xface = cos(theta) * cos(phi);
               double yface = sin(theta) * cos(phi);
               double zface = sin(phi);

               for (int i = nd_imin; i <= nd_imax; ++i) {

                  int ind = POLY3(i,
                        j,
                        k,
                        nd_imin,
                        nd_jmin,
                        nd_kmin,
                        nd_nx,
                        nd_nxny);

                  double r = d_sshell_rmin + d_dx[level_number][block_number][0] * (i);

                  double xx = r * xface;
                  double yy = r * yface;
                  double zz = r * zface;

                  x[ind] = xx;
                  y[ind] = yy;
                  z[ind] = zz;
               }
            }
         }

         found = true;
      }

      /*
       * If its an octant problem, then its got multiple (three) blocks
       */
      if (d_sshell_type == "OCTANT") {

         double drad = (d_sshell_rmax - d_sshell_rmin) / nrad;

         //
         // as in the solid angle we go along a radial direction in
         // x setting y and z appropriately, but here we have logic for
         // the block we are in.  This is contained in the dispOctant.m
         // matlab code.
         //
         for (int k = nd_kmin; k <= nd_kmax; ++k) {
            for (int j = nd_jmin; j <= nd_jmax; ++j) {

               //
               // compute the position on the unit sphere for our radial line
               //
               double xface, yface, zface;
               computeUnitSphereOctant(block_number, nth, j, k,
                  &xface, &yface, &zface);

               for (int i = nd_imin; i <= nd_imax; ++i) {
                  int ind = POLY3(i,
                        j,
                        k,
                        nd_imin,
                        nd_jmin,
                        nd_kmin,
                        nd_nx,
                        nd_nxny);

                  double r = d_sshell_rmin + drad * (i);

                  double xx = r * xface;
                  double yy = r * yface;
                  double zz = r * zface;

                  x[ind] = xx;
                  y[ind] = yy;
                  z[ind] = zz;
               }
            }
         }
         found = true;
      }

      if (!found) {
         TBOX_ERROR(
            d_object_name << ": "
                          << "spherical shell nodal positions for "
                          << d_sshell_type
                          << " not found" << std::endl);
      }

   }

}
예제 #4
0
void MblkGeometry::buildWedgeGridOnPatch(
   const hier::Patch& patch,
   const int xyz_id,
   const int level_number,
   const int block_number)
{

   boost::shared_ptr<pdat::NodeData<double> > xyz(
      BOOST_CAST<pdat::NodeData<double>, hier::PatchData>(
         patch.getPatchData(xyz_id)));

   TBOX_ASSERT(xyz);

   const hier::Index ifirst = patch.getBox().lower();
   const hier::Index ilast = patch.getBox().upper();
   hier::IntVector nghost_cells = xyz->getGhostCellWidth();

   int nd_imin = ifirst(0) - nghost_cells(0);
   int nd_imax = ilast(0) + 1 + nghost_cells(0);
   int nd_jmin = ifirst(1) - nghost_cells(1);
   int nd_jmax = ilast(1) + 1 + nghost_cells(1);
   int nd_nx = nd_imax - nd_imin + 1;
   int nd_ny = nd_jmax - nd_jmin + 1;
   //int nd_nz   = nd_kmax - nd_kmin + 1;
   int nd_nxny = nd_nx * nd_ny;
   //int nd_nel  = nd_nx*nd_ny*nd_nz;

   double dx[SAMRAI::MAX_DIM_VAL];
   dx[0] = d_dx[level_number][block_number][0];
   dx[1] = d_dx[level_number][block_number][1];

   double* x = xyz->getPointer(0);
   double* y = xyz->getPointer(1);

   int nd_kmin;
   int nd_kmax;
   dx[2] = d_dx[level_number][block_number][2];
   double* z = 0;
   if (d_dim == tbox::Dimension(3)) {
      nd_kmin = ifirst(2) - nghost_cells(2);
      nd_kmax = ilast(2) + 1 + nghost_cells(2);
      dx[2] = d_dx[level_number][block_number][2];
      z = xyz->getPointer(2);
   } else {
      nd_kmin = 0;
      nd_kmax = 0;
   }

   //
   // ----------- set the wedge nodal positions
   //

   for (int k = nd_kmin; k <= nd_kmax; ++k) {
      for (int j = nd_jmin; j <= nd_jmax; ++j) {
         for (int i = nd_imin; i <= nd_imax; ++i) {

            int ind = POLY3(i, j, k, nd_imin, nd_jmin, nd_kmin, nd_nx, nd_nxny);

            double r = d_wedge_rmin[block_number] + dx[0] * (i);
            double th = d_wedge_thmin + dx[1] * (j);

            double xx = r * cos(th);
            double yy = r * sin(th);

            x[ind] = xx;
            y[ind] = yy;

            if (d_dim == tbox::Dimension(3)) {
               double zz = d_wedge_zmin + dx[2] * (k);
               z[ind] = zz;
            }
         }
      }
   }
}