static inline __m128 exp2f4(__m128 x) { __m128i ipart; __m128 fpart, expipart, expfpart; x = _mm_min_ps(x, _mm_load_ps(_one29_ps)); x = _mm_max_ps(x, _mm_load_ps(_minusone27_ps)); /* ipart = int(x - 0.5) */ ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_load_ps(_half_ps))); /* fpart = x - ipart */ fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart)); /* expipart = (float) (1 << ipart) */ expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_load_si128((__m128i*)_one27)), 23)); /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */ #if EXP_POLY_DEGREE == 5 expfpart = POLY5(fpart, exp_p5_0, exp_p5_1, exp_p5_2, exp_p5_3, exp_p5_4, exp_p5_5); #elif EXP_POLY_DEGREE == 4 expfpart = POLY4(fpart, exp_p4_0, exp_p4_1, exp_p4_2, exp_p4_3, exp_p4_4); #elif EXP_POLY_DEGREE == 3 expfpart = POLY3(fpart, exp_p3_0, exp_p3_1, exp_p3_2, exp_p3_3); #elif EXP_POLY_DEGREE == 2 expfpart = POLY2(fpart, exp_p2_0, exp_p2_1, exp_p2_2); #else #error #endif return _mm_mul_ps(expipart, expfpart); }
static inline __m128 log2f4(__m128 x) { __m128i exp = _mm_load_si128((__m128i*)_exp_mask); __m128i mant = _mm_load_si128((__m128i*)_mantissa_mask); __m128 one = _mm_load_ps(_ones_ps); __m128i i = _mm_castps_si128(x); __m128 e = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, exp), 23), _mm_load_si128((__m128i*)_one27))); __m128 m = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mant)), one); __m128 p; /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ */ #if LOG_POLY_DEGREE == 6 p = POLY5( m, log_p5_0, log_p5_1, log_p5_2, log_p5_3, log_p5_4, log_p5_5); #elif LOG_POLY_DEGREE == 5 p = POLY4(m, log_p4_0, log_p4_1, log_p4_2, log_p4_3, log_p4_4); #elif LOG_POLY_DEGREE == 4 p = POLY3(m, log_p3_0, log_p3_1, log_p3_2, log_p3_3); #elif LOG_POLY_DEGREE == 3 p = POLY2(m, log_p2_0, log_p2_1, log_p2_2); #else #error #endif /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ p = _mm_mul_ps(p, _mm_sub_ps(m, one)); return _mm_add_ps(p, e); }
void MblkGeometry::buildSShellGridOnPatch( const hier::Patch& patch, const hier::Box& domain, const int xyz_id, const int level_number, const int block_number) { bool xyz_allocated = patch.checkAllocated(xyz_id); if (!xyz_allocated) { TBOX_ERROR("xyz data not allocated" << std::endl); //patch.allocatePatchData(xyz_id); } boost::shared_ptr<pdat::NodeData<double> > xyz( BOOST_CAST<pdat::NodeData<double>, hier::PatchData>( patch.getPatchData(xyz_id))); TBOX_ASSERT(xyz); if (d_dim == tbox::Dimension(3)) { const hier::Index ifirst = patch.getBox().lower(); const hier::Index ilast = patch.getBox().upper(); hier::IntVector nghost_cells = xyz->getGhostCellWidth(); //int imin = ifirst(0); //int imax = ilast(0) + 1; //int jmin = ifirst(1); //int jmax = ilast(1) + 1; //int kmin = ifirst(2); //int kmax = ilast(2) + 1; //int nx = imax - imin + 1; //int ny = jmax - jmin + 1; //int nxny = nx*ny; int nd_imin = ifirst(0) - nghost_cells(0); int nd_imax = ilast(0) + 1 + nghost_cells(0); int nd_jmin = ifirst(1) - nghost_cells(1); int nd_jmax = ilast(1) + 1 + nghost_cells(1); int nd_kmin = ifirst(2) - nghost_cells(2); int nd_kmax = ilast(2) + 1 + nghost_cells(2); int nd_nx = nd_imax - nd_imin + 1; int nd_ny = nd_jmax - nd_jmin + 1; int nd_nxny = nd_nx * nd_ny; double* x = xyz->getPointer(0); double* y = xyz->getPointer(1); double* z = xyz->getPointer(2); bool found = false; int nrad = (domain.upper(0) - domain.lower(0) + 1); int nth = (domain.upper(1) - domain.lower(1) + 1); int nphi = (domain.upper(2) - domain.lower(2) + 1); /* * If its a solid shell, its a single block and dx = dr, dth, dphi */ if (d_sshell_type == "SOLID") { d_dx[level_number][block_number][0] = (d_sshell_rmax - d_sshell_rmin) / (double)nrad; d_dx[level_number][block_number][1] = 2.0 * tbox::MathUtilities<double>::Abs(d_sangle_thmin) / (double)nth; d_dx[level_number][block_number][2] = 2.0 * tbox::MathUtilities<double>::Abs(d_sangle_thmin) / (double)nphi; // // step in a radial direction in x and set y and z 
appropriately // for a solid angle we go -th to th and -phi to phi // for (int k = nd_kmin; k <= nd_kmax; ++k) { for (int j = nd_jmin; j <= nd_jmax; ++j) { double theta = d_sangle_thmin + j * d_dx[level_number][block_number][1]; // dx used for dth double phi = d_sangle_thmin + k * d_dx[level_number][block_number][2]; double xface = cos(theta) * cos(phi); double yface = sin(theta) * cos(phi); double zface = sin(phi); for (int i = nd_imin; i <= nd_imax; ++i) { int ind = POLY3(i, j, k, nd_imin, nd_jmin, nd_kmin, nd_nx, nd_nxny); double r = d_sshell_rmin + d_dx[level_number][block_number][0] * (i); double xx = r * xface; double yy = r * yface; double zz = r * zface; x[ind] = xx; y[ind] = yy; z[ind] = zz; } } } found = true; } /* * If its an octant problem, then its got multiple (three) blocks */ if (d_sshell_type == "OCTANT") { double drad = (d_sshell_rmax - d_sshell_rmin) / nrad; // // as in the solid angle we go along a radial direction in // x setting y and z appropriately, but here we have logic for // the block we are in. This is contained in the dispOctant.m // matlab code. // for (int k = nd_kmin; k <= nd_kmax; ++k) { for (int j = nd_jmin; j <= nd_jmax; ++j) { // // compute the position on the unit sphere for our radial line // double xface, yface, zface; computeUnitSphereOctant(block_number, nth, j, k, &xface, &yface, &zface); for (int i = nd_imin; i <= nd_imax; ++i) { int ind = POLY3(i, j, k, nd_imin, nd_jmin, nd_kmin, nd_nx, nd_nxny); double r = d_sshell_rmin + drad * (i); double xx = r * xface; double yy = r * yface; double zz = r * zface; x[ind] = xx; y[ind] = yy; z[ind] = zz; } } } found = true; } if (!found) { TBOX_ERROR( d_object_name << ": " << "spherical shell nodal positions for " << d_sshell_type << " not found" << std::endl); } } }
/*
 * Fill the nodal coordinate data (the NodeData identified by xyz_id) on a
 * patch for the wedge geometry.
 *
 * patch         patch whose node data is written.
 * xyz_id        patch data index of the nodal coordinate array.
 * level_number  level index into d_dx, from which the grid spacings are
 *               read.
 * block_number  block index into d_dx and d_wedge_rmin.
 *
 * For every node (i, j, k) of the ghosted node box:
 *     r  = d_wedge_rmin[block_number] + dr  * i
 *     th = d_wedge_thmin              + dth * j
 *     x  = r * cos(th),   y = r * sin(th)
 * and, when d_dim is 3 only,
 *     z  = d_wedge_zmin + dz * k.
 * When d_dim is not 3 a single k = 0 plane is processed and neither the
 * third spacing component nor the z array is touched.
 */
void MblkGeometry::buildWedgeGridOnPatch(
   const hier::Patch& patch,
   const int xyz_id,
   const int level_number,
   const int block_number)
{
   boost::shared_ptr<pdat::NodeData<double> > xyz(
      BOOST_CAST<pdat::NodeData<double>, hier::PatchData>(
         patch.getPatchData(xyz_id)));
   TBOX_ASSERT(xyz);

   const hier::Index ifirst = patch.getBox().lower();
   const hier::Index ilast = patch.getBox().upper();
   hier::IntVector nghost_cells = xyz->getGhostCellWidth();

   // Evaluate the dimension test once instead of once per node.
   const bool is_3d = (d_dim == tbox::Dimension(3));

   // Inclusive node-index bounds of the ghosted node box
   // (one more node than cells in each direction).
   const int nd_imin = ifirst(0) - nghost_cells(0);
   const int nd_imax = ilast(0) + 1 + nghost_cells(0);
   const int nd_jmin = ifirst(1) - nghost_cells(1);
   const int nd_jmax = ilast(1) + 1 + nghost_cells(1);

   // Strides used by POLY3 to linearize (i, j, k).
   const int nd_nx = nd_imax - nd_imin + 1;
   const int nd_ny = nd_jmax - nd_jmin + 1;
   const int nd_nxny = nd_nx * nd_ny;

   const double dr = d_dx[level_number][block_number][0];
   const double dth = d_dx[level_number][block_number][1];

   double* x = xyz->getPointer(0);
   double* y = xyz->getPointer(1);

   // The third direction exists only in 3D.  The third spacing component
   // is deliberately read inside the 3D branch only: outside 3D it is
   // never used, and reading it there could index past the end of d_dx
   // if that array is sized by the problem dimension.
   int nd_kmin = 0;
   int nd_kmax = 0;
   double dz = 0.0;
   double* z = 0;
   if (is_3d) {
      nd_kmin = ifirst(2) - nghost_cells(2);
      nd_kmax = ilast(2) + 1 + nghost_cells(2);
      dz = d_dx[level_number][block_number][2];
      z = xyz->getPointer(2);
   }

   //
   // ----------- set the wedge nodal positions
   //
   for (int k = nd_kmin; k <= nd_kmax; ++k) {

      // z depends on k only (and is only meaningful in 3D).
      const double zz = is_3d ? (d_wedge_zmin + dz * (k)) : 0.0;

      for (int j = nd_jmin; j <= nd_jmax; ++j) {

         // The angle and its trig values depend on j only: evaluate them
         // once per radial line rather than once per node.
         const double th = d_wedge_thmin + dth * (j);
         const double cos_th = cos(th);
         const double sin_th = sin(th);

         for (int i = nd_imin; i <= nd_imax; ++i) {

            const int ind = POLY3(i, j, k,
                  nd_imin, nd_jmin, nd_kmin, nd_nx, nd_nxny);

            const double r = d_wedge_rmin[block_number] + dr * (i);

            x[ind] = r * cos_th;
            y[ind] = r * sin_th;

            if (is_3d) {
               z[ind] = zz;
            }
         }
      }
   }
}