// Nearest-neighbour interpolation kernel.
//
// Reads three children from `inputs`: the sample abscissae `x` (child 0),
// the sample values `y` (child 1) and the query points `xi` (child 2).
// `yi` is resized to `inputs.extent()` and receives, for each query point,
// the value of `y` at whichever of the two bracketing samples is closer.
// The comparison is strict, so on an exact tie the upper neighbour
// (`index + 1`) is selected.
//
// Extrapolation: `choices()` may override `extrap` / `extrapval` (defaults:
// `false` and NaN). When `extrap` is false, every query point that is not
// >= x(begin_) or not <= x(end_) is replaced by `extrapval`.
//
// Preconditions are checked with BOOST_ASSERT_MSG only: `x` must hold at
// least two points, be sorted ascending, and have as many elements as `y`.
// If `x` has fewer than two points and assertions are compiled out, `yi` is
// returned resized but otherwise unassigned.
//
// NOTE(review): `bsearch(x, xi)` is assumed to return, per query point, the
// index of the lower bracketing sample -- confirm against its definition.
result_type operator()(A0& yi, A1& inputs) const
{
  yi.resize(inputs.extent());
  const child0 & x = boost::proto::child_c<0>(inputs);
  if (numel(x) <= 1)
    BOOST_ASSERT_MSG(numel(x) > 1, "Interpolation requires at least two sample points in each dimension.");
  else
  {
    BOOST_ASSERT_MSG(issorted(x, 'a'), "for 'nearest' interpolation x values must be sorted in ascending order");
    const child1 & y = boost::proto::child_c<1>(inputs);
    BOOST_ASSERT_MSG(numel(x) == numel(y), "The grid vectors do not define a grid of points that match the given values.");
    const child2 & xi = boost::proto::child_c<2>(inputs);

    // Extrapolation options; `choices` may overwrite these defaults.
    bool extrap = false;
    value_type extrapval = Nan<value_type>();
    choices(inputs, extrap, extrapval, N1());

    // Candidate neighbours on each side of every query point.
    table<index_type> index = bsearch (x, xi);
    table<index_type> indexp1 = oneplus(index);

    // Keep the closer of the two candidates (upper one on a tie).
    yi = y(nt2::if_else(lt(nt2::abs(xi-x(index)), nt2::abs(xi-x(indexp1))), index, indexp1));

    // Overwrite out-of-range (or unordered) queries unless extrapolating.
    value_type b = value_type(x(begin_));
    value_type e = value_type(x(end_));
    if (!extrap)
      yi = nt2::if_else(nt2::logical_or(boost::simd::is_nge(xi, b), boost::simd::is_nle(xi, e)), extrapval, yi);
  }
  return yi;
}
// Branch-free arctangent kernel for double-precision packs.
//
// Works on |a0|; the sign is not re-applied here. Each lane is classified
// against two thresholds (named tan(3*pi/8) and tan(pi/8) in the code):
//   * |a0| >= tan3pio8           : offset pi/2, argument -1/|a0|
//   * tanpio8 <= |a0| < tan3pio8 : offset pi/4, argument (|a0|-1)/(|a0|+1)
//   * otherwise                  : offset 0,    argument |a0|
// The reduced argument t is then fed to t + t * (t^2 * P(t^2) / Q(t^2)) and
// a small `morebits` correction is added (half of it in the middle range,
// all of it in the high range) before the offset is added.
//
// NOTE(review): relies on `if_zero_else(c, v)` yielding 0 where c holds and
// v elsewhere, and on `seladd(c, a, b)` yielding a+b where c holds and a
// elsewhere -- confirm against their definitions.
static inline A0_n kernel_atan(const A0_n a0_n)
{
  typedef typename meta::scalar_of<A0>::type sA0;

  const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
  const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
  const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

  const A0 a0 = {a0_n};
  const A0 magnitude = nt2::abs(a0);

  // Lane masks for the range classification.
  const bA0 below_high = lt(magnitude, tan3pio8);
  const bA0 in_middle  = logical_and(ge(magnitude, tanpio8), below_high);

  // Additive offset per range.
  A0 offset = if_zero_else(below_high, Pio_2<A0>());
  offset = select(in_middle, Pio_4<A0>(), offset);

  // Reduced argument per range.
  A0 t = select(below_high, magnitude, -rec(magnitude));
  t = select(in_middle, minusone(magnitude)/oneplus(magnitude), t);

  // Rational approximation on the reduced argument.
  const A0 t2 = sqr(t);
  const A0 ratio =
    t2*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                  (0xbfec007fa1f72594ll,
                                   0xc03028545b6b807all,
                                   0xc052c08c36880273ll,
                                   0xc05eb8bf2d05ba25ll,
                                   0xc0503669fd28ec8ell)
                                 )>(t2)/
       horner< NT2_HORNER_COEFF_T(sA0, 6,
                                  (0x3ff0000000000000ll,
                                   0x4038dbc45b14603cll,
                                   0x4064a0dd43b8fa25ll,
                                   0x407b0e18d2e2be3bll,
                                   0x407e563f13b049eall,
                                   0x4068519efbbd62ecll)
                                 )>(t2);
  A0 result = fma(t, ratio, t);

  // Low-order correction that accompanies the pi/4 and pi/2 offsets.
  result = seladd(in_middle, result, mul(Half<A0>(), morebits));
  result = result+if_zero_else(below_high, morebits);
  return offset + result;
}
// Scalar single-precision arctangent kernel.
//
// Returns 0 for a zero input and pi/2 for an infinite one (the sign of the
// input is not looked at: the kernel works on |a0|). Otherwise |a0| is
// reduced to a small argument t with an additive offset:
//   * |a0| > tan(3*pi/8) : offset pi/2, t = -1/|a0|
//   * |a0| > tan(pi/8)   : offset pi/4, t = (|a0|-1)/(|a0|+1)
//   * otherwise          : offset 0,    t = |a0|
// and the result is offset + t + t * z * p(z) with z = t*t. The original
// author's note gives the polynomial in Horner form as
//   ((( 8.05374449538e-2 * z - 1.38776856032E-1) * z
//       + 1.99777106478E-1) * z - 3.33329491539E-1) * z * t + t
// while the code evaluates it as two degree-1 halves combined through z*z.
static inline A0 kernel_atan(const A0& a0)
{
  if (is_eqz(a0)) return Zero<A0>();
  if (is_inf(a0)) return Pio_2<A0>();

  A0 t = nt2::abs(a0);
  A0 offset;
  if (t > single_constant<A0,0x401a827a>())       // 2.414213562373095, tan 3pi/8
  {
    offset = Pio_2<A0>();
    t = -rec(t);
  }
  else if (t > single_constant<A0,0x3ed413cd>())  // 0.4142135623730950f, tan pi/8
  {
    offset = Pio_4<A0>();
    t = minusone(t)/oneplus(t);
  }
  else
  {
    offset = 0.0;
  }

  const A0 t2 = sqr(t);
  const A0 upper = madd(t2, single_constant<A0,0x3da4f0d1>(), single_constant<A0,0xbe0e1b85>());
  const A0 lower = madd(t2, single_constant<A0,0x3e4c925f>(), single_constant<A0,0xbeaaaa2a>());
  const A0 poly  = madd(upper, sqr(t2), lower);
  return add(offset, madd(t, mul(poly, t2), t));
}
// Scalar arccosine expressed through asin.
//
// Three ranges are handled, tested in this order:
//   * a0 < Mhalf : Pi - 2 * asin(sqrt((1 + a0) * Half))
//   * a0 > Half  :      2 * asin(sqrt((1 - a0) * Half))
//   * otherwise  : Pio_2 - asin(a0)
// Inputs that satisfy neither comparison (e.g. NaN) take the last form.
static inline A0 acos(const A0& a0)
{
  if (a0 < Mhalf<A0>())
  {
    return Pi<A0>()-asin( nt2::sqrt(oneplus(a0)*Half<A0>()))*Two<A0>();
  }
  if (a0 > Half<A0>())
  {
    return asin( nt2::sqrt(oneminus(a0)*Half<A0>()))*Two<A0>();
  }
  return (Pio_2<A0>()-asin(a0));
}
// Single-precision polynomial approximation.
//
// Evaluates 1 + x * (even(x^2) + x * odd(x^2)), where `even` and `odd` are
// each a degree-2 polynomial in x^2 built from two chained madd calls
// (assuming madd(a, b, c) computes a*b + c).
//
// NOTE(review): 0x40135d8e decodes to about 2.3026 (ln 10), which suggests
// this is the reduced-range core of a base-10 exponential -- confirm with
// the caller.
static inline A0 approx(const A0& x)
{
  const A0 x2 = sqr(x);

  // Coefficients multiplying the odd powers inside the bracket.
  const A0 odd  = madd( madd(Const<A0,0x3e5345fd>(), x2, Const<A0,0x3f95eceb>())
                      , x2
                      , Const<A0,0x4029a924>()
                      );

  // Coefficients multiplying the even powers inside the bracket.
  const A0 even = madd( madd(Const<A0,0x3f0ac229>(), x2, Const<A0,0x400237b4>())
                      , x2
                      , Const<A0,0x40135d8e>()
                      );

  return oneplus(x*madd(x, odd, even));
}
// Scalar double-precision arctangent.
//
// Zero is returned unchanged and an infinite input gives Pio_2 * sign(a0).
// Otherwise |a0| is reduced to an argument t with an offset and a weight for
// the `morebits` correction:
//   * |a0| >  0x4003504f333f9de6 : offset pi/2, t = -1/|a0|,            weight 1
//   * |a0| <= 0x3fe51eb851eb851f : offset 0,    t = |a0|,               weight 0
//   * otherwise                  : offset pi/4, t = (|a0|-1)/(|a0|+1),  weight 1/2
// The result is offset + t + t * (t^2 * P(t^2) / Q(t^2)) + weight * morebits,
// negated when a0 is negative.
//
// `weight` starts life as the outcome of the first comparison (so it is 1 or
// 0 once converted to A0) and doubles as the condition of the first branch.
static inline A0 atan(const A0& a0)
{
  if (is_eqz(a0)) return a0;
  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);

  A0 t = nt2::abs(a0);
  A0 offset;
  A0 weight = (t > double_constant<double,0x4003504f333f9de6ll>());
  if (weight)
  {
    offset = Pio_2<A0>();
    t = -rec(t);
  }
  else if ((t <= double_constant<double,0x3fe51eb851eb851fll>()))
  {
    offset = Zero<A0>();
  }
  else
  {
    offset = Pio_4<A0>();
    weight = Half<A0>();
    t = minusone(t)/oneplus(t);
  }

  // Rational approximation on the reduced argument.
  A0 r = sqr(t);
  r = r*horner< NT2_HORNER_COEFF_T(stype, 5,
                                   (0xbfec007fa1f72594ll,
                                    0xc03028545b6b807all,
                                    0xc052c08c36880273ll,
                                    0xc05eb8bf2d05ba25ll,
                                    0xc0503669fd28ec8ell)
                                  )>(r)/
        horner< NT2_HORNER_COEFF_T(stype, 6,
                                   (0x3ff0000000000000ll,
                                    0x4038dbc45b14603cll,
                                    0x4064a0dd43b8fa25ll,
                                    0x407b0e18d2e2be3bll,
                                    0x407e563f13b049eall,
                                    0x4068519efbbd62ecll)
                                  )>(r);
  r = madd(t, r, t);

  static const A0 morebits = double_constant<double,0x3c91a62633145c07ll>();
  r += weight * morebits;

  const A0 unsigned_result = offset + r;
  return is_ltz(a0) ? -unsigned_result : unsigned_result;
}
static inline void split(const X& x, size_t minsubs, XX& xx, L & pathlen) { // Split subintervals in the interval vector X so that, to working // precision, no subinterval is longer than 1/minsubs times the // total path length. Removes subintervals of zero length, except // that the resulting X will always has at least two elements on // return, i.e., if the total path length is zero, X will be // collapsed into a single interval of zero length. Also returns // the integration path length. typedef typename X::value_type itype_t; typedef typename meta::as_logical<itype_t> btype_t; typedef typename meta::as_integer<itype_t, signed> iitype_t; typedef typename container::table<itype_t> itab_t; typedef typename meta::as_real<itype_t>::type rtype_t; typedef typename container::table<rtype_t> rtab_t; typedef typename container::table<ptrdiff_t> ptab_t; rtab_t absdx = nt2::abs(nt2::diff(x)); pathlen = nt2::globalasum1(absdx); xx = x; if (pathlen > 0) { rtype_t udelta = minsubs/pathlen; rtab_t tmp_nnew = nt2::minusone(nt2::ceil(absdx*udelta)); //BOOST_AUTO_TPL(tmp_nnew, nt2::minusone(nt2::ceil(absdx*udelta))); ptab_t idxnew = nt2::rowvect(nt2::find(is_gtz(tmp_nnew))); rtab_t nnew = tmp_nnew(idxnew); for (size_t j = nt2::numel(idxnew); j >= 1; --j) { ptrdiff_t k = idxnew(j); rtype_t nnj = nnew(j); //Calculate new points. itab_t newpts = x(k)+(nt2::_(One<rtype_t>(), nnj)/oneplus(nnj))*(x(k+1)-x(k)); // newpts = newpts+x(k); // Insert the new points. itab_t xx1 = nt2::cath(nt2::cath(xx(nt2::_(begin_, k)),newpts),xx(nt2::_(k+1, end_))); xx = xx1; } } // Remove useless subintervals. itab_t xx1 = xx(nt2::cath(nt2::One<ptrdiff_t>(), nt2::oneplus(nt2::rowvect(nt2::find(nt2::is_nez(nt2::diff(xx))))))); if (nt2::isscalar(xx1)) xx = nt2::repnum(xx(begin_), 2, 1); else xx = xx1; }
// Double-precision rational approximation.
//
// With xx = x*x, P and Q the two degree-3 polynomials in xx given by the
// coefficient lists below, and px = x * P(xx), the function returns
//   1 + 2 * px / (Q(xx) - px).
//
// NOTE(review): this has the shape of the reduced-range core of an
// exponential routine (the float sibling's constants point at base 10) --
// confirm with the caller.
static inline A0 approx(const A0& x)
{
  typedef typename meta::scalar_of<A0>::type sA0;

  const A0 xx = sqr(x);

  // Odd part: x * P(x^2).
  const A0 px = x*horner<NT2_HORNER_COEFF_T(sA0, 4,
                                            (0x3fa4fd75f3062dd4ll,
                                             0x40277d9474c55934ll,
                                             0x40796b7a050349e4ll,
                                             0x40a2b4798e134a01ll)
                                           )> (xx);

  // px / (Q(x^2) - px)
  const A0 quotient = px/( horner<NT2_HORNER_COEFF_T(sA0, 4,
                                                     (0x3ff0000000000000ll,
                                                      0x405545fdce51ca08ll,
                                                      0x4093e05eefd67782ll,
                                                      0x40a03f37650df6e2ll)
                                                    )> (xx)-px);

  return oneplus(quotient+quotient);
}
// Branch-free arctangent kernel for single-precision packs.
//
// Works on |a0|; the sign is not re-applied here. Each lane is classified
// against 0x401a827a (tan(3*pi/8)) and 0x3ed413cd:
//   * |a0| >= 0x401a827a               : offset pi/2, t = -1/|a0|
//   * 0x3ed413cd <= |a0| < 0x401a827a  : offset pi/4, t = (|a0|-1)/(|a0|+1)
//   * otherwise                        : offset 0,    t = |a0|
// and the result is offset + t + t * z * p(z), z = t*t, with the cubic p
// evaluated as two degree-1 halves combined through z*z.
//
// NOTE(review): relies on `if_zero_else(c, v)` yielding 0 where c holds and
// v elsewhere -- confirm against its definition.
static inline A0_n kernel_atan(const A0_n a0_n)
{
  const A0 a0 = {a0_n};
  const A0 magnitude = nt2::abs(a0);   // non-negative from here on

  // Lane masks for the range classification.
  const bA0 below_high = lt(magnitude, single_constant<A0, 0x401a827a>());
  const bA0 in_middle  = logical_and(ge(magnitude, single_constant<A0, 0x3ed413cd>()), below_high);

  // Additive offset per range.
  const A0 offset = select( in_middle
                          , Pio_4<A0>()
                          , if_zero_else(below_high, Pio_2<A0>())
                          );

  // Reduced argument per range.
  const A0 t = select( in_middle
                     , (minusone(magnitude)/oneplus(magnitude))
                     , select(below_high, magnitude, -rec(magnitude))
                     );

  // Polynomial in z = t*t, split into two halves joined through z*z.
  const A0 z = sqr(t);
  const A0 upper = madd(z, single_constant<A0, 0x3da4f0d1>(), single_constant<A0, 0xbe0e1b85>());
  const A0 lower = madd(z, single_constant<A0, 0x3e4c925f>(), single_constant<A0, 0xbeaaaa2a>());
  const A0 poly  = madd(upper, sqr(z), lower);

  return add(offset, madd(t, mul(poly, z), t));
}
// Branch-free single-precision arctangent on packs (bitmask flavour).
//
// The sign bit of a0 is peeled off with bitofsign, the computation runs on
// |a0|, and the sign is xor-ed back into the result at the end. Range
// classification uses 0x401a827a (tan(3*pi/8)) and 0x3ed413cd:
//   * |a0| >= 0x401a827a               : offset pi/2, t = -1/|a0|
//   * 0x3ed413cd <= |a0| < 0x401a827a  : offset pi/4, t = (|a0|-1)/(|a0|+1)
//   * otherwise                        : offset 0,    t = |a0|
// and the magnitude is offset + t + t * z * p(z), z = t*t.
//
// NOTE(review): the masks are held in A0 itself; `b_notand(m, v)` is assumed
// to compute (~m) & v, i.e. v where the mask is clear and 0 elsewhere --
// confirm against its definition.
static inline A0 atan(const A0& a0)
{
  const A0 magnitude = nt2::abs(a0);
  const A0 sign_bit  = bitofsign(a0);

  // Range masks.
  const A0 below_high = lt(magnitude, single_constant<A0, 0x401a827a>());
  const A0 in_middle  = b_and(ge(magnitude, single_constant<A0, 0x3ed413cd>()), below_high);

  // Additive offset per range.
  const A0 offset = select( in_middle
                          , Pio_4<A0>()
                          , b_notand(below_high, Pio_2<A0>())
                          );

  // Reduced argument per range.
  const A0 t = select( in_middle
                     , (minusone(magnitude)/oneplus(magnitude))
                     , select(below_high, magnitude, -rec(magnitude))
                     );

  // Polynomial in z = t*t, split into two halves joined through z*z.
  const A0 z = sqr(t);
  const A0 upper = madd(z, single_constant<A0, 0x3da4f0d1>(), single_constant<A0, 0xbe0e1b85>());
  const A0 lower = madd(z, single_constant<A0, 0x3e4c925f>(), single_constant<A0, 0xbeaaaa2a>());
  const A0 poly  = madd(upper, sqr(z), lower);

  const A0 unsigned_result = add(offset, madd(t, mul(poly, z), t));
  return b_xor(unsigned_result, sign_bit);
}
// Branch-free double-precision arctangent on packs (bitmask flavour).
//
// The computation runs on |a0| and the sign bit of a0 is xor-ed back into
// the result at the end. Range classification:
//   * |a0| >= tan3pio8           : offset pi/2, t = -1/|a0|
//   * tanpio8 <= |a0| < tan3pio8 : offset pi/4, t = (|a0|-1)/(|a0|+1)
//   * otherwise                  : offset 0,    t = |a0|
// The magnitude is offset + t + t * (t^2 * P(t^2) / Q(t^2)) plus a small
// `morebits` correction (half of it in the middle range, all of it in the
// high range).
//
// The constants are plain locals rather than function-local statics: the
// previous static copies were never read, and each one carried a guarded
// initialisation of a SIMD-typed object.
//
// NOTE(review): the masks are held in A0 itself; `b_notand(m, v)` is assumed
// to compute (~m) & v and `seladd(c, a, b)` to yield a+b where c holds and a
// elsewhere -- confirm against their definitions.
static inline A0 atan(const A0& a0)
{
  typedef typename meta::scalar_of<A0>::type sA0;

  const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
  const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
  const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

  A0 x = abs(a0);

  // Range masks.
  const A0 flag1 = lt(x, tan3pio8);
  const A0 flag2 = b_and(ge(x, tanpio8), flag1);

  // Additive offset per range.
  A0 yy = b_notand(flag1, Pio_2<A0>());
  yy = select(flag2, Pio_4<A0>(), yy);

  // Reduced argument per range.
  A0 xx = select(flag1, x, -rec(x));
  xx = select(flag2, minusone(x)/oneplus(x),xx);

  // Rational approximation on the reduced argument.
  A0 z = sqr(xx);
  z = z*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                   (0xbfec007fa1f72594ll,
                                    0xc03028545b6b807all,
                                    0xc052c08c36880273ll,
                                    0xc05eb8bf2d05ba25ll,
                                    0xc0503669fd28ec8ell)
                                  )>(z)/
        horner< NT2_HORNER_COEFF_T(sA0, 6,
                                   (0x3ff0000000000000ll,
                                    0x4038dbc45b14603cll,
                                    0x4064a0dd43b8fa25ll,
                                    0x407b0e18d2e2be3bll,
                                    0x407e563f13b049eall,
                                    0x4068519efbbd62ecll)
                                  )>(z);
  z = fma(xx, z, xx);

  // Low-order correction that accompanies the pi/4 and pi/2 offsets.
  z = seladd(flag2, z, mul(Half<A0>(), morebits));
  z = z+b_notand(flag1, morebits);

  yy = yy + z;
  return b_xor(yy, bitofsign(a0));
}
// One step of the three-term recurrence for Laguerre polynomials.
//
// Given Ln = L_n(x) and Lnm1 = L_{n-1}(x), returns
//   ((n + (n+1) - x) * Ln - n * Lnm1) / (n+1)
// i.e. L_{n+1}(x), with n+1 obtained from oneplus(n) and converted to T.
//
// n    : current order.
// x    : evaluation point.
// Ln   : polynomial value at order n.
// Lnm1 : polynomial value at order n-1.
static inline T laguerre_next(const uint32_t& n, const T& x, const T1 &Ln, const T2& Lnm1)
{
  const T next_order = T(oneplus(n));
  return ((T(n) + next_order - x) * Ln - n *Lnm1) / next_order;
}
// Single-precision cosine core on a pre-reduced argument.
//
// Returns 1 + (z * Mhalf + tail(z) * z^2), where tail is the degree-2
// polynomial in z given by the three coefficients below. The second and
// third parameters are accepted but unused.
//
// NOTE(review): z is presumably the square of the reduced angle, which
// would make this the usual 1 - z/2 + z^2 * (...) cosine expansion --
// confirm with the caller.
static inline A0 cos_eval(const A0& z, const A0&, const A0&)
{
  const A0 tail = horner< NT2_HORNER_COEFF_T(A0, 3,
                                             (0x37ccf5ce,
                                              0xbab60619,
                                              0x3d2aaaa5)
                                            ) > (z);
  return oneplus(madd(z,Mhalf<A0>(), tail*sqr(z)));
}