// computes dawson(a0)/a0 for double or double vectors // xx is sqr(a0) and 3.25 <= abs(a0) <= 6.25 static BOOST_FORCEINLINE A0 dawson2(const A0& ox2, const A0 & x) { /* interval 3.25 to 6.25 */ A0 num = horner<NT2_HORNER_COEFF_T(sA0, 11, (0x3f024ae3, 0xbe7aa0e7, 0x3dc0d25d, 0xbcb32b13, 0x3b6fff71, 0xb9dde22a, 0x3816d831, 0xb6100aef, 0x33c36c5f, 0xb1251d8a, 0x2e1dfe15 ) )> (ox2); A0 denom = horner<NT2_HORNER_COEFF_T(sA0, 11, (0x3f800000, 0xbf21c042, 0x3e726344, 0xbd59d3f0, 0x3c0af173, 0xba7882fc, 0x38a3cafe, 0xb698f761, 0x344b0b83, 0xb1a8d160, 0x2e9dfe10 ) )> (ox2); return nt2::rec(x)+ox2*num/(denom*x); }
// Arctangent kernel working on abs(a0) (64-bit coefficient patterns).
// abs(a0) is classified against two thresholds (tan3pio8 and tanpio8); the
// classification picks both an additive offset (Pio_2, Pio_4 or the
// if_zero_else result) and a reduced argument (x, -1/x or (x-1)/(x+1)).
// A rational polynomial in the squared reduced argument is then evaluated and
// a small "morebits" correction is added according to the same masks.
// The sign of a0 is not re-applied here.
static inline A0_n kernel_atan(const A0_n a0_n)
{
  typedef typename meta::scalar_of<A0>::type sA0;
  const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
  const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
  const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

  const A0 a0 = {a0_n};
  const A0 x  = nt2::abs(a0);

  // range masks
  const bA0 below_hi = lt(x, tan3pio8);
  const bA0 in_mid   = logical_and(ge(x, tanpio8), below_hi);

  // additive offset selected by the range
  const A0 offset = select(in_mid, Pio_4<A0>(),
                           if_zero_else(below_hi, Pio_2<A0>()));

  // reduced argument selected by the range
  const A0 reduced = select(in_mid, minusone(x)/oneplus(x),
                            select(below_hi, x, -rec(x)));

  const A0 r2 = sqr(reduced);
  const A0 ratio = r2*horner< NT2_HORNER_COEFF_T(sA0, 5,
                               (0xbfec007fa1f72594ll,
                                0xc03028545b6b807all,
                                0xc052c08c36880273ll,
                                0xc05eb8bf2d05ba25ll,
                                0xc0503669fd28ec8ell)
                              )>(r2)/
                      horner< NT2_HORNER_COEFF_T(sA0, 6,
                               (0x3ff0000000000000ll,
                                0x4038dbc45b14603cll,
                                0x4064a0dd43b8fa25ll,
                                0x407b0e18d2e2be3bll,
                                0x407e563f13b049eall,
                                0x4068519efbbd62ecll)
                              )>(r2);

  A0 result = fma(reduced, ratio, reduced);
  // low-order corrections: half of morebits under in_mid, and morebits
  // through if_zero_else on below_hi
  result = seladd(in_mid, result, mul(Half<A0>(), morebits));
  result = result+if_zero_else(below_hi, morebits);
  return offset + result;
}
// computes dawson(a0)/a0 for double or double vectors // xx is sqr(a0) and 3.25 <= abs(a0) <= 6.25 static BOOST_FORCEINLINE A0 dawson2(const A0& ox2, const A0 & x) { /* interval 3.25 to 6.25 */ A0 num = horner<NT2_HORNER_COEFF_T(sA0, 11, (0x3fe0495c52fe411ell,//(5.08955156417900903354E-1), 0xbfcf541cdebcb905ll,//(-2.44754418142697847934E-1) 0x3fb81a4b94e413c5ll,//(9.41512335303534411857E-2), 0xbf966562633da983ll,//(-2.18711255142039025206E-2) 0x3f6dffee25eba9bdll,//(3.66207612329569181322E-3), 0xbf3bbc454e5479acll,//(-4.23209114460388756528E-4) 0x3f02db061d28d773ll,//(3.59641304793896631888E-5), 0xbec2015dd001fa5bll,//(-2.14640351719968974225E-6) 0x3e786d8be5016991ll,//(9.10010780076391431042E-8), 0xbe24a3b14d9709f0ll,//(-2.40274520828250956942E-9) 0x3dc3bfc2ac32b39ell //(3.59233385440928410398E-11) ) )> (ox2); A0 denom = horner<NT2_HORNER_COEFF_T(sA0, 11, (0x3ff0000000000000ll, //(1.00000000000000000000E0), 0xbfe438083f2d47c7ll, //(-6.31839869873368190192E-1) 0x3fce4c6875173c3ell, //(2.36706788228248691528E-1), 0xbfab3a7e0ed1122bll, //(-5.31806367003223277662E-2) 0x3f815e2e53c1fb60ll, //(8.48041718586295374409E-3), 0xbf4f105f8f05c7d8ll, //(-9.47996768486665330168E-4) 0x3f14795fc069cc34ll, //(7.81025592944552338085E-5), 0xbed31eec145c9b53ll, //(-4.55875153252442634831E-6) 0x3e8961705729c1cdll, //(1.89100358111421846170E-7), 0xbe351a2c0f7cf15cll, //(-4.91324691331920606875E-9) 0x3dd3bfc202a6b560ll //(7.18466403235734541950E-11) ) )> (ox2); return nt2::rec(x)+ox2*num/(denom*x); }
static inline void kernel_log(const A0& a0, A0& dk, A0& hfsq, A0& s, A0& R, A0& f) { typedef typename meta::as_integer<A0, signed>::type int_type; typedef typename meta::scalar_of<A0>::type sA0; A0 x; int_type k; boost::fusion::tie(x, k) = fast_frexp(a0); const int_type x_lt_sqrthf = simd::native_cast<int_type>(gt(Sqrt_2o_2<A0>(), x)); k = k+x_lt_sqrthf; f = minusone(x+b_and(x, x_lt_sqrthf)); dk = tofloat(k); s = f/add(Two<A0>(),f); A0 z = sqr(s); A0 w = sqr(z); A0 t1= w*horner<NT2_HORNER_COEFF_T(sA0, 3, (0x3fc39a09d078c69fll, 0x3fcc71c51d8e78afll, 0x3fd999999997fa04ll) )> (w); A0 t2= z*horner<NT2_HORNER_COEFF_T(sA0, 4, (0x3fc2f112df3e5244ll, 0x3fc7466496cb03dell, 0x3fd2492494229359ll, 0x3fe5555555555593ll) )> (w); R = t2+t1; hfsq = mul(Half<A0>(), sqr(f)); }
template < class AA0 > static inline AA0 branch2(const AA0 & x) { typedef typename meta::scalar_of<AA0>::type stype; AA0 q = rec(x); AA0 w = sqrt(q); AA0 p3 = w * horner< NT2_HORNER_COEFF_T(stype, 8, (0x3d8d98f9, 0xbe69f6b3, 0x3ea0ad85, 0xbe574699, 0x3bb21b25, 0x3e18ec50, 0x36a6f7c5, 0x3f4c4229 ) ) > (q); w = sqr(q); AA0 xn = q* horner< NT2_HORNER_COEFF_T(stype, 8, (0xc233e16d, 0x424af04a, 0xc1c6dca7, 0x40e72299, 0xbfc5bd69, 0x3eb364d9, 0xbe27bad7, 0x3ebfffdd ) ) > (w)-single_constant<AA0,0x4016cbe4 > (); return if_zero_else(eq(x, Inf<AA0>()), p3*cos(xn+x)); }
static inline void kernel_log(const A0& a0, A0& dk, A0& hfsq, A0& s, A0& R, A0& f) { typedef typename meta::as_integer<A0, signed>::type int_type; typedef typename meta::scalar_of<A0>::type sA0; A0 x; int_type k; nt2::fast_frexp(a0, x, k); const int_type x_lt_sqrthf = nt2::is_greater(nt2::Sqrt_2o_2<A0>(), x)?nt2::Mone<int_type>():nt2::Zero<int_type>(); k += x_lt_sqrthf; f = nt2::minusone(x+nt2::b_and(x, nt2::genmask(x_lt_sqrthf))); dk = nt2::tofloat(k); s = f/nt2::add(nt2::Two<A0>(),f); A0 z = nt2::sqr(s); A0 w = nt2::sqr(z); A0 t1= w*nt2::horner<NT2_HORNER_COEFF_T(sA0, 3, (0x3fc39a09d078c69fll, 0x3fcc71c51d8e78afll, 0x3fd999999997fa04ll) )> (w); A0 t2= z*horner<NT2_HORNER_COEFF_T(sA0, 4, (0x3fc2f112df3e5244ll, 0x3fc7466496cb03dell, 0x3fd2492494229359ll, 0x3fe5555555555593ll) )> (w); R = t2+t1; hfsq = nt2::mul(Half<A0>(), nt2::sqr(f)); }
static inline void kernel_log(const A0& a0, A0& dk, A0& hfsq, A0& s, A0& R, A0& f) { A0 x; int_type k(fast_frexp(a0, x)); const int_type x_lt_sqrthf = -isgt(Sqrt_2o_2<A0>(), x); k += x_lt_sqrthf; f = minusone(x+b_and(x, genmask<A0>(x_lt_sqrthf))); dk = tofloat(k); s = f/add(Two<A0>(),f); A0 z = sqr(s); A0 w = sqr(z); A0 t1= w*horner<NT2_HORNER_COEFF_T(A0, 3, (0x3fc39a09d078c69fll, 0x3fcc71c51d8e78afll, 0x3fd999999997fa04ll) )> (w); A0 t2= z*horner<NT2_HORNER_COEFF_T(A0, 4, (0x3fc2f112df3e5244ll, 0x3fc7466496cb03dell, 0x3fd2492494229359ll, 0x3fe5555555555593ll) )> (w); R = t2+t1; hfsq = mul(Half<A0>(), sqr(f)); }
template < class AA0 > static inline AA0 branch2(const AA0 & a0) { typedef typename meta::scalar_of<AA0>::type sAA0; AA0 q = rec(a0); AA0 w = sqrt(q); AA0 p3 = w * horner< NT2_HORNER_COEFF_T(sAA0, 8, (0xbd8c100e, 0x3e3ef887, 0xbe5ba616, 0x3df54214, 0xbb69539e, 0xbd4b8bc1, 0xb6612dc2, 0x3f4c422a ) ) > (q); w = sqr(q); AA0 xn = q* horner< NT2_HORNER_COEFF_T(sAA0, 8, (0x4201aee0, 0xc2113945, 0x418c7f6a, 0xc09f3306, 0x3f8040aa, 0xbe46a57f, 0x3d84ed6e, 0xbdffff97 ) ) > (w)-Pio_4<AA0>(); return if_zero_else(eq(a0, Inf<AA0>()), p3*nt2::sin(xn+a0)); }
// Branch-free arcsine (64-bit coefficient patterns).
// Two candidate results are always computed from x = abs(a0):
//   * large_res : built from 1-x, a square root and a 5/5 rational polynomial
//   * mid_res   : x + x*R(a0^2) with a 6/6 rational polynomial
// The final value selects x itself when x < Sqrteps, large_res when
// x > 0.625 (pattern 0x3fe4000000000000), mid_res otherwise; the sign bit of
// a0 is xor-ed back in and the whole expression goes through if_nan_else on
// x > One.
static inline A0_n asin(const A0_n a0_n)
{
  const A0 a0 = { a0_n };
  typedef typename meta::scalar_of<A0>::type sA0;

  const A0 x        = nt2::abs(a0);
  const A0 pio4     = Pio_4<A0>();
  const A0 morebits = double_constant<A0, 0xbc91a62633145c07ll>();
  const A0 ct1      = double_constant<A0, 0x3fe4000000000000ll>();
  const bA0 small   = lt(x, Sqrteps<A0>());

  // candidate for x > ct1
  const A0 comp = oneminus(x);
  const A0 vp = comp*horner< NT2_HORNER_COEFF_T(sA0, 5,
                              (0x3f684fc3988e9f08ll,
                               0xbfe2079259f9290fll,
                               0x401bdff5baf33e6all,
                               0xc03991aaac01ab68ll,
                               0x403c896240f3081dll)
                             )>(comp)/
                     horner< NT2_HORNER_COEFF_T(sA0, 5,
                              (0x3ff0000000000000ll,
                               0xc035f2a2b6bf5d8cll,
                               0x40626219af6a7f42ll,
                               0xc077fe08959063eell,
                               0x40756709b0b644bell)
                             )>(comp);
  const A0 root = sqrt(comp+comp);
  const A0 head = pio4-root;
  const A0 tail = fma(root, vp, morebits);
  // pio4 is added in two steps, exactly as (pio4 - root - tail) + pio4
  const A0 large_res = (head-tail)+pio4;

  // candidate for the remaining range
  const A0 sq = sqr(a0);
  const A0 ratio = sq*horner< NT2_HORNER_COEFF_T(sA0, 6,
                               (0x3f716b9b0bd48ad3ll,
                                0xbfe34341333e5c16ll,
                                0x4015c74b178a2dd9ll,
                                0xc0304331de27907bll,
                                0x40339007da779259ll,
                                0xc020656c06ceafd5ll)
                              )>(sq)/
                      horner< NT2_HORNER_COEFF_T(sA0, 6,
                               (0x3ff0000000000000ll,
                                0xc02d7b590b5e0eabll,
                                0x40519fc025fe9054ll,
                                0xc06265bb6d3576d7ll,
                                0x4061705684ffbf9dll,
                                0xc04898220a3607acll)
                              )>(sq);
  const A0 mid_res = x*ratio+x;

  const A0 magnitude = select(small, x, select(gt(x, ct1), large_res, mid_res));
  return if_nan_else(gt(x, One<A0>()), b_xor(magnitude, bitofsign(a0)));
}
// Branching arcsine (64-bit coefficient patterns).
//   abs(a0) > One      -> Nan
//   abs(a0) < Sqrteps  -> a0 returned unchanged
//   abs(a0) > 0.625    -> formula built from 1-x, a square root and a 5/5
//                         rational polynomial
//   otherwise          -> x + x*R(x^2) with a 6/6 rational polynomial
// In the last two cases the sign bit of a0 is xor-ed into the result.
static inline A0 asin(const A0& a0)
{
  const A0 x = nt2::abs(a0);
  if ((x > One<A0>())) return Nan<A0>();
  if ((x < Sqrteps<A0>())) return a0;

  A0 magnitude;
  if((x > double_constant<double,0x3fe4000000000000ll> ())) //0.625;
  {
    const A0 comp = oneminus(x);
    const A0 vp = comp*horner< NT2_HORNER_COEFF_T(stype, 5,
                                (0x3f684fc3988e9f08ll,
                                 0xbfe2079259f9290fll,
                                 0x401bdff5baf33e6all,
                                 0xc03991aaac01ab68ll,
                                 0x403c896240f3081dll)
                               )>(comp)/
                       horner< NT2_HORNER_COEFF_T(stype, 5,
                                (0x3ff0000000000000ll,
                                 0xc035f2a2b6bf5d8cll,
                                 0x40626219af6a7f42ll,
                                 0xc077fe08959063eell,
                                 0x40756709b0b644bell)
                               )>(comp);
    const A0 root = sqrt(comp+comp);
    const A0 head = Pio_4<A0>()-root;
    const A0 tail = madd(root, vp, double_constant<double,0xbc91a62633145c07ll>());
    // Pio_4 is added in two steps, exactly as (Pio_4 - root - tail) + Pio_4
    magnitude = (head-tail)+Pio_4<A0>();
  }
  else
  {
    const A0 sq = sqr(x);
    const A0 ratio = sq*horner< NT2_HORNER_COEFF_T(stype, 6,
                                 (0x3f716b9b0bd48ad3ll,
                                  0xbfe34341333e5c16ll,
                                  0x4015c74b178a2dd9ll,
                                  0xc0304331de27907bll,
                                  0x40339007da779259ll,
                                  0xc020656c06ceafd5ll)
                                )>(sq)/
                        horner< NT2_HORNER_COEFF_T(stype, 6,
                                 (0x3ff0000000000000ll,
                                  0xc02d7b590b5e0eabll,
                                  0x40519fc025fe9054ll,
                                  0xc06265bb6d3576d7ll,
                                  0x4061705684ffbf9dll,
                                  0xc04898220a3607acll)
                                )>(sq);
    magnitude = x*ratio+x;
  }
  return b_xor(bitofsign(a0), magnitude);
}
// Tangent kernel (64-bit coefficient patterns):
// returns x + x*(x^2 * N(x^2)/D(x^2)) with a 3-coefficient numerator and a
// 5-coefficient denominator.
static inline A0 base_tan_eval(const A0& x)
{
  const A0 x2 = sqr(x);
  const A0 numer = horner< NT2_HORNER_COEFF_T(stype, 3,
                            (0xc0c992d8d24f3f38ll,
                             0x413199eca5fc9dddll,
                             0xc1711fead3299176ll))>(x2);
  const A0 divisor = horner< NT2_HORNER_COEFF_T(stype, 5,
                              (0x3ff0000000000000ll,
                               0x40cab8a5eeb36572ll,
                               0xc13427bc582abc96ll,
                               0x4177d98fc2ead8efll,
                               0xc189afe03cbe5a31ll))>(x2);
  const A0 ratio = numer/divisor;
  return x+ x*(x2*ratio);
}
// Branch-free arcsine (32-bit coefficient patterns).
// For abs(a0) > Half the argument is remapped to z = Half*(1-x), x = sqrt(z)
// and the result is Pio_2 - 2*p; otherwise z = x^2 and the result is p, where
// p = fma(P(z), z*x, x). The sign bit of a0 is xor-ed back at the end.
//
// Notes carried over from the original implementation:
//   remez polynomial of degree 4 for (asin(rx)-rx)/(rx*rx*rx) in [0, 0.25]
//   2120752146 values (99.53%) within 0.0 ULPs
//      9954286 values (0.47%)  within 0.5 ULPs
//   4.0 cycles/element SSE4.2 g++-4.8
static inline A0_n asin(const A0_n a0_n)
{
  const A0 a0 = a0_n;
  const A0 mag = nt2::abs(a0);
  const A0 sgn = nt2::bitofsign(a0);

  const bA0 x_larger_05 = gt(mag, nt2::Half<A0>());
  const A0 arg  = if_else(x_larger_05, nt2::Half<A0>()*nt2::oneminus(mag), nt2::sqr(mag));
  const A0 base = if_else(x_larger_05, sqrt(arg), mag);

  const A0 poly = horner<NT2_HORNER_COEFF_T(sA0, 5,
                          ( 0x3d2cb352
                          , 0x3cc617e3
                          , 0x3d3a3ec7
                          , 0x3d9980f6
                          , 0x3e2aaae4
                          ) )> (arg);
  const A0 core = nt2::fma(poly, arg*base, base);

  const A0 result = if_else(x_larger_05, nt2::Pio_2<A0>()-(core+core), core);
  return nt2::b_xor(result, sgn);
}
// Arctangent kernel working on abs(a0) (32-bit coefficient patterns).
// abs(a0) is classified against Tan_3pio_8 and the constant 0x3ed413cd; the
// classification picks an additive offset (Pio_2, Pio_4 or the if_zero_else
// result) and a reduced argument (x, -1/x or (x-1)/(x+1)). A 4-coefficient
// polynomial in the squared reduced argument is evaluated, then Pio_4lo /
// Pio_2lo corrections are applied through ifadd / ifnotadd.
// The sign of a0 is not re-applied here.
//
// Accuracy notes carried over from the original implementation:
//   4278190076 values computed in range: [-3.40282e+38, 3.40282e+38]
//   4257598358 values (99.52%) within 0.0 ULPs
//     20591718 values (0.48%)  within 0.5 ULPs
static inline A0_n kernel_atan(const A0_n a0_n)
{
  const A0 a0 = a0_n;
  const A0 x  = nt2::abs(a0);

  // range masks (x is non-negative here)
  const bA0 below_hi = nt2::lt(x, Tan_3pio_8<A0>());
  const bA0 in_mid   = nt2::logical_and(nt2::ge(x,single_constant<A0, 0x3ed413cd>()), below_hi);

  // additive offset selected by the range
  const A0 offset = nt2::if_else(in_mid, Pio_4<A0>(),
                                 nt2::if_zero_else(below_hi, Pio_2<A0>()));

  // reduced argument selected by the range
  const A0 reduced = nt2::if_else(in_mid, (nt2::minusone(x)/nt2::oneplus(x)),
                                  nt2::if_else(below_hi, x, -rec(x)));

  const A0 r2 = nt2::sqr(reduced);
  const A0 poly = horner<NT2_HORNER_COEFF_T(sA0, 4,
                          ( 0x3da4f0d1ul  //  8.5460119e-02
                          , 0xbe0e1b85ul  // -1.4031009e-01
                          , 0x3e4c925ful  //  1.9991724e-01
                          , 0xbeaaaa2aul  // -3.3333293e-01
                          ) )> (r2);

  A0 result = nt2::fma(reduced, nt2::mul( poly, r2), reduced);
  result = ifadd(in_mid, result, Pio_4lo<A0>());
  result = ifnotadd(below_hi, result, Pio_2lo<A0>());
  return offset+result;
}
// Branching arctangent (64-bit coefficient patterns).
//   is_eqz(a0) -> a0 returned unchanged
//   is_inf(a0) -> Pio_2 * sign(a0)
// Otherwise abs(a0) is reduced by range:
//   x >  0x4003504f333f9de6 : offset Pio_2, argument -1/x,        weight 1
//   x <= 0x3fe51eb851eb851f : offset Zero,  argument x,           weight 0
//   in between              : offset Pio_4, argument (x-1)/(x+1), weight Half
// A 5/6 rational polynomial in the squared argument is evaluated,
// weight*morebits is added as a low-order correction, and the result is
// negated when is_ltz(a0).
static inline A0 atan(const A0& a0)
{
  if (is_eqz(a0)) return a0;
  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);

  A0 x = nt2::abs(a0);
  A0 offset;
  // doubles as the branch condition and as the multiplier of morebits
  A0 weight = (x > double_constant<double,0x4003504f333f9de6ll>());
  if (weight)
  {
    offset = Pio_2<A0>();
    x = -rec(x);
  }
  else if ((x <= double_constant<double,0x3fe51eb851eb851fll>()))
  {
    offset = Zero<A0>();
  }
  else
  {
    offset = Pio_4<A0>();
    weight = Half<A0>();
    x = minusone(x)/oneplus(x);
  }

  const A0 x2 = sqr(x);
  const A0 ratio = x2*horner< NT2_HORNER_COEFF_T(stype, 5,
                               (0xbfec007fa1f72594ll,
                                0xc03028545b6b807all,
                                0xc052c08c36880273ll,
                                0xc05eb8bf2d05ba25ll,
                                0xc0503669fd28ec8ell)
                              )>(x2)/
                      horner< NT2_HORNER_COEFF_T(stype, 6,
                               (0x3ff0000000000000ll,
                                0x4038dbc45b14603cll,
                                0x4064a0dd43b8fa25ll,
                                0x407b0e18d2e2be3bll,
                                0x407e563f13b049eall,
                                0x4068519efbbd62ecll)
                              )>(x2);
  A0 z = madd(x, ratio, x);

  static const A0 morebits = double_constant<double,0x3c91a62633145c07ll>();
  z += weight * morebits;

  const A0 y = offset + z;
  return is_ltz(a0) ? -y : y;
}
// computes sinh for abs(a0) < 1 and x2 = sqr(a0) for doubles static BOOST_FORCEINLINE A0 compute(const A0& a0, const A0& x2) { typedef typename meta::scalar_of<A0>::type sA0; return fma(a0, x2*horner<NT2_HORNER_COEFF_T(sA0, 4, ( 0xbfe9435fe8bb3cd6ull, // -7.89474443963537015605E-1, 0xc064773a398ff4feull, // -1.63725857525983828727E2, 0xc0c694b8c71d6182ull, // -1.15614435765005216044E4, 0xc115782bdbf6ab05ull // -3.51754964808151394800E5 ) )> (x2)/ horner<NT2_HORNER_COEFF_T(sA0, 4, ( 0x3ff0000000000000ull, // 1.00000000000000000000E0, 0xc0715b6096e96484ull, // -2.77711081420602794433E2, 0x40e1a7ba7ed72245ull, // 3.61578279834431989373E4, 0xc1401a20e4f90044ull // -2.11052978884890840399E6 ) )> (x2), a0); }
// Polynomial kernel (32-bit coefficient patterns):
// returns x - x^2 * P(x^2), P being a 3-coefficient horner polynomial.
static inline A0 approx(const A0& x)
{
  typedef typename meta::scalar_of<A0>::type sA0;
  const A0 x2 = nt2::sqr(x);
  const A0 poly = nt2::horner<NT2_HORNER_COEFF_T( sA0, 3,
                                (0x3888d272,
                                 0xbb360954,
                                 0x3e2aaaaa)
                               )> (x2);
  return x - x2*poly;
}
// Rational kernel (64-bit coefficient patterns):
// with px = x*P(x^2) and r = px/(Q(x^2) - px), returns oneplus(r + r).
// P and Q are 4-coefficient horner polynomials.
static inline A0 approx(const A0& x)
{
  typedef typename meta::scalar_of<A0>::type sA0;
  const A0 x2 = sqr(x);
  const A0 odd_part = x*horner<NT2_HORNER_COEFF_T(sA0, 4,
                                 (0x3fa4fd75f3062dd4ll,
                                  0x40277d9474c55934ll,
                                  0x40796b7a050349e4ll,
                                  0x40a2b4798e134a01ll)
                                )> (x2);
  const A0 even_part = horner<NT2_HORNER_COEFF_T(sA0, 4,
                                (0x3ff0000000000000ll,
                                 0x405545fdce51ca08ll,
                                 0x4093e05eefd67782ll,
                                 0x40a03f37650df6e2ll)
                               )> (x2);
  const A0 ratio = odd_part/(even_part-odd_part);
  return oneplus(ratio+ratio);
}
// Sine kernel (64-bit coefficient patterns):
// returns madd(P(z)*z, x, x), P being a 6-coefficient horner polynomial.
// NOTE(review): z is presumably sqr(x) - confirm against the caller.
static inline A0 sin_eval(const A0& z, const A0& x)
{
  const A0 poly = horner< NT2_HORNER_COEFF_T(stype, 6,
                           (0x3de5d8fd1fcf0ec1ll,
                            0xbe5ae5e5a9291691ll,
                            0x3ec71de3567d4896ll,
                            0xbf2a01a019bfdf03ll,
                            0x3f8111111110f7d0ll,
                            0xbfc5555555555548ll) ) > (z);
  const A0 scaled = mul(poly,z);
  return madd(scaled,x,x);
}
// Shared tan/cot kernel (32-bit coefficient patterns):
// returns P(z^2)*z^2*z + z, P being a 6-coefficient horner polynomial.
static inline A0 base_tancot_eval(const A0& z)
{
  const A0 z2 = sqr(z);
  const A0 poly = horner< NT2_HORNER_COEFF_T(stype, 6,
                           (0x3c19c53b,
                            0x3b4c779c,
                            0x3cc821b5,
                            0x3d5ac5c9,
                            0x3e0896dd,
                            0x3eaaaa6f))>(z2);
  return poly*z2*z+z;
}
// computes sinh for abs(a0) < 1 and x2 = sqr(a0) for float static BOOST_FORCEINLINE A0 compute(const A0& a0, const A0& x2) { typedef typename meta::scalar_of<A0>::type sA0; return horner < NT2_HORNER_COEFF_T(sA0, 4, ( 0x39559e2f, // 2.03721912945E-4f 0x3c087bbe, // 8.33028376239E-3f 0x3e2aaacc, // 1.66667160211E-1f 0x3f800000 // 1.0f ) )> (x2)*a0; }
// Cosine kernel (64-bit coefficient patterns):
// returns oneminus(P(z)*z), P being a 7-coefficient horner polynomial.
// NOTE(review): z is presumably the squared reduced argument - confirm
// against the caller.
static inline A0 cos_eval(const A0& z)
{
  const A0 poly = horner< NT2_HORNER_COEFF_T(stype, 7,
                           (0x3da8ff831ad9b219ll,
                            0xbe21eea7c1e514d4ll,
                            0x3e927e4f8e06d9a5ll,
                            0xbefa01a019ddbcd9ll,
                            0x3f56c16c16c15d47ll,
                            0xbfa5555555555551ll,
                            0x3fe0000000000000ll) ) > (z);
  const A0 scaled = poly*z;
  return oneminus(scaled);
}
// Polynomial kernel (64-bit coefficient patterns):
// returns x - x^2 * P(x^2), P being a 5-coefficient horner polynomial.
static inline A0 approx(const A0& x)
{
  typedef typename meta::scalar_of<A0>::type sA0;
  const A0 x2 = sqr(x);
  const A0 poly = horner<NT2_HORNER_COEFF_T(sA0, 5,
                          (0x3e66376972bea4d0ll,
                           0xbebbbd41c5d26bf1ll,
                           0x3f11566aaf25de2cll,
                           0xbf66c16c16bebd93ll,
                           0x3fc555555555553ell)
                         )> (x2);
  return x - x2*poly;
}
// Sine kernel (64-bit coefficient patterns):
// returns fma(P(z)*z, x, x), P being a 6-coefficient horner polynomial and
// z the value of z_n converted to A0.
// NOTE(review): z is presumably sqr(x) - confirm against the caller.
static inline A0_n sin_eval(const A0_n z_n, const A0& x)
{
  const A0 z = z_n;
  const A0 poly = nt2::horner< NT2_HORNER_COEFF_T(stype, 6,
                                (0x3de5d8fd1fcf0ec1ll,
                                 0xbe5ae5e5a9291691ll,
                                 0x3ec71de3567d4896ll,
                                 0xbf2a01a019bfdf03ll,
                                 0x3f8111111110f7d0ll,
                                 0xbfc5555555555548ll) ) > (z);
  const A0 scaled = poly*z;
  return nt2::fma(scaled,x,x);
}
// Tangent kernel (32-bit coefficient patterns):
// returns P(z^2)*z^2*z + z, P being a 6-coefficient horner polynomial.
static inline A0 base_tan_eval(const A0& z)
{
  const A0 z2 = nt2::sqr(z);
  const A0 poly = nt2::horner< NT2_HORNER_COEFF_T(A0, 6,
                                (0x3c19c53b,
                                 0x3b4c779c,
                                 0x3cc821b5,
                                 0x3d5ac5c9,
                                 0x3e0896dd,
                                 0x3eaaaa6f))>(z2);
  return poly*z2*z+z;
}
// computes dawson(a0)/a0 for double or double vectors // xx is sqr(a0) and 6.25 < abs(a0) < 1.0e9 static BOOST_FORCEINLINE A0 dawson3(const A0& ox2, const A0 & x) { /* 6.25 to infinity */ A0 num = horner<NT2_HORNER_COEFF_T(sA0, 5, (0xbf173118, 0x3f211590, 0xbe3101f1, 0x3c8708d6, 0xb9ff3ce5 ) )> (ox2); A0 denom = horner<NT2_HORNER_COEFF_T(sA0, 6, (0x3f800000, 0xc02caf51, 0x3fddc960, 0xbec9942c, 0x3d0d0443, 0xba7f3ce5 ) )> (ox2); return nt2::rec(x)+ox2*num/(denom*x); }
template < class AA0 > static inline AA0 branch1(const AA0& x) { typedef typename meta::scalar_of<AA0>::type stype; const AA0 z = sqr(x); return (z-single_constant<AA0,0x40b90fdc> ())* horner< NT2_HORNER_COEFF_T(stype, 5, (0xb382511c, 0x36d660a0, 0xb9d01fb1, 0x3c5a6271, 0xbe3110a6 ) ) > (z); }
template < class AA0 > static inline AA0 branch1(const AA0 & x) { typedef typename meta::scalar_of<AA0>::type stype; const AA0 z = sqr(x); return (z-single_constant<AA0,0x416ae95a> ())*x* horner< NT2_HORNER_COEFF_T(stype, 5, (0xb1a7a246, 0x35214df5, 0xb83e7a4f, 0x3afdefd1, 0xbd0b7da6 ) ) > (z); }
// computes dawson(a0)/a0 for double or double vectors // xx is sqr(a0) and 6.25 < abs(a0) < 1.0e9 static BOOST_FORCEINLINE A0 dawson3(const A0& ox2, const A0 & x) { /* 6.25 to infinity */ A0 num = horner<NT2_HORNER_COEFF_T(sA0, 5, (0xbfe2e622ffa7ef20ll, //(-5.90592860534773254987E-1) 0x3fe422b1f29fbcb6ll, //(6.29235242724368800674E-1), 0xbfc6203e2f0a174ell, //(-1.72858975380388136411E-1) 0x3f90e11ab3d4d36bll, //(1.64837047825189632310E-2), 0xbf3fe79cad3d09fbll //(-4.86827613020462700845E-4) ) )> (ox2); A0 denom = horner<NT2_HORNER_COEFF_T(sA0, 6, (0x3ff0000000000000ll,//(1.00000000000000000000E0), 0xc00595ea2e7576e2ll,//(-2.69820057197544900361E0), 0x3ffbb92c0388a954ll,//(1.73270799045947845857E0), 0xbfd932857b438c94ll,//(-3.93708582281939493482E-1) 0x3fa1a0885fe44f2dll,//(3.44278924041233391079E-2), 0xbf4fe79cad3d0a8dll //(-9.73655226040941223894E-4) ) )> (ox2); return nt2::rec(x)+ox2*num/(denom*x); }
template < class AA0 > static inline AA0 branch1(const AA0 & a0) { typedef typename meta::scalar_of<AA0>::type sAA0; AA0 z = sqr(a0); AA0 p2 = (z-single_constant<AA0, 0x3edd4b3a> ())* horner< NT2_HORNER_COEFF_T(sAA0, 5, (0x33cb0920, 0xb71ded71, 0x3a0c1a3e, 0xbc81c8f4, 0x3e2edb4f ) ) > (z); return p2+single_constant<AA0, 0x3f22f983>()*log(a0)*j0(a0); }
// Branch-free arctangent (64-bit coefficient patterns, masks held in A0).
// abs(a0) is classified against tan3pio8 and tanpio8; the classification
// picks an additive offset (Pio_2 masked by b_notand, or Pio_4) and a reduced
// argument (x, -1/x or (x-1)/(x+1)). A 5/6 rational polynomial in the squared
// reduced argument is evaluated, a "morebits" correction is added according to
// the same masks, and the sign bit of a0 is xor-ed back into the result.
//
// The three thresholds/corrections are plain const locals: the previous
// function-local statics were never read (the bit patterns were repeated
// inline instead), so they only cost a guarded static initialisation.
static inline A0 atan(const A0& a0)
{
  typedef typename meta::scalar_of<A0>::type sA0;
  const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
  const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
  const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

  A0 x = abs(a0);

  // range masks
  const A0 flag1 = lt(x, tan3pio8);
  const A0 flag2 = b_and(ge(x, tanpio8), flag1);

  // additive offset selected by the range
  A0 yy = b_notand(flag1, Pio_2<A0>());
  yy = select(flag2, Pio_4<A0>(), yy);

  // reduced argument selected by the range
  A0 xx = select(flag1, x, -rec(x));
  xx = select(flag2, minusone(x)/oneplus(x),xx);

  A0 z = sqr(xx);
  z = z*horner< NT2_HORNER_COEFF_T(sA0, 5,
                 (0xbfec007fa1f72594ll,
                  0xc03028545b6b807all,
                  0xc052c08c36880273ll,
                  0xc05eb8bf2d05ba25ll,
                  0xc0503669fd28ec8ell)
                )>(z)/
        horner< NT2_HORNER_COEFF_T(sA0, 6,
                 (0x3ff0000000000000ll,
                  0x4038dbc45b14603cll,
                  0x4064a0dd43b8fa25ll,
                  0x407b0e18d2e2be3bll,
                  0x407e563f13b049eall,
                  0x4068519efbbd62ecll)
                )>(z);
  z = fma(xx, z, xx);

  // low-order corrections: half of morebits under flag2, morebits masked by
  // b_notand on flag1
  z = seladd(flag2, z, mul(Half<A0>(), morebits));
  z = z+b_notand(flag1, morebits);

  yy = yy + z;
  return b_xor(yy, bitofsign(a0));
}