// Power-series branch of the enclosing computation.
//
// x  : argument. NOTE: taken by non-const reference and overwritten here --
//      the entries flagged by is_eqz(x) are patched through seladd(.., One)
//      because the summation loop below never terminates for x == 0. The
//      caller observes the patched value.
// sn : order as a plain integer. The scalar loop counts sn-1 down to zero,
//      so it only terminates normally for sn >= 1.
// n  : order in AA1 form (presumably the same value as sn -- confirm at the
//      call site).
//
// Returns powi(-x, sn-1) * psi / gamma(n) minus the accumulated series, with
// Inf<A1>() folded in through seladd for the entries whose original x was 0.
static inline AA1 case_1(AA1 & x, int32_t sn, const AA1 & n)
{
  typedef typename meta::scalar_of<AA1>::type sA1;

  // Record the zero entries first: x is modified on the next line.
  AA1 zero_mask = is_eqz(x);
  x = seladd(is_eqz(x), x, One<A1>()); // loop is infinite for x == 0

  // Scalar sum of rec(i) for i = sn-1 down to 1.
  sA1 harmonic = Zero<sA1>();
  int32_t i = sn-1;
  while (i)
  {
    harmonic += rec(static_cast<sA1>(i));
    --i;
  }
  AA1 psi = -Euler<A1>()-nt2::log(x)+splat<A1>(harmonic);

  AA1 ratio;
  AA1 minus_x = -x;
  AA1 k       = Zero<A1>();
  AA1 term    = One<A1>();
  AA1 denom   = One<A1>() - n;
  AA1 sum     = ( sn == 1 ) ? Zero<A1>() : rec(denom);

  do
  {
    // After this step term holds the product of (-x/j) for j = 1..k.
    k     += One<AA1>();
    term  *= minus_x/k;
    denom += One<AA1>();
    // seladd with the is_nez(denom) mask: presumably term/denom is only
    // added where denom is non-zero.
    sum    = seladd(is_nez(denom), sum, term/denom);
    // Relative size of the last term; One is used where sum is zero so the
    // loop keeps going for those entries.
    ratio  = select(is_nez(sum), nt2::abs(term/sum), One<AA1>());
  }
  while( nt2::bitwise_any(gt(ratio, Halfeps<A1>())) );

  // TO DO pow->powi and gamma splatted from scalar or mere factorial call
  return seladd(zero_mask,
                (nt2::powi(minus_x, sn-1) * psi / nt2::gamma(n)) - sum,
                Inf<A1>());
}
// Single-precision arctangent kernel working on the magnitude of a0.
//
// Zero input returns Zero<A0>() and infinite input returns Pio_2<A0>().
// Otherwise |a0| is mapped into a small interval by one of three branches
// (thresholds annotated below), a polynomial in the squared reduced argument
// is evaluated, and the branch offset is added back.
// The sign of a0 is discarded by nt2::abs; presumably the caller restores it.
static inline A0 kernel_atan(const A0& a0)
{
  if (is_eqz(a0)) return Zero<A0>();
  if (is_inf(a0)) return Pio_2<A0>();

  A0 r = nt2::abs(a0);
  A0 base;
  if( r > single_constant<A0,0x401a827a>() )       // 2.414213562373095, tan(3*pi/8)
  {
    base = Pio_2<A0>();
    r    = -rec(r);
  }
  else if( r > single_constant<A0,0x3ed413cd>() )  // 0.4142135623730950f, tan(pi/8)
  {
    base = Pio_4<A0>();
    r    = minusone(r)/oneplus(r);
  }
  else
  {
    base = 0.0;
  }

  // Reference form of the polynomial (from the original comment):
  //   y += ((( 8.05374449538e-2 * z
  //          - 1.38776856032E-1) * z
  //          + 1.99777106478E-1) * z
  //          - 3.33329491539E-1) * z * x
  //        + x;
  // It is evaluated below as two degree-1 pieces in r2 that are then joined
  // with sqr(r2).
  A0 r2 = sqr(r);
  A0 hi = madd(r2, single_constant<A0,0x3da4f0d1>(), single_constant<A0,0xbe0e1b85>());
  A0 lo = madd(r2, single_constant<A0,0x3e4c925f>(), single_constant<A0,0xbeaaaa2a>());
  hi = madd(hi, sqr(r2), lo);
  return add(base, madd(r, mul(hi, r2), r));
}
// Sine of a0.
//
// - invalid input (is_invalid)           -> Nan<A0>()
// - redu_t::replacement_needed(|a0|)     -> redu_t::sin_replacement(a0)
// - otherwise: reduce |a0| with redu_t::reduce, evaluate the sine or cosine
//   kernel depending on the low bit of the returned count, and fix the sign.
static inline A0 sina(const A0& a0)
{
  // Index of the top bit of sint_type.
  static const sint_type top_bit = sizeof(sint_type)*8-1;

  if (is_invalid(a0)) return Nan<A0>();

  A0 mag = nt2::abs(a0);
  if (redu_t::replacement_needed(mag))
  {
    return redu_t::sin_replacement(a0);
  }

  // red / red_c are filled in by reduce (red_c is presumably a correction
  // term -- confirm against redu_t).
  A0 red = Nan<A0>();
  A0 red_c;
  int_type n = redu_t::reduce(mag, red, red_c);

  // Bit 0 of n selects the kernel. Bit 1 of n is shifted by top_bit-1 so it
  // lands on the top bit, then xor-ed with the sign of the input.
  int_type use_cos = n&One<int_type>();
  A0 sign_flip = b_xor(bitofsign(a0), shli(n&Two<int_type>(), top_bit-1));

  A0 val = sqr(red);
  if (is_eqz(use_cos))
  {
    val = eval_t::sin_eval(val, red, red_c);
  }
  else
  {
    val = eval_t::cos_eval(val, red, red_c);
  }
  return b_xor(val, sign_flip);
}
// Base-10 logarithm of a0 (this overload uses the six-argument kernel_log).
//
// Special cases, tested in this order:
//   a0 == +Inf        -> a0
//   a0 == 0           -> Minf<A0>()
//   a0 NaN or a0 < 0  -> Nan<A0>()
// Otherwise kernel_log fills k, hfsq, s, R and f, which are combined below.
static inline A0 log10(const A0& a0)
{
  if (a0 == Inf<A0>())               return a0;
  if (is_eqz(a0))                    return Minf<A0>();
  if (nt2::is_nan(a0)||is_ltz(a0))   return Nan<A0>();

  A0 k, hfsq, s, R, f;
  kernel_log(a0, k, hfsq, s, R, f);

  // Presumably the natural log of the reduced argument (confirm against
  // kernel_log); it is scaled by Invlog_10 and k by Log_2olog_10.
  const A0 ln_part = -(hfsq-(s*(hfsq+R))-f);
  return ln_part*Invlog_10<A0>()+k*Log_2olog_10<A0>();
}
// Natural logarithm of a0 (this overload uses the six-argument kernel_log).
//
// Special cases, tested in this order:
//   a0 == +Inf        -> a0
//   a0 == 0           -> Minf<A0>()
//   a0 NaN or a0 < 0  -> Nan<A0>()
//
// ln(2) is applied in two pieces:
//   ln(2)hi = 6.93147180369123816490e-01 or 0x3fe62e42fee00000
//   ln(2)lo = 1.90821492927058770002e-10 or 0x3dea39ef35793c76
static inline A0 log(const A0& a0)
{
  if (a0 == Inf<A0>())                    return a0;
  if (is_eqz(a0))                         return nt2::Minf<A0>();
  if (nt2::is_nan(a0)||nt2::is_ltz(a0))   return nt2::Nan<A0>();

  A0 k, hfsq, s, R, f;
  kernel_log(a0, k, hfsq, s, R, f);

  // Small terms first: polynomial part plus k * ln(2)lo.
  const A0 small = s*(hfsq+R)+nt2::mul(k, double_constant<A0, 0x3dea39ef35793c76ll>());
  const A0 tail  = (hfsq-small)-f;
  // k * ln(2)hi minus the tail.
  return nt2::mul(k, double_constant<A0, 0x3fe62e42fee00000ll>())-tail;
}
// Tangent of a0, `regular` tag overload: always goes through
// redu_t::reduce, with no replacement path.
//
//   is_invalid(a0) or redu_t::tan_invalid(a0) -> Nan<A0>()
//   a0 == 0                                   -> a0
// Otherwise |a0| is reduced and eval_t::tan_eval is called with a selector
// that is 1 when the reduction count is even and -1 when it is odd; the sign
// bit of a0 is then xor-ed back into the result.
static inline A0 tana(const A0& a0, const regular&)
{
  if (is_invalid(a0)||redu_t::tan_invalid(a0)) return Nan<A0>();
  if (is_eqz(a0)) return a0;

  const A0 mag = nt2::abs(a0);

  // red / red_c are filled in by reduce.
  A0 red = Nan<A0>();
  A0 red_c;
  const int_type n = redu_t::reduce(mag, red, red_c);

  //  1 -- n even
  // -1 -- n odd
  A0 t = eval_t::tan_eval(red, red_c, 1-((n&1)<<1));
  return b_xor(t, bitofsign(a0));
}
// Applies the special-case results to y, the value already computed for a0.
//
//   a0 < 0 (or a0 NaN, unless BOOST_SIMD_NO_NANS)      -> if_nan_else mask
//   a0 == +Inf (unless BOOST_SIMD_NO_INFINITIES)       -> a0
//   a0 == 0                                            -> Minf<A0>()
// The zero case is applied last, so it takes precedence over the others.
static inline A0 finalize(const A0& a0, const A0& y)
{
  // Mask of the inputs that if_nan_else is applied to.
#ifdef BOOST_SIMD_NO_NANS
  BOOST_AUTO_TPL(bad, nt2::is_ltz(a0));
#else
  BOOST_AUTO_TPL(bad, nt2::logical_or(nt2::is_ltz(a0), nt2::is_nan(a0)));
#endif

  A0 out = nt2::if_nan_else(bad, y);

#ifndef BOOST_SIMD_NO_INFINITIES
  out = if_else(nt2::is_equal(a0, nt2::Inf<A0>()), a0, out);
#endif

  return if_else(is_eqz(a0), nt2::Minf<A0>(), out);
}
// Natural logarithm of a0 (this overload uses the five-argument kernel_log
// and single-precision constants).
//
// Special cases, tested in this order:
//   a0 == +Inf        -> a0
//   a0 == 0           -> Minf<stA0>()
//   a0 NaN or a0 < 0  -> Nan<stA0>()
//
// kernel_log fills e, m, m2 and p. Judging from how they are combined, e is
// the exponent term, m the reduced argument, m2 its square and p a
// polynomial part -- confirm against kernel_log.
static inline A0 log(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  if (a0 == Inf<stA0>())             return a0;
  if (is_eqz(a0))                    return Minf<stA0>();
  if (nt2::is_nan(a0)||is_ltz(a0))   return Nan<stA0>();

  A0 m, e, m2, p;
  kernel_log(a0, e, m, m2, p);

  // 0xb95e8083 decodes to about -2.1219444e-4: the low part of ln(2).
  p = madd(e, single_constant<stA0, 0xb95e8083>(), p);
  p = madd(Mhalf<stA0>(), m2, p);
  A0 acc = m + p;
  // 0x3f318000 decodes to 0.693359375: the high part of ln(2).
  return madd(single_constant<stA0, 0x3f318000>(), e, acc);
}
// Base-2 logarithm of a0 (this overload uses the five-argument kernel_log
// and single-precision constants).
//
// Special cases, tested in this order:
//   a0 == +Inf        -> a0
//   a0 == 0           -> Minf<stA0>()
//   a0 NaN or a0 < 0  -> Nan<stA0>()
static inline A0 log2(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  if (a0 == Inf<stA0>())             return a0;
  if (is_eqz(a0))                    return Minf<stA0>();
  if (nt2::is_nan(a0)||is_ltz(a0))   return Nan<stA0>();

  A0 m, e, m2, p;
  kernel_log(a0, e, m, m2, p);
  p = madd(Mhalf<stA0>(), m2, p);

  // multiply log of fraction by log2(e)
  // 0x3ee2a8ed is 0.44269504088896340735992, i.e. log2(e) - 1; the "+ p + m"
  // in the return statement supplies the remaining factor of one.
  A0 scaled = madd( m
                  , single_constant<stA0, 0x3ee2a8ed>()
                  , mul(p, single_constant<stA0, 0x3ee2a8ed>())
                  );
  return ((scaled+p)+m)+e;
}
// Base-10 logarithm of a0 (this overload uses the five-argument kernel_log
// and single-precision constants).
//
// Special cases, tested in this order:
//   a0 == +Inf        -> a0
//   a0 == 0           -> Minf<stA0>()
//   a0 NaN or a0 < 0  -> Nan<stA0>()
//
// The decimal values on the constants below are decoded from their IEEE-754
// bit patterns. amul(a, b, c) is presumably a + b*c -- confirm.
static inline A0 log10(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  if (a0 == Inf<stA0>())             return a0;
  if (is_eqz(a0))                    return Minf<stA0>();
  if (nt2::is_nan(a0)||is_ltz(a0))   return Nan<stA0>();

  A0 m, e, m2, p;
  kernel_log(a0, e, m, m2, p);
  p = amul(p, Mhalf<stA0>(), m2);

  // multiply log of fraction by log10(e) and base 2 exponent by log10(2)
  A0 acc = mul(m+p, single_constant<stA0, 0x3a37b152>());      // 7.00731903251827651129E-4f : log10(e) lo
  acc = amul(acc, p, single_constant<stA0, 0x3ede0000>());     // 4.3359375E-1f              : log10(e) hi
  acc = amul(acc, m, single_constant<stA0, 0x3ede0000>());     // 4.3359375E-1f              : log10(e) hi
  acc = amul(acc, e, single_constant<stA0, 0x39826a14>());     // 2.48745663981195213739E-4f : log10(2) lo
  return amul(acc, e, single_constant<stA0, 0x3e9a0000 >());   // 3.0078125E-1f              : log10(2) hi
}
// Double-precision arctangent of a0.
//
//   a0 == 0    -> a0 (sign of zero preserved)
//   a0 == +-Inf -> Pio_2<A0>() * sign(a0)
// Otherwise |a0| falls in one of three ranges:
//   |a0| >  0x4003504f333f9de6 (about 2.414214) : offset Pio_2, r = -1/r, weight 1
//   |a0| <= 0x3fe51eb851eb851f (0.66)           : offset 0,     r kept,   weight 0
//   in between                                  : offset Pio_4, r = minusone(r)/oneplus(r), weight 1/2
// A rational function of r*r is evaluated, weight * morebits is added as a
// small correction, and the result is negated for negative a0.
static inline A0 atan(const A0& a0)
{
  if (is_eqz(a0)) return a0;
  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);

  A0 r = nt2::abs(a0);
  A0 base;
  // weight is both the first range test and, later, the multiplier of
  // morebits: the comparison result is stored as an A0 value.
  A0 weight = (r > double_constant<double,0x4003504f333f9de6ll>());
  if (weight)
  {
    base = Pio_2<A0>();
    r    = -rec(r);
  }
  else if ((r <= double_constant<double,0x3fe51eb851eb851fll>()))
  {
    base = Zero<A0>();
  }
  else
  {
    base   = Pio_4<A0>();
    weight = Half<A0>();
    r      = minusone(r)/oneplus(r);
  }

  // Rational approximation in q = r*r: q * P(q) / Q(q), then r*q + r.
  A0 q = sqr(r);
  q = q*horner< NT2_HORNER_COEFF_T(stype, 5,
                                   (0xbfec007fa1f72594ll,
                                    0xc03028545b6b807all,
                                    0xc052c08c36880273ll,
                                    0xc05eb8bf2d05ba25ll,
                                    0xc0503669fd28ec8ell)
                                  )>(q)/
        horner< NT2_HORNER_COEFF_T(stype, 6,
                                   (0x3ff0000000000000ll,
                                    0x4038dbc45b14603cll,
                                    0x4064a0dd43b8fa25ll,
                                    0x407b0e18d2e2be3bll,
                                    0x407e563f13b049eall,
                                    0x4068519efbbd62ecll)
                                  )>(q);
  q = madd(r, q, r);

  // 0x3c91a62633145c07 decodes to about 6.1232e-17.
  static const A0 morebits = double_constant<double,0x3c91a62633145c07ll>();
  q += weight * morebits;
  base = base + q;

  return is_ltz(a0) ? -base : base;
}
// Tangent of a0.
//
//   is_invalid(a0) or redu_t::tan_invalid(a0) -> Nan<A0>()
//   a0 == 0                                   -> a0
//   redu_t::replacement_needed(|a0|)          -> redu_t::tan_replacement(a0)
// Otherwise |a0| is reduced and eval_t::tan_eval is called with a selector
// that is 1 when the reduction count is even and -1 when it is odd; the sign
// bit of a0 is then xor-ed back into the result.
static inline A0 tana(const A0& a0)
{
  if (is_invalid(a0)||redu_t::tan_invalid(a0)) return Nan<A0>();
  if (is_eqz(a0)) return a0;

  A0 mag = nt2::abs(a0);
  if (redu_t::replacement_needed(mag))
  {
    return redu_t::tan_replacement(a0);
  }

  // red / red_c are filled in by reduce.
  A0 red = Nan<A0>();
  A0 red_c;
  int_type n = redu_t::reduce(mag, red, red_c);

  //  1 -- n even
  // -1 -- n odd
  A0 t = eval_t::tan_eval(red, red_c, 1-((n&1)<<1));
  return b_xor(t, bitofsign(a0));
}
// Cotangent of a0.
//
// a0 is passed through scale(), eval_t::base_tancot_eval is applied to the
// scaled value and rec() is taken of the result. The masks not_in_range(a0)
// and is_eqz(a0) are or-ed into the result bitwise; presumably this turns
// the flagged entries into NaN (all bits set) -- confirm against b_or and
// the mask representation.
static inline A0 cota(const A0& a0)
{
  A0 scaled = scale(a0);
  return b_or( b_or(not_in_range(a0), is_eqz(a0))
             , rec(eval_t::base_tancot_eval(scaled))
             );
}