// Branch-free single-precision asin for a SIMD pack.
//
// Three regimes are selected per lane from |a0|:
//   * |a0| < 1.0e-4 : the result is |a0| itself;
//   * |a0| > 0.5    : z = 0.5*(1-|a0|), the polynomial is evaluated at
//                     sqrt(z) and the result is Pio_2 - 2*poly;
//   * otherwise     : z = |a0|^2 and the polynomial is evaluated at |a0|.
// The sign bit of a0 is xor-ed back onto the result at the end.
static inline A0_n asin(const A0_n a0_n)
{
  const A0 a0 = { a0_n };
  const A0 ax  = nt2::abs(a0);
  const A0 sgn = bitofsign(a0);

  // Lane masks for the two special regimes and their union.
  const bA0 is_tiny  = lt(ax, single_constant<A0, 0x38d1b717>()); // 1.0e-4f
  const bA0 is_large = gt(ax, Half<A0>());
  const bA0 is_edge  = logical_or(is_tiny, is_large);

  // Each candidate is zeroed outside its own regime, so or-ing them
  // together acts as a per-lane merge.
  const A0 tiny_arg  = if_else_zero(is_tiny, ax);
  const A0 large_arg = if_else_zero(is_large, Half<A0>()*oneminus(ax));
  const A0 arg = b_or(b_or(if_zero_else(is_edge, sqr(ax)), tiny_arg), large_arg);

  const A0 mid_x   = if_zero_else(is_edge, ax);
  const A0 large_x = if_else_zero(is_large, sqrt(arg));
  const A0 xr      = b_or(large_x, mid_x);

  // Polynomial in arg (Horner form), then poly*arg*xr + xr.
  A0 poly = madd(arg, single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
  poly = madd(poly, arg, single_constant<A0, 0x3d3a3ec7>());
  poly = madd(poly, arg, single_constant<A0, 0x3d9980f6>());
  poly = madd(poly, arg, single_constant<A0, 0x3e2aaae4>());
  poly = madd(poly, arg*xr, xr);

  // Tiny lanes bypass the polynomial; arg holds |a0| for those lanes.
  const A0 base = select(is_tiny, arg, poly);

  // Large lanes: Pio_2 - 2*base.
  const A0 twice     = base+base;
  const A0 reflected = Pio_2<A0>()-twice;
  const A0 magnitude = select(is_large, reflected, base);

  return b_xor(magnitude, sgn);
}
// Simultaneous sine/cosine: returns the sine and stores the cosine in c.
//
// An input flagged by is_invalid yields NaN for both values. Otherwise
// |a0| is handed to redu_t::reduce, which yields an integer n and a reduced
// argument (presumably a quadrant index and the in-range remainder --
// confirm against redu_t). Bit 0 of n decides which evaluator feeds which
// output; bit 1 of n contributes to both signs.
static inline A0 sincosa(const A0& a0, A0& c, const regular&)
{
  if (nt2::is_invalid(a0))
  {
    c = nt2::Nan<A0>();
    return c;
  }

  // Index of the top bit of sint_type (8 bits per byte assumed).
  static const sint_type de = static_cast<sint_type>(sizeof(sint_type)*8-1);

  const A0 ax = nt2::abs(a0);
  A0 reduced;
  const int_type n        = redu_t::reduce(ax, reduced);
  const int_type swap_bit = n&One<int_type>();
  const A0 reduced_sq     = nt2::sqr(reduced);

  // Cosine sign: (bit0 xor bit1) of n, moved to the top bit.
  const int_type cos_sign_bit = shli(nt2::bitwise_xor(swap_bit, (n&nt2::Two<int_type>())>>1), de);
  // Sine sign: sign of a0 xor bit1 of n (bit 1 shifted by de-1 lands on bit de).
  const A0 sin_sign_bit = nt2::bitwise_xor(bitofsign(a0), nt2::shli(n&Two<int_type>(), de-1));

  A0 s;
  if (nt2::is_nez(swap_bit))
  {
    // Odd n: the two evaluators exchange roles.
    c = eval_t::sin_eval(reduced_sq, reduced);
    s = eval_t::cos_eval(reduced_sq);
  }
  else
  {
    c = eval_t::cos_eval(reduced_sq);
    s = eval_t::sin_eval(reduced_sq, reduced);
  }

  c = nt2::bitwise_xor(c,cos_sign_bit);
  return nt2::bitwise_xor(s,sin_sign_bit);
}
// Branch-free single-precision asin where comparison results are used
// directly as A0 bit masks (b_and / b_notand / b_or do the lane merging).
//
// Regimes, chosen per lane from |a0|:
//   * |a0| < 1.0e-4 : result is |a0|;
//   * |a0| > 0.5    : z = 0.5*(1-|a0|), polynomial at sqrt(z),
//                     result Pio_2 - 2*poly;
//   * otherwise     : z = |a0|^2, polynomial at |a0|.
// The sign bit of a0 is xor-ed back at the end.
static inline A0 asin(const A0& a0)
{
  // bf::tie(sign, x) = sign_and_abs(a0);
  const A0 ax  = abs(a0);
  const A0 sgn = bitofsign(a0);

  const A0 tiny_mask  = lt(ax, single_constant<A0, 0x38d1b717>()); // 1.0e-4f
  const A0 large_mask = gt(ax, Half<A0>());
  const A0 edge_mask  = b_or(tiny_mask, large_mask);

  // Candidates masked to their own regime, then or-ed together.
  const A0 tiny_arg  = b_and(ax, tiny_mask);
  const A0 large_arg = b_and(Half<A0>()*oneminus(ax), large_mask);
  const A0 arg = b_or(b_or(b_notand(edge_mask, sqr(ax)), tiny_arg), large_arg);

  const A0 mid_x   = b_notand(edge_mask, ax);
  const A0 large_x = b_and(sqrt(arg), large_mask);
  const A0 xr      = b_or(large_x, mid_x);

  // Polynomial in arg (Horner form), then poly*arg*xr + xr.
  A0 poly = madd(arg, single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
  poly = madd(poly, arg, single_constant<A0, 0x3d3a3ec7>());
  poly = madd(poly, arg, single_constant<A0, 0x3d9980f6>());
  poly = madd(poly, arg, single_constant<A0, 0x3e2aaae4>());
  poly = madd(poly, arg*xr, xr);

  // Tiny lanes keep arg, which equals |a0| there.
  const A0 base = select(tiny_mask, arg, poly);

  // Large lanes: Pio_2 - 2*base.
  const A0 twice     = base+base;
  const A0 reflected = Pio_2<A0>()-twice;
  const A0 magnitude = select(large_mask, reflected, base);

  return b_xor(magnitude, sgn);
}
// Sine of a0.
//
// Returns NaN when is_invalid(a0) holds. When the reduction policy reports
// that |a0| needs a replacement routine, the call is forwarded to
// redu_t::sin_replacement. Otherwise |a0| is reduced, the evaluator is
// picked from bit 0 of the reduction index and the sign is rebuilt from
// the sign of a0 and bit 1 of that index.
static inline A0 sina(const A0& a0)
{
  // Index of the top bit of sint_type (8 bits per byte assumed).
  static const sint_type de = sizeof(sint_type)*8-1;

  if (is_invalid(a0)) return Nan<A0>();

  A0 x = nt2::abs(a0);
  if (redu_t::replacement_needed(x))
  {
    return redu_t::sin_replacement(a0);
  }

  A0 xr = Nan<A0>();
  A0 xc;
  int_type n = redu_t::reduce(x, xr, xc);
  int_type swap_bit = n&One<int_type>();

  // Sign of a0 xor bit 1 of n (shifting by de-1 moves bit 1 to bit de).
  A0 sign_bit = b_xor(bitofsign(a0), shli(n&Two<int_type>(), de-1));

  A0 xr_sq = sqr(xr);
  // Even n evaluates the sine kernel, odd n the cosine kernel.
  A0 value = is_eqz(swap_bit) ? eval_t::sin_eval(xr_sq, xr, xc)
                              : eval_t::cos_eval(xr_sq, xr, xc);

  return b_xor(value,sign_bit);
}
// Scalar single-precision asin.
//
//   * |a0| < 1.0e-4 : returns a0 unchanged;
//   * |a0| > 1      : returns NaN;
//   * |a0| > 0.5    : z = 0.5*(1-|a0|), polynomial at sqrt(z),
//                     result Pio_2 - 2*poly;
//   * otherwise     : z = |a0|^2, polynomial at |a0|.
// The sign bit of a0 is xor-ed onto the result.
static inline A0 asin(const A0& a0)
{
  // bf::tie(sign, x) = sign_and_abs(a0);
  A0 ax = nt2::abs(a0);
  const A0 sgn = bitofsign(a0);

  if ((ax < single_constant<A0,0x38d1b717>())) return a0;
  if ((ax > One<A0>())) return Nan<A0>();

  const bool above_half = (ax > Half<A0>());

  A0 arg;
  if (above_half)
  {
    arg = Half<A0>()*oneminus(ax);
    ax  = sqrt(arg);
  }
  else
  {
    arg = sqr(ax);
  }

  // Polynomial in arg (Horner form), then poly*arg*ax + ax.
  A0 poly = madd(arg, single_constant<A0,0x3d2cb352>(), single_constant<A0,0x3cc617e3>());
  poly = madd(poly, arg, single_constant<A0,0x3d3a3ec7>());
  poly = madd(poly, arg, single_constant<A0,0x3d9980f6>());
  poly = madd(poly, arg, single_constant<A0,0x3e2aaae4>());
  poly = madd(poly, arg*ax, ax);

  if (above_half)
  {
    // Pio_2 - 2*poly
    poly = poly+poly;
    poly = Pio_2<A0>()-poly;
  }

  return b_xor(poly, sgn);
}
static inline void sincosa(const A0& a0, A0& s, A0& c) { if (is_invalid(a0)) { s = c = Nan<A0>(); return; } A0 x = nt2::abs(a0); if (redu_t::replacement_needed(x)) { redu_t::sincos_replacement(a0, s, c); } else { static const sint_type de = sizeof(sint_type)*8-1; A0 xr, xc; int_type n = redu_t::reduce(x, xr, xc); int_type swap_bit = n&One<int_type>(); A0 z = sqr(xr); int_type cos_sign_bit = shli(b_xor(swap_bit, (n&Two<int_type>())>>1), de); A0 sin_sign_bit = b_xor(bitofsign(a0), shli(n&Two<int_type>(), de-1)); if (is_nez(swap_bit)) { c = eval_t::sin_eval(z, xr, xc); s = eval_t::cos_eval(z, xr, xc); } else { c = eval_t::cos_eval(z, xr, xc); s = eval_t::sin_eval(z, xr, xc); } c = b_xor(c,cos_sign_bit); s = b_xor(s,sin_sign_bit); } }
// Branch-free double-precision asin for a SIMD pack.
//
// Both rational approximations are evaluated for every lane and the
// result is picked per lane:
//   * |a0| > 1            : NaN;
//   * |a0| < Sqrteps      : |a0|;
//   * |a0| > 0.625        : approximation in w = 1-|a0|;
//   * otherwise           : approximation in a0^2.
// The sign bit of a0 is xor-ed onto the selected magnitude.
static inline A0_n asin(const A0_n a0_n)
{
  typedef typename meta::scalar_of<A0>::type sA0;

  const A0 a0 = { a0_n };
  const A0 ax = nt2::abs(a0);

  const A0  quarter_pi = Pio_4<A0>();
  const bA0 is_small   = lt(ax, Sqrteps<A0>());
  const A0  morebits   = double_constant<A0, 0xbc91a62633145c07ll>();
  const A0  upper_cut  = double_constant<A0, 0x3fe4000000000000ll>(); // 0.625

  // ---- candidate for |a0| above the cut -------------------------------
  const A0 w = oneminus(ax);
  const A0 w_ratio = w*horner< NT2_HORNER_COEFF_T(sA0, 5,
                               (0x3f684fc3988e9f08ll,
                                0xbfe2079259f9290fll,
                                0x401bdff5baf33e6all,
                                0xc03991aaac01ab68ll,
                                0x403c896240f3081dll)
                               )>(w)/
                     horner< NT2_HORNER_COEFF_T(sA0, 5,
                               (0x3ff0000000000000ll,
                                0xc035f2a2b6bf5d8cll,
                                0x40626219af6a7f42ll,
                                0xc077fe08959063eell,
                                0x40756709b0b644bell)
                               )>(w);
  const A0 root       = sqrt(w+w);
  const A0 head       = quarter_pi-root;
  const A0 correction = fma(root, w_ratio, morebits);
  const A0 tail       = head-correction;
  const A0 upper_res  = tail+quarter_pi;

  // ---- candidate for the middle range ---------------------------------
  const A0 a0_sq = sqr(a0);
  const A0 mid_ratio = a0_sq*horner< NT2_HORNER_COEFF_T(sA0, 6,
                                     (0x3f716b9b0bd48ad3ll,
                                      0xbfe34341333e5c16ll,
                                      0x4015c74b178a2dd9ll,
                                      0xc0304331de27907bll,
                                      0x40339007da779259ll,
                                      0xc020656c06ceafd5ll)
                                     )>(a0_sq)/
                       horner< NT2_HORNER_COEFF_T(sA0, 6,
                                     (0x3ff0000000000000ll,
                                      0xc02d7b590b5e0eabll,
                                      0x40519fc025fe9054ll,
                                      0xc06265bb6d3576d7ll,
                                      0x4061705684ffbf9dll,
                                      0xc04898220a3607acll)
                                     )>(a0_sq);
  const A0 mid_res = ax*mid_ratio+ax;

  // ---- per-lane selection ---------------------------------------------
  return if_nan_else( gt(ax, One<A0>())
                    , b_xor( select( is_small
                                   , ax
                                   , select( gt(ax, upper_cut)
                                           , upper_res
                                           , mid_res
                                           )
                                   )
                           , bitofsign(a0)
                           )
                    );
}
// Cotangent of a0 (regular-tag overload).
//
// Returns NaN when is_invalid(a0) or redu_t::cot_invalid(a0) holds, and an
// infinity carrying the sign bit of a0 when a0 is zero. Otherwise |a0| is
// reduced and eval_t::cot_eval is called with a third argument of +1 when
// the reduction index is even and -1 when it is odd; the sign of a0 is
// xor-ed onto the result.
static inline A0 cota(const A0& a0, const regular&)
{
  if (nt2::is_invalid(a0)||redu_t::cot_invalid(a0))
  {
    return Nan<A0>();
  }

  const A0 ax        = nt2::abs(a0);
  const A0 sign_of_a = bitofsign(a0);

  if (!a0)
  {
    return b_or(Inf<A0>(), sign_of_a);
  }

  A0 reduced = Nan<A0>();
  A0 extra;
  const int_type n = redu_t::reduce(ax, reduced, extra);

  // 1-((n&1)<<1) is +1 for even n and -1 for odd n.
  const A0 magnitude = eval_t::cot_eval(reduced, extra, 1-((n&1)<<1));
  return b_xor(magnitude, sign_of_a);
}
// Tangent of a0 (regular-tag overload).
//
// Returns NaN when is_invalid(a0) or redu_t::tan_invalid(a0) holds, and a0
// itself when a0 is zero. Otherwise |a0| is reduced, eval_t::tan_eval is
// called, and the sign bit of a0 is xor-ed onto the result.
static inline A0 tana(const A0& a0, const regular&)
{
  if (is_invalid(a0)||redu_t::tan_invalid(a0))
  {
    return Nan<A0>();
  }
  if (is_eqz(a0))
  {
    return a0;
  }

  const A0 ax = nt2::abs(a0);
  A0 reduced = Nan<A0>();
  A0 extra;
  A0 magnitude;
  const int_type n = redu_t::reduce(ax, reduced, extra);

  // Third argument:  1 -- n even
  //                 -1 -- n odd
  magnitude = eval_t::tan_eval(reduced, extra, 1-((n&1)<<1));

  return b_xor(magnitude, bitofsign(a0));
}
// Scalar double-precision asin.
//
//   * |a0| > 1        : NaN;
//   * |a0| < Sqrteps  : a0 unchanged;
//   * |a0| > 0.625    : rational approximation in w = 1-|a0|;
//   * otherwise       : rational approximation in |a0|^2.
// The sign bit of a0 is xor-ed onto the magnitude.
static inline A0 asin(const A0& a0)
{
  A0 ax = nt2::abs(a0);
  if ((ax > One<A0>())) return Nan<A0>();
  if ((ax < Sqrteps<A0>())) return a0;

  A0 magnitude;
  if ((ax > double_constant<double,0x3fe4000000000000ll> ())) // 0.625
  {
    const A0 w = oneminus(ax);
    const A0 ratio = w*horner< NT2_HORNER_COEFF_T(stype, 5,
                               (0x3f684fc3988e9f08ll,
                                0xbfe2079259f9290fll,
                                0x401bdff5baf33e6all,
                                0xc03991aaac01ab68ll,
                                0x403c896240f3081dll)
                               )>(w)/
                     horner< NT2_HORNER_COEFF_T(stype, 5,
                               (0x3ff0000000000000ll,
                                0xc035f2a2b6bf5d8cll,
                                0x40626219af6a7f42ll,
                                0xc077fe08959063eell,
                                0x40756709b0b644bell)
                               )>(w);
    const A0 root = sqrt(w+w);
    A0 acc = Pio_4<A0>()-root;
    const A0 correction = madd(root, ratio, double_constant<double,0xbc91a62633145c07ll>());
    acc = acc-correction;
    magnitude = acc+Pio_4<A0>();
  }
  else
  {
    const A0 ax_sq = sqr(ax);
    A0 ratio = ax_sq*horner< NT2_HORNER_COEFF_T(stype, 6,
                             (0x3f716b9b0bd48ad3ll,
                              0xbfe34341333e5c16ll,
                              0x4015c74b178a2dd9ll,
                              0xc0304331de27907bll,
                              0x40339007da779259ll,
                              0xc020656c06ceafd5ll)
                             )>(ax_sq)/
               horner< NT2_HORNER_COEFF_T(stype, 6,
                             (0x3ff0000000000000ll,
                              0xc02d7b590b5e0eabll,
                              0x40519fc025fe9054ll,
                              0xc06265bb6d3576d7ll,
                              0x4061705684ffbf9dll,
                              0xc04898220a3607acll)
                             )>(ax_sq);
    magnitude = ax*ratio+ax;
  }

  return b_xor(bitofsign(a0), magnitude);
}
// Cotangent of a0.
//
// Returns NaN when is_invalid(a0) or redu_t::cot_invalid(a0) holds. When
// the reduction policy asks for a replacement, the call is forwarded to
// redu_t::cot_replacement. A zero input yields an infinity carrying the
// sign bit of a0. Otherwise |a0| is reduced, eval_t::cot_eval is called
// and the sign bit of a0 is xor-ed onto the result.
static inline A0 cota(const A0& a0)
{
  if (nt2::is_invalid(a0)||redu_t::cot_invalid(a0))
  {
    return Nan<A0>();
  }

  A0 x = nt2::abs(a0);
  if (redu_t::replacement_needed(x))
  {
    return redu_t::cot_replacement(a0);
  }

  const A0 sign_of_a = bitofsign(a0);
  if (!a0)
  {
    return b_or(Inf<A0>(), sign_of_a);
  }

  A0 reduced = Nan<A0>();
  A0 extra;
  A0 magnitude;
  int_type n = redu_t::reduce(x, reduced, extra);

  // 1-((n&1)<<1) is +1 for even n and -1 for odd n.
  magnitude = eval_t::cot_eval(reduced, extra, 1-((n&1)<<1));
  return b_xor(magnitude, sign_of_a);
}
// Branch-free single-precision atan with A0 bit masks.
//
// Per lane, with x = |a0|:
//   * x >= tan(3pi/8)              : offset Pio_2, argument -1/x;
//   * tan(pi/8) <= x < tan(3pi/8)  : offset Pio_4, argument (x-1)/(x+1);
//   * otherwise                    : offset 0,     argument x.
// A polynomial in the squared argument is then added to the offset and the
// sign bit of a0 is xor-ed onto the sum.
static inline A0 atan(const A0& a0)
{
  // bf::tie(sign, x) = sign_and_abs(a0);
  const A0 ax  = nt2::abs(a0);
  const A0 sgn = bitofsign(a0);

  const A0 below_hi = lt(ax, single_constant<A0, 0x401a827a>()); // tan3pio8
  const A0 in_mid   = b_and(ge(ax, single_constant<A0, 0x3ed413cd>()), below_hi);

  // Offset: Pio_2 where x is not below the high threshold, Pio_4 in the
  // middle band, zero elsewhere.
  A0 offset = b_notand(below_hi, Pio_2<A0>());
  offset = select(in_mid, Pio_4<A0>(), offset);

  // Transformed argument for each band.
  A0 arg = select(below_hi, ax, -rec(ax));
  arg = select(in_mid, (minusone(ax)/oneplus(ax)), arg);

  // Degree-3 polynomial in arg_sq, split into two halves joined by arg_sq^2.
  const A0 arg_sq = sqr(arg);
  A0 hi_part = madd(arg_sq, single_constant<A0, 0x3da4f0d1>(), single_constant<A0, 0xbe0e1b85>());
  A0 lo_part = madd(arg_sq, single_constant<A0, 0x3e4c925f>(), single_constant<A0, 0xbeaaaa2a>());
  hi_part = madd(hi_part, sqr(arg_sq), lo_part);

  offset = add(offset, madd(arg, mul( hi_part, arg_sq), arg));
  return b_xor(offset, sgn);
}
// Tangent of a0.
//
// Returns NaN when is_invalid(a0) or redu_t::tan_invalid(a0) holds, and a0
// itself when a0 is zero. When the reduction policy asks for a
// replacement, the call is forwarded to redu_t::tan_replacement. Otherwise
// |a0| is reduced, eval_t::tan_eval is called and the sign bit of a0 is
// xor-ed onto the result.
static inline A0 tana(const A0& a0)
{
  if (is_invalid(a0)||redu_t::tan_invalid(a0))
  {
    return Nan<A0>();
  }
  if (is_eqz(a0))
  {
    return a0;
  }

  A0 x = nt2::abs(a0);
  if (redu_t::replacement_needed(x))
  {
    return redu_t::tan_replacement(a0);
  }

  A0 reduced = Nan<A0>();
  A0 extra;
  A0 magnitude;
  int_type n = redu_t::reduce(x, reduced, extra);

  // Third argument:  1 -- n even
  //                 -1 -- n odd
  magnitude = eval_t::tan_eval(reduced, extra, 1-((n&1)<<1));

  return b_xor(magnitude, bitofsign(a0));
}
// Scalar single-precision atan.
//
// Zero is returned unchanged and an infinite input yields Pio_2 times the
// sign of a0. Otherwise, with x = |a0|:
//   * x > tan(3pi/8) : offset Pio_2, argument -1/x;
//   * x > tan(pi/8)  : offset Pio_4, argument (x-1)/(x+1);
//   * otherwise      : offset 0,     argument x.
// A polynomial in the squared argument is added to the offset and the sign
// bit of a0 is xor-ed onto the sum.
static inline A0 atan(const A0& a0)
{
  if (is_eqz(a0)) return a0;
  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);

  A0 offset;
  A0 arg = nt2::abs(a0);
  A0 sign_of_a = bitofsign(a0);

  if (arg > single_constant<float,0x401a827a>())      // 2.414213562373095, tan 3pi/8
  {
    offset = Pio_2<A0>();
    arg = -rec(arg);
  }
  else if (arg > single_constant<float,0x3ed413cd>()) // 0.4142135623730950f, tan pi/8
  {
    offset = Pio_4<A0>();
    arg = minusone(arg)/oneplus(arg);
  }
  else
  {
    offset = 0.0;
  }

  // Equivalent closed form of the polynomial step below:
  //   offset +=
  //     ((( 8.05374449538e-2 * z
  //       - 1.38776856032E-1) * z
  //       + 1.99777106478E-1) * z
  //       - 3.33329491539E-1) * z * arg
  //       + arg;
  A0 arg_sq = sqr(arg);
  A0 hi_part = madd(arg_sq, single_constant<A0,0x3da4f0d1>(),single_constant<A0,0xbe0e1b85>());
  A0 lo_part = madd(arg_sq, single_constant<A0,0x3e4c925f>(),single_constant<A0,0xbeaaaa2a>());
  hi_part = madd(hi_part, sqr(arg_sq), lo_part);
  offset = add(offset, madd(arg, mul( hi_part, arg_sq), arg));

  return b_xor(sign_of_a, offset );
}
// Branch-free double-precision atan with A0 bit masks.
//
// Per lane, with x = |a0|:
//   * x >= tan3pio8             : offset Pio_2, argument -1/x;
//   * tanpio8 <= x < tan3pio8   : offset Pio_4, argument (x-1)/(x+1);
//   * otherwise                 : offset 0,     argument x.
// A rational approximation in the squared argument is evaluated, a small
// correction (morebits, or half of it in the middle band) is added in the
// two upper bands, and the sign bit of a0 is xor-ed onto the sum.
//
// The thresholds and the correction term are plain const locals: the
// previous function-local statics were never read (the constants were
// re-spelled inline) and one of them, Twothird, had no use at all.
static inline A0 atan(const A0& a0)
{
  typedef typename meta::scalar_of<A0>::type sA0;

  const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
  const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
  const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

  A0 x = abs(a0);

  // flag1: below the high threshold; flag2: inside the middle band.
  const A0 flag1 = lt(x, tan3pio8);
  const A0 flag2 = b_and(ge(x, tanpio8), flag1);

  // Offset added to the approximation for each band.
  A0 yy = b_notand(flag1, Pio_2<A0>());
  yy = select(flag2, Pio_4<A0>(), yy);

  // Transformed argument for each band.
  A0 xx = select(flag1, x, -rec(x));
  xx = select(flag2, minusone(x)/oneplus(x),xx);

  A0 z = sqr(xx);
  z = z*horner< NT2_HORNER_COEFF_T(sA0, 5,
                (0xbfec007fa1f72594ll,
                 0xc03028545b6b807all,
                 0xc052c08c36880273ll,
                 0xc05eb8bf2d05ba25ll,
                 0xc0503669fd28ec8ell)
                )>(z)/
      horner< NT2_HORNER_COEFF_T(sA0, 6,
                (0x3ff0000000000000ll,
                 0x4038dbc45b14603cll,
                 0x4064a0dd43b8fa25ll,
                 0x407b0e18d2e2be3bll,
                 0x407e563f13b049eall,
                 0x4068519efbbd62ecll)
                )>(z);
  z = fma(xx, z, xx);

  // Middle band gets half of morebits, the upper band gets all of it.
  z = seladd(flag2, z, mul(Half<A0>(), morebits));
  z = z+b_notand(flag1, morebits);

  yy = yy + z;
  return b_xor(yy, bitofsign(a0));
}
// atan for a native SIMD value: wraps the input in A0, runs kernel_atan on
// it and xors the sign bit of the input onto the kernel's result.
// NOTE(review): kernel_atan presumably works on the magnitude and returns
// a non-negative value -- confirm against its definition.
static inline A0_n atan(const A0_n a0_n)
{
  const A0 a0 = {a0_n};
  const A0 magnitude = {kernel_atan(a0)};
  return b_xor(magnitude, bitofsign(a0));
}
/* Tail of a macro invocation that begins before this chunk. */
(real_<A0>) )

namespace nt2 { namespace ext {
  /* acotd_ functor for tag::real_ inputs on tag::cpu_. */
  template<class Dummy>
  struct call<tag::acotd_(tag::real_), tag::cpu_, Dummy> : callable
  {
    /* Result type: the argument type passed through meta::strip. */
    template<class Sig> struct result;
    template<class This,class A0>
    struct result<This(A0)> : meta::strip<A0>{};

    NT2_FUNCTOR_CALL(1)
    {
      /* Sign bit of the input, or-ed onto every returned value. */
      A0 s = bitofsign(a0);
      /* Zero input: Ninety carrying the input's sign bit. */
      if(!a0) return b_or(Ninety<A0>(), s);
      /* Infinite input: zero carrying the input's sign bit. */
      if(is_inf(a0)) return b_or(Zero<A0>(), s);
      /* General case: Ninety - atand(|a0|), with the input's sign bit. */
      return b_or(Ninety<A0>()-atand(abs(a0)), s);
    }
  };
} }

/* Registers acotd_ on tag::cpu_ for arithmetic_ arguments; the matching
   implementation presumably follows in the namespace opened below, which
   continues past this chunk. */
NT2_REGISTER_DISPATCH(tag::acotd_, tag::cpu_,
                      (A0),
                      (arithmetic_<A0>)
                     )

namespace nt2 { namespace ext {
// atan of a0: runs kernel_atan on the input and xors the sign bit of the
// input onto the kernel's result.
// NOTE(review): kernel_atan presumably works on the magnitude and returns
// a non-negative value -- confirm against its definition.
static inline A0 atan(const A0& a0)
{
  A0 magnitude = kernel_atan(a0);
  return b_xor(magnitude, bitofsign(a0));
}