// Double-precision arctangent kernel.
//
// The argument is classified against two thresholds, tan(pi/8) and
// tan(3*pi/8), and reduced accordingly:
//   x >= tan(3*pi/8)            : evaluate at -1/x,        offset pi/2
//   tan(pi/8) <= x < tan(3*pi/8): evaluate at (x-1)/(x+1), offset pi/4
//   otherwise                   : evaluate at x,           offset 0
// The reduced value r is fed to a rational polynomial in r*r, a small
// correction constant is added outside the lowest range, and the offset
// is added last.
// NOTE(review): no sign handling happens here, so callers presumably pass
// a non-negative argument -- confirm against the call sites.
static inline A0_n kernel_atan(const A0_n a0_n)
{
  typedef typename meta::scalar_of<A0>::type sA0;

  // Range thresholds, given as the bit patterns of the double constants.
  const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
  const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
  // Correction term: added in full at or above tan3pio8 and halved in the
  // middle range.
  const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

  A0 arg = { a0_n };

  // Lane masks for the range classification.
  const A0 below_hi = lt(arg, tan3pio8);
  const A0 in_mid   = b_and(ge(arg, tanpio8), below_hi);

  // Additive offset per range; b_notand(mask, v) presumably keeps v only
  // where mask is clear -- verify against its definition.
  const A0 offset = select(in_mid,
                           Pio_4<A0>(),
                           b_notand(below_hi, Pio_2<A0>()));

  // Reduced argument per range.
  const A0 reduced = select(in_mid,
                            minusone(arg)/oneplus(arg),
                            select(below_hi, arg, -rec(arg)));

  // Rational approximation evaluated in sq = reduced^2.
  const A0 sq = sqr(reduced);
  const A0 numer = sq*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                                 (0xbfec007fa1f72594ll,
                                                  0xc03028545b6b807all,
                                                  0xc052c08c36880273ll,
                                                  0xc05eb8bf2d05ba25ll,
                                                  0xc0503669fd28ec8ell)
                                                ) >(sq);
  const A0 denom = horner< NT2_HORNER_COEFF_T(sA0, 6,
                                              (0x3ff0000000000000ll,
                                               0x4038dbc45b14603cll,
                                               0x4064a0dd43b8fa25ll,
                                               0x407b0e18d2e2be3bll,
                                               0x407e563f13b049eall,
                                               0x4068519efbbd62ecll)
                                             ) >(sq);

  // reduced * (numer/denom) + reduced
  A0 res = fma(reduced, numer/denom, reduced);

  // Apply the range-dependent correction, then the range offset.
  res = seladd(in_mid, res, mul(Half<A0>(), morebits));
  res = res + b_notand(below_hi, morebits);
  return offset + res;
}
// Single-precision arcsine, computed branch-free with bit masks.
//
// With ax = |a0| the lanes fall into three ranges:
//   ax < 1.0e-4 : the result is ax itself
//   ax > 0.5    : the polynomial is evaluated at t = sqrt(0.5*(1-ax)) and
//                 the result is pi/2 - 2*p(t)
//   otherwise   : the polynomial is evaluated directly at ax
// The sign bits of a0 are xor-ed back into the result at the end.
static inline A0 asin(const A0& a0)
{
  const A0 ax   = abs(a0);
  const A0 sign = bitofsign(a0);

  // Range masks.
  const A0 is_tiny  = lt(ax, single_constant<A0, 0x38d1b717>()); // 1.0e-4f
  const A0 is_large = gt(ax, Half<A0>());
  const A0 is_edge  = b_or(is_tiny, is_large);

  // z holds, per lane: ax^2 (middle range), ax (tiny) or 0.5*(1-ax) (large).
  const A0 tiny_z  = b_and(ax, is_tiny);
  const A0 large_z = b_and(Half<A0>()*oneminus(ax), is_large);
  const A0 mid_z   = b_notand(is_edge, sqr(ax));
  const A0 z = b_or(b_or(mid_z, tiny_z), large_z);

  // t is the polynomial argument: sqrt(z) for large lanes, ax for middle
  // lanes, and the b_notand result (presumably zero) for tiny lanes.
  const A0 large_t = b_and(sqrt(z), is_large);
  const A0 t = b_or(large_t, b_notand(is_edge, ax));

  // Horner evaluation in z, then p = poly*(z*t) + t.
  A0 poly = madd(z, single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
  poly = madd(poly, z, single_constant<A0, 0x3d3a3ec7>());
  poly = madd(poly, z, single_constant<A0, 0x3d9980f6>());
  poly = madd(poly, z, single_constant<A0, 0x3e2aaae4>());
  poly = madd(poly, z*t, t);

  // Tiny lanes bypass the polynomial (z equals ax there).
  const A0 direct = select(is_tiny, z, poly);

  // Large lanes use pi/2 - 2*p.
  const A0 reflected = Pio_2<A0>()-(direct+direct);
  const A0 magnitude = select(is_large, reflected, direct);

  return b_xor(magnitude, sign);
}
// Single-precision arctangent, computed branch-free with bit masks.
//
// With ax = |a0| and two thresholds lo < hi (hi is tan(3*pi/8) per the
// original annotation; lo is given only by its bit pattern):
//   ax >= hi       : evaluate at -1/ax,           offset pi/2
//   lo <= ax < hi  : evaluate at (ax-1)/(ax+1),   offset pi/4
//   otherwise      : evaluate at ax,              offset 0
// A cubic polynomial in r^2 (r being the reduced argument) is then applied
// and the sign bits of a0 are xor-ed back into the result.
static inline A0 atan(const A0& a0)
{
  const A0 ax   = nt2::abs(a0);
  const A0 sign = bitofsign(a0);

  // Range masks.
  const A0 below_hi = lt(ax, single_constant<A0, 0x401a827a>()); // tan3pio8
  const A0 in_mid   = b_and(ge(ax, single_constant<A0, 0x3ed413cd>()), below_hi);

  // Additive offset per range; b_notand(mask, v) presumably keeps v only
  // where mask is clear -- verify against its definition.
  const A0 offset = select(in_mid,
                           Pio_4<A0>(),
                           b_notand(below_hi, Pio_2<A0>()));

  // Reduced argument per range.
  const A0 reduced = select(in_mid,
                            (minusone(ax)/oneplus(ax)),
                            select(below_hi, ax, -rec(ax)));

  // The cubic in sq is evaluated as two linear halves combined through
  // sq^2: (c3*sq + c2)*sq^2 + (c1*sq + c0).
  const A0 sq = sqr(reduced);
  const A0 upper = madd(sq, single_constant<A0, 0x3da4f0d1>(), single_constant<A0, 0xbe0e1b85>());
  const A0 lower = madd(sq, single_constant<A0, 0x3e4c925f>(), single_constant<A0, 0xbeaaaa2a>());
  const A0 poly  = madd(upper, sqr(sq), lower);

  // offset + (reduced*(poly*sq) + reduced), with the sign restored.
  const A0 magnitude = add(offset, madd(reduced, mul(poly, sq), reduced));
  return b_xor(magnitude, sign);
}