// Arc sine of packed single-precision values, evaluated without branches:
// all range tests produce masks of type A0 that are combined bitwise.
//
// Three ranges of |a0| are handled:
//   * |a0| < 1.0e-4 : |a0| itself is the result,
//   * |a0| > 0.5    : the polynomial runs on z = (1-|a0|)/2 with sqrt(z) as
//                     odd factor, then the result is reflected as
//                     Pio_2 - 2*r,
//   * otherwise     : the polynomial runs on z = a0^2 with |a0| as odd factor.
// The sign bit of a0 is XOR-ed back onto the magnitude at the end.
//
// NOTE(review): assumes b_notand(m, v) clears v where m is set and
// select(m, a, b) takes a where m is set - confirm against their definitions.
static inline A0 asin(const A0& a0)
{
  A0 mag = abs(a0);
  const A0 sgn = bitofsign(a0);

  // Range masks.
  const A0 is_tiny  = lt(mag, single_constant<A0, 0x38d1b717>()); // 1.0e-4f
  const A0 is_large = gt(mag, Half<A0>());
  const A0 is_edge  = b_or(is_tiny, is_large);

  // Build the polynomial argument z by OR-ing the three masked candidates.
  const A0 tiny_arg  = b_and(mag, is_tiny);
  const A0 large_arg = b_and(Half<A0>()*oneminus(mag), is_large);
  A0 z = b_or(b_or(b_notand(is_edge, sqr(mag)), tiny_arg), large_arg);

  // Odd factor: |a0| in the middle range, sqrt(z) in the large range
  // (the tiny lanes are masked out here and overridden further down).
  mag = b_notand(is_edge, mag);
  const A0 large_root = b_and(sqrt(z), is_large);
  mag = b_or(large_root, mag);

  // Horner evaluation of the approximation polynomial in z.
  A0 poly = madd(z, single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
  poly = madd(poly, z, single_constant<A0, 0x3d3a3ec7>());
  poly = madd(poly, z, single_constant<A0, 0x3d9980f6>());
  poly = madd(poly, z, single_constant<A0, 0x3e2aaae4>());
  poly = madd(poly, z*mag, mag);

  // Tiny lanes keep z (which holds |a0| there); the others take the polynomial.
  z = select(is_tiny, z, poly);

  // Large lanes are reflected around Pio_2.
  const A0 doubled   = z+z;
  const A0 reflected = Pio_2<A0>()-doubled;
  z = select(is_large, reflected, z);

  return b_xor(z, sgn);
}
// Arc sine of a packed single-precision value received as its native
// representation (A0_n) and wrapped into A0 before use.
//
// Same scheme as the bitwise-mask variant, but range tests yield logical
// masks (bA0) that are consumed through if_else_zero / if_zero_else / select:
//   * |a0| < 1.0e-4 : |a0| itself is the result,
//   * |a0| > 0.5    : polynomial on z = (1-|a0|)/2 with sqrt(z) as odd
//                     factor, result reflected as Pio_2 - 2*r,
//   * otherwise     : polynomial on z = a0^2 with |a0| as odd factor.
// The sign bit of a0 is XOR-ed back onto the magnitude at the end.
//
// NOTE(review): assumes if_else_zero(m, v) keeps v where m is true and
// if_zero_else(m, v) keeps v where m is false - confirm.
static inline A0_n asin(const A0_n a0_n)
{
  const A0 a0 = { a0_n };

  A0 mag = nt2::abs(a0);
  const A0 sgn = bitofsign(a0);

  // Range masks.
  const bA0 is_tiny  = lt(mag, single_constant<A0, 0x38d1b717>()); // 1.0e-4f
  const bA0 is_large = gt(mag, Half<A0>());
  const bA0 is_edge  = logical_or(is_tiny, is_large);

  // Build the polynomial argument z by OR-ing the three masked candidates.
  const A0 tiny_arg  = if_else_zero(is_tiny, mag);
  const A0 large_arg = if_else_zero(is_large, Half<A0>()*oneminus(mag));
  A0 z = b_or(b_or(if_zero_else(is_edge, sqr(mag)), tiny_arg), large_arg);

  // Odd factor: |a0| in the middle range, sqrt(z) in the large range.
  mag = if_zero_else(is_edge, mag);
  const A0 large_root = if_else_zero(is_large, sqrt(z));
  mag = b_or(large_root, mag);

  // Horner evaluation of the approximation polynomial in z.
  A0 poly = madd(z, single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
  poly = madd(poly, z, single_constant<A0, 0x3d3a3ec7>());
  poly = madd(poly, z, single_constant<A0, 0x3d9980f6>());
  poly = madd(poly, z, single_constant<A0, 0x3e2aaae4>());
  poly = madd(poly, z*mag, mag);

  // Tiny lanes keep z (which holds |a0| there); the others take the polynomial.
  z = select(is_tiny, z, poly);

  // Large lanes are reflected around Pio_2.
  const A0 doubled   = z+z;
  const A0 reflected = Pio_2<A0>()-doubled;
  z = select(is_large, reflected, z);

  return b_xor(z, sgn);
}
// Base-10 logarithm built on the five-output kernel_log decomposition.
//
// The regular result combines the kernel outputs scaled by Invlog_10 with
// the first kernel output scaled by Log_2olog_10. Lanes where a0 is
// negative or NaN get the invalid mask OR-ed onto the result.
//
// Special values are handled without extra selections: a0 - 1/|a0| is
// -inf for a0 == 0 and +inf for a0 == +inf, and is added to the result
// only in the lanes where it is infinite.
// NOTE(review): assumes seladd(m, a, b) yields a + b where m is set and a
// elsewhere - confirm.
static inline A0 log10(const A0& a0)
{
  A0 k, h, q, p, r;
  kernel_log(a0, k, h, q, p, r);

  const A0 regular = -(h-(q*(h+p))-r)*Invlog_10<A0>()+k*Log_2olog_10<A0>();

  // Infinite exactly for the inputs whose logarithm is infinite.
  const A0 edge = a0-rec(abs(a0));

  const A0 invalid = b_or(is_ltz(a0), is_nan(a0));
  return seladd(is_inf(edge), b_or(regular, invalid), edge);
}
static inline void sincosa(const A0& a0, A0& s, A0& c) { A0 test = not_in_range(a0); A0 x = scale(a0); A0 z = sqr(x); c = b_or(test, eval_t::cos_eval(z, x, Zero<A0>())); s = b_or(test, eval_t::sin_eval(z, x, Zero<A0>())); // c = cosa(a0); // s = sina(a0); }
// Base-2 logarithm (single precision) built on the four-output kernel_log
// decomposition.
//
// The log of the fraction is multiplied by log2(e); since the constant used
// is log2(e) - 1, the unscaled terms are added back afterwards, followed by
// the exponent term. Lanes where a0 is negative or NaN get the invalid mask
// OR-ed onto the result.
//
// Special values: a0 - 1/|a0| is -inf for a0 == 0 and +inf for a0 == +inf,
// and is added only in the lanes where it is infinite.
// NOTE(review): assumes seladd(m, a, b) yields a + b where m is set - confirm.
static inline A0 log2(const A0& a0)
{
  A0 u, k, u2, v;
  kernel_log(a0, k, u, u2, v);
  v = madd(Mhalf<A0>(), u2, v);

  // 0x3ee2a8ed is 0.44269504088896340735992, i.e. log2(e) - 1.
  const A0 scaled = madd(u, single_constant<A0, 0x3ee2a8ed>(),
                         mul(v, single_constant<A0, 0x3ee2a8ed>()));
  const A0 regular = ((scaled+v)+u)+k;

  // Infinite exactly for the inputs whose logarithm is infinite.
  const A0 edge = a0-rec(abs(a0));

  const A0 invalid = b_or(is_ltz(a0), is_nan(a0));
  return seladd(is_inf(edge), b_or(regular, invalid), edge);
}
// Natural logarithm (double precision) built on the five-output kernel_log
// decomposition.
//
// ln(2) is applied to the first kernel output in two pieces:
//   ln(2)hi = 6.93147180369123816490e-01 (0x3fe62e42fee00000)
//   ln(2)lo = 1.90821492927058770002e-10 (0x3dea39ef35793c76)
// Lanes where a0 is negative or NaN get the invalid mask OR-ed onto the
// result.
//
// Special values: a0 - 1/|a0| is -inf for a0 == 0 and +inf for a0 == +inf,
// and is added only in the lanes where it is infinite.
// NOTE(review): assumes seladd(m, a, b) yields a + b where m is set - confirm.
static inline A0 log(const A0& a0)
{
  A0 k, h, q, p, r;
  kernel_log(a0, k, h, q, p, r);

  const A0 regular = mul(k, double_constant<A0, 0x3fe62e42fee00000ll>())-
    ((h-(q*(h+p)+mul(k, double_constant<A0, 0x3dea39ef35793c76ll>())))-r);

  // Infinite exactly for the inputs whose logarithm is infinite.
  const A0 edge = a0-rec(abs(a0));

  const A0 invalid = b_or(is_ltz(a0), is_nan(a0));
  return seladd(is_inf(edge), b_or(regular, invalid), edge);
}
// Base-10 logarithm (single precision) built on the four-output kernel_log
// decomposition.
//
// The log of the fraction is multiplied by log10(e) and the base-2 exponent
// by log10(2); each constant is split into a high and a low part that are
// accumulated separately. Lanes where a0 is negative or NaN get the invalid
// mask OR-ed onto the result.
//
// Special values: a0 - 1/|a0| is -inf for a0 == 0 and +inf for a0 == +inf,
// and is added only in the lanes where it is infinite.
// NOTE(review): assumes amul(a, b, c) computes a + b*c and seladd(m, a, b)
// yields a + b where m is set - confirm.
static inline A0 log10(const A0& a0)
{
  A0 u, k, u2, v;
  kernel_log(a0, k, u, u2, v);
  v = amul(v, -Half<A0>(), u2);

  A0 acc = mul(u+v, single_constant<A0, 0x3a37b152>()); // log10(e) low : 7.00731903251827651129E-4f
  acc = amul(acc, v, single_constant<A0, 0x3ede0000>()); // log10(e) high: 4.3359375E-1f
  acc = amul(acc, u, single_constant<A0, 0x3ede0000>()); // log10(e) high: 4.3359375E-1f
  acc = amul(acc, k, single_constant<A0, 0x39826a14>()); // log10(2) low : 2.48745663981195213739E-4f
  acc = amul(acc, k, single_constant<A0, 0x3e9a0000>()); // log10(2) high: 3.0078125E-1f

  // Infinite exactly for the inputs whose logarithm is infinite.
  const A0 edge = a0-rec(abs(a0));

  const A0 invalid = b_or(is_ltz(a0), is_nan(a0));
  return seladd(is_inf(edge), b_or(acc, invalid), edge);
}
// Arc sine of a packed double-precision value received as its native
// representation (A0_n) and wrapped into A0 before use.
//
// Two rational approximations are evaluated for every lane and the result
// is picked per lane:
//   * |a0| < Sqrteps : |a0| itself,
//   * |a0| > 0.625   : approximation in 1-|a0|, assembled around Pio_4 with
//                      a low-order correction term (morebits),
//   * otherwise      : approximation in a0^2.
// The sign bit of a0 is XOR-ed back, and lanes with |a0| > 1 get the
// comparison mask OR-ed onto the result.
// NOTE(review): presumably that OR turns out-of-domain lanes into NaN -
// confirm.
static inline A0_n asin(const A0_n a0_n)
{
  const A0 a0 = { a0_n };
  typedef typename meta::scalar_of<A0>::type sA0;
  A0 x = nt2::abs(a0);

  // Input-independent constants; safe to initialise once.
  static const A0 pio4 = Pio_4<A0>();
  static const A0 morebits = double_constant<A0, 0xbc91a62633145c07ll>();
  static const A0 ct1 = double_constant<A0, 0x3fe4000000000000ll>(); // 0.625

  // This mask depends on the argument, so it must be recomputed on every
  // call. It must NOT be a function-local static: a static would be
  // initialised by the first call only and silently reused afterwards.
  const A0 is_small = lt(x, Sqrteps<A0>());

  // Branch for |a0| > 0.625: rational approximation in 1-|a0|.
  A0 zz1 = oneminus(x);
  const A0 vp = zz1*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                               (0x3f684fc3988e9f08ll,
                                                0xbfe2079259f9290fll,
                                                0x401bdff5baf33e6all,
                                                0xc03991aaac01ab68ll,
                                                0x403c896240f3081dll)
                                              )>(zz1)/
                    horner< NT2_HORNER_COEFF_T(sA0, 5,
                                               (0x3ff0000000000000ll,
                                                0xc035f2a2b6bf5d8cll,
                                                0x40626219af6a7f42ll,
                                                0xc077fe08959063eell,
                                                0x40756709b0b644bell)
                                              )>(zz1);
  zz1 = sqrt(zz1+zz1);
  A0 z = pio4-zz1;
  zz1 = fma(zz1, vp, morebits);
  z = z-zz1;
  zz1 = z+pio4;

  // Branch for the remaining range: rational approximation in a0^2.
  A0 zz2 = sqr(a0);
  z = zz2*horner< NT2_HORNER_COEFF_T(sA0, 6,
                                     (0x3f716b9b0bd48ad3ll,
                                      0xbfe34341333e5c16ll,
                                      0x4015c74b178a2dd9ll,
                                      0xc0304331de27907bll,
                                      0x40339007da779259ll,
                                      0xc020656c06ceafd5ll)
                                    )>(zz2)/
          horner< NT2_HORNER_COEFF_T(sA0, 6,
                                     (0x3ff0000000000000ll,
                                      0xc02d7b590b5e0eabll,
                                      0x40519fc025fe9054ll,
                                      0xc06265bb6d3576d7ll,
                                      0x4061705684ffbf9dll,
                                      0xc04898220a3607acll)
                                    )>(zz2);
  zz2 = x*z+x;

  return b_or(b_xor(select(is_small,
                           x,
                           select(gt(x, ct1),
                                  zz1,
                                  zz2
                                 )
                          ),
                    bitofsign(a0)
                   ),
              gt(x, One<A0>())
             );
}
// Natural logarithm (single precision) built on the four-output kernel_log
// decomposition.
//
// The exponent term is multiplied by ln(2) in two pieces whose sum is
// ln(2): 0xb95e8083 (about -2.12194e-4) is folded in first and 0x3f318000
// (0.693359375) last. Lanes where a0 is negative or NaN get the invalid
// mask OR-ed onto the result.
//
// Special values: a0 - 1/|a0| is -inf for a0 == 0 and +inf for a0 == +inf,
// and is added only in the lanes where it is infinite.
// NOTE(review): assumes seladd(m, a, b) yields a + b where m is set - confirm.
static inline A0 log(const A0& a0)
{
  A0 u, k, u2, v;
  kernel_log(a0, k, u, u2, v);

  v = madd(k, single_constant<A0, 0xb95e8083>(), v);
  v = madd(Mhalf<A0>(), u2, v);
  const A0 frac_log = u + v;

  // Infinite exactly for the inputs whose logarithm is infinite.
  const A0 edge = a0-rec(abs(a0));

  const A0 regular = madd(single_constant<A0, 0x3f318000>(), k, frac_log);

  const A0 invalid = b_or(is_ltz(a0), is_nan(a0));
  return seladd(is_inf(edge), b_or(regular, invalid), edge);
}
// Arc cosine of a packed double-precision value received as its native
// representation (A0_n), expressed through asin.
//
// Both candidates are computed for every lane:
//   * a0 > 0.5  : 2 * asin(sqrt(0.5 - 0.5*a0)),
//   * otherwise : ((Pio_4 - asin(a0)) + c) + Pio_4, where c is the small
//                 constant 0x3c91a62633145c07 (about 6.12e-17).
// Lanes with |a0| > 1 get the comparison mask OR-ed onto the result.
// NOTE(review): presumably that OR turns out-of-domain lanes into NaN -
// confirm.
static inline A0_n acos(const A0_n a0_n)
{
  const A0 a0 = { a0_n };

  // Candidate used above one half.
  const A0 half_angle = { asin( sqrt(Half<A0>() - Half<A0>()*a0) ) };
  const A0 upper = Two<A0>() * half_angle;

  // Candidate used everywhere else; Pio_4 is added in two steps.
  const A0 direct = { asin(a0) };
  const A0 lower = ((Pio_4<A0>() - direct)+double_constant<A0, 0x3c91a62633145c07ll>())+ Pio_4<A0>();

  const A0 picked = sel( gt(a0, Half<A0>()), upper, lower );
  return b_or( gt(abs(a0), One<A0>()), picked );
}
// Scalar cotangent, "regular" reduction flavour (selected by the unnamed
// tag parameter).
//
// Returns Nan when a0 is invalid or rejected by redu_t::cot_invalid, and
// an infinity carrying the sign bit of a0 when a0 is zero. Otherwise |a0|
// is reduced by redu_t::reduce, the kernel is evaluated with a +1/-1
// factor derived from the parity of the reduction index, and the sign bit
// of a0 is XOR-ed back onto the result.
static inline A0 cota(const A0& a0, const regular&)
{
  if (nt2::is_invalid(a0)) return nt2::Nan<A0>();
  if (redu_t::cot_invalid(a0)) return nt2::Nan<A0>();

  const A0 magnitude = nt2::abs(a0);
  const A0 sign_bit  = nt2::bitofsign(a0);

  // Zero input: signed infinity.
  if (!a0) return b_or(nt2::Inf<A0>(), sign_bit);

  A0 reduced = nt2::Nan<A0>();
  const int_type index = redu_t::reduce(magnitude, reduced);

  // 1 when index is even, -1 when it is odd.
  const int_type parity_factor = 1-((index&1)<<1);

  const A0 value = eval_t::cot_eval(reduced, parity_factor);
  return nt2::bitwise_xor(value, sign_bit);
}
// Scalar cotangent.
//
// Returns Nan when a0 is invalid or rejected by redu_t::cot_invalid. When
// redu_t::replacement_needed(|a0|) holds, the work is delegated to
// redu_t::cot_replacement(a0). Otherwise a zero input yields an infinity
// carrying the sign bit of a0, and any other input is reduced by
// redu_t::reduce (two-part result), evaluated with a +1/-1 factor derived
// from the parity of the reduction index, and given the sign bit of a0
// back by XOR.
static inline A0 cota(const A0& a0)
{
  if (nt2::is_invalid(a0)) return Nan<A0>();
  if (redu_t::cot_invalid(a0)) return Nan<A0>();

  A0 magnitude = nt2::abs(a0);
  if (redu_t::replacement_needed(magnitude))
  {
    return redu_t::cot_replacement(a0);
  }

  const A0 sign_bit = bitofsign(a0);

  // Zero input: signed infinity.
  if (!a0) return b_or(Inf<A0>(), sign_bit);

  A0 reduced = Nan<A0>();
  A0 correction; // written by redu_t::reduce
  const int_type index = redu_t::reduce(magnitude, reduced, correction);

  // 1 when index is even, -1 when it is odd.
  const int_type parity_factor = 1-((index&1)<<1);

  const A0 value = eval_t::cot_eval(reduced, correction, parity_factor);
  return b_xor(value, sign_bit);
}
// Arc cosine expressed through asin.
//
// Both candidates are computed for every lane:
//   * a0 > 0.5  : 2 * asin(sqrt(0.5 - 0.5*a0)),
//   * otherwise : ((Pio_4 - asin(a0)) + c) + Pio_4, where c is the small
//                 constant 0x3c91a62633145c07 (about 6.12e-17).
// Lanes with |a0| > 1 get the comparison mask OR-ed onto the result.
// NOTE(review): presumably that OR turns out-of-domain lanes into NaN -
// confirm.
static inline A0 acos(const A0& a0)
{
  // Candidate used above one half.
  const A0 upper = Two<A0>() * asin( sqrt(Half<A0>() - Half<A0>()*a0) );

  // Candidate used everywhere else; Pio_4 is added in two steps.
  const A0 lower = ((Pio_4<A0>() - asin(a0))+double_constant<A0, 0x3c91a62633145c07ll>())+ Pio_4<A0>();

  const A0 picked = sel( gt(a0, Half<A0>()), upper, lower );
  return b_or( gt(abs(a0), One<A0>()), picked );
}
tag::cpu_, Dummy> : callable { template<class Sig> struct result; template<class This,class A0> struct result<This(A0)> : meta::strip<A0>{};// NT2_FUNCTOR_CALL(1) { A0 const na = isnez(a0); A0 n = add(shri(a0, 4), Four<A0>()); A0 n1 = shri(n+a0/n, 1); A0 msk = b_and(isle(n1,n), na); n = select(msk,n1,n); n1 = sqr(n); msk = b_or(isgt(n1,a0), b_and(iseqz(n1), na)); n = seladd( msk, n, Mone<A0>()); return seladd(na, Zero<A0>(), n); } }; } } ///////////////////////////////////////////////////////////////////////////// // Implementation when type A0 is arithmetic_ ///////////////////////////////////////////////////////////////////////////// NT2_REGISTER_DISPATCH(tag::sqrt_, tag::cpu_, (A0), ((simd_<arithmetic_<A0>,tag::xop_>)) );
void cover_sc_bit() { sc_bit bdef; sc_bit bf(false); sc_bit bt(true); sc_bit b0(0); sc_bit b1(1); try { sc_bit foo(2); } catch (sc_report) { cout << "Caught exception for sc_bit(2)\n"; } sc_bit bc0('0'); sc_bit bc1('1'); try { sc_bit foo('2'); } catch (sc_report) { cout << "Caught exception for sc_bit('2')\n"; } sc_bit blc0(sc_logic('0')); sc_bit blc1(sc_logic('1')); sc_bit blcx(sc_logic('X')); sc_bit bcop(bt); cout << bdef << bf << bt << b0 << b1 << bc0 << bc1 << blc0 << blc1 << blcx << bcop << endl; sc_bit b; b = bt; sc_assert(b); b = 0; sc_assert(!b); b = true; sc_assert(b.to_bool()); b = '0'; sc_assert(!b.to_bool()); b = sc_logic('1'); sc_assert(b.to_char() == '1'); b = bf; sc_assert(~b); b |= bt; sc_assert(b); b &= bf; sc_assert(!b); b |= 1; sc_assert(b); b &= 0; sc_assert(!b); b |= '1'; sc_assert(b); b &= '0'; sc_assert(!b); b |= true; sc_assert(b); b &= false; sc_assert(!b); b ^= bt; sc_assert(b); b ^= 1; sc_assert(!b); b ^= '1'; sc_assert(b); b ^= true; sc_assert(!b); sc_assert(b == bf); sc_assert(b == 0); sc_assert(b == '0'); sc_assert(b == false); b = 1; sc_assert(b == bt); sc_assert(b == 1); sc_assert(b == '1'); sc_assert(b == true); sc_assert(1 == b); sc_assert('1' == b); sc_assert(true == b); sc_assert(equal(b, bt)); sc_assert(equal(b, 1)); sc_assert(equal(b, '1')); sc_assert(equal(b, true)); sc_assert(equal(1, b)); sc_assert(equal('1', b)); sc_assert(equal(true, b)); b = 0; sc_assert(b != bt); sc_assert(b != 1); sc_assert(b != '1'); sc_assert(b != true); sc_assert(1 != b); sc_assert('1' != b); sc_assert(true != b); sc_assert(not_equal(b, bt)); sc_assert(not_equal(b, 1)); sc_assert(not_equal(b, '1')); sc_assert(not_equal(b, true)); sc_assert(not_equal(1, b)); sc_assert(not_equal('1', b)); sc_assert(not_equal(true, b)); // the following assertion is incorrect, because the b_not() method // is destructive, i.e., it implements something like b ~= void. 
/// sc_assert(b == b_not(b.b_not())); b.b_not(); sc_assert(b); sc_bit bx; b_not(bx, b0); sc_assert(bx); b_not(bx, b1); sc_assert(!bx); cout << (b0|b0) << (b0|b1) << (b1|b0) << (b1|b1) << endl; cout << (b0&b0) << (b0&b1) << (b1&b0) << (b1&b1) << endl; cout << (b0^b0) << (b0^b1) << (b1^b0) << (b1^b1) << endl; cout << (b0|0) << (b0|1) << (b1|0) << (b1|1) << endl; cout << (b0&0) << (b0&1) << (b1&0) << (b1&1) << endl; cout << (b0^0) << (b0^1) << (b1^0) << (b1^1) << endl; cout << (b0|'0') << (b0|'1') << (b1|'0') << (b1|'1') << endl; cout << (b0&'0') << (b0&'1') << (b1&'0') << (b1&'1') << endl; cout << (b0^'0') << (b0^'1') << (b1^'0') << (b1^'1') << endl; cout << (b0|true) << (b0|false) << (b1|true) << (b1|false) << endl; cout << (b0&true) << (b0&false) << (b1&true) << (b1&false) << endl; cout << (b0^true) << (b0^false) << (b1^true) << (b1^false) << endl; cout << (0|b0) << (0|b1) << (1|b0) << (1|b1) << endl; cout << (0&b0) << (0&b1) << (1&b0) << (1&b1) << endl; cout << (0^b0) << (0^b1) << (1^b0) << (1^b1) << endl; cout << ('0'|b0) << ('0'|b1) << ('1'|b0) << ('1'|b1) << endl; cout << ('0'&b0) << ('0'&b1) << ('1'&b0) << ('1'&b1) << endl; cout << ('0'^b0) << ('0'^b1) << ('1'^b0) << ('1'^b1) << endl; cout << (false|b0) << (false|b1) << (true|b0) << (true|b1) << endl; cout << (false&b0) << (false&b1) << (true&b0) << (true&b1) << endl; cout << (false^b0) << (false^b1) << (true^b0) << (true^b1) << endl; cout << b_or(b0,b0) << b_or(b0,b1) << b_or(b1,b0) << b_or(b1,b1) << endl; cout << b_and(b0,b0) << b_and(b0,b1) << b_and(b1,b0) << b_and(b1,b1) << endl; cout << b_xor(b0,b0) << b_xor(b0,b1) << b_xor(b1,b0) << b_xor(b1,b1) << endl; cout << b_or(b0,0) << b_or(b0,1) << b_or(b1,0) << b_or(b1,1) << endl; cout << b_and(b0,0) << b_and(b0,1) << b_and(b1,0) << b_and(b1,1) << endl; cout << b_xor(b0,0) << b_xor(b0,1) << b_xor(b1,0) << b_xor(b1,1) << endl; cout << b_or(b0,'0') << b_or(b0,'1') << b_or(b1,'0') << b_or(b1,'1') << endl; cout << b_and(b0,'0') << b_and(b0,'1') << b_and(b1,'0') << 
b_and(b1,'1') << endl; cout << b_xor(b0,'0') << b_xor(b0,'1') << b_xor(b1,'0') << b_xor(b1,'1') << endl; cout << b_or(b0,false) << b_or(b0,true) << b_or(b1,false) << b_or(b1,true) << endl; cout << b_and(b0,false) << b_and(b0,true) << b_and(b1,false) << b_and(b1,true) << endl; cout << b_xor(b0,false) << b_xor(b0,true) << b_xor(b1,false) << b_xor(b1,true) << endl; cout << b_or(0,b0) << b_or(0,b1) << b_or(1,b0) << b_or(1,b1) << endl; cout << b_and(0,b0) << b_and(0,b1) << b_and(1,b0) << b_and(1,b1) << endl; cout << b_xor(0,b0) << b_xor(0,b1) << b_xor(1,b0) << b_xor(1,b1) << endl; cout << b_or('0',b0) << b_or('0',b1) << b_or('1',b0) << b_or('1',b1) << endl; cout << b_and('0',b0) << b_and('0',b1) << b_and('1',b0) << b_and('1',b1) << endl; cout << b_xor('0',b0) << b_xor('0',b1) << b_xor('1',b0) << b_xor('1',b1) << endl; cout << b_or(false,b0) << b_or(false,b1) << b_or(true,b0) << b_or(true,b1) << endl; cout << b_and(false,b0) << b_and(false,b1) << b_and(true,b0) << b_and(true,b1) << endl; cout << b_xor(false,b0) << b_xor(false,b1) << b_xor(true,b0) << b_xor(true,b1) << endl; b_or(b, b0, b1); sc_assert(b); b_and(b, b0, b1); sc_assert(!b); b_xor(b, b0, b1); sc_assert(b); }
// Cotangent, computed as the reciprocal of eval_t::base_tancot_eval applied
// to the scaled argument.
//
// Lanes where a0 is out of range or equal to zero get the corresponding
// mask OR-ed onto the result.
// NOTE(review): presumably that OR turns those lanes into NaN - confirm.
static inline A0 cota(const A0& a0)
{
  const A0 scaled  = scale(a0);
  const A0 invalid = b_or(not_in_range(a0), is_eqz(a0));
  const A0 value   = rec(eval_t::base_tancot_eval(scaled));
  return b_or(invalid, value);
}
// Tangent, computed by eval_t::base_tancot_eval on the scaled argument.
//
// Lanes where a0 is out of range get the not_in_range mask OR-ed onto the
// result.
// NOTE(review): presumably that OR turns those lanes into NaN - confirm.
static inline A0 tana(const A0& a0)
{
  const A0 scaled  = scale(a0);
  const A0 invalid = not_in_range(a0);
  const A0 value   = eval_t::base_tancot_eval(scaled);
  return b_or(invalid, value);
}
// Sine, computed by eval_t::sin_eval on the scaled argument and its square.
//
// Lanes where a0 is out of range get the not_in_range mask OR-ed onto the
// result.
// NOTE(review): presumably that OR turns those lanes into NaN - confirm.
static inline A0 sina(const A0& a0)
{
  const A0 scaled  = scale(a0);
  const A0 invalid = not_in_range(a0);
  const A0 value   = eval_t::sin_eval(sqr(scaled), scaled, Zero<A0>());
  return b_or(invalid, value);
}