Exemple #1
0
 // Power-series branch of the kernel (NOTE(review): presumably the exponential
 // integral E_n(x) for small x -- confirm against the caller).
 //
 // x  : argument; modified in place (lanes equal to zero are replaced by one).
 // sn : scalar order, used for the harmonic sum and the power of -x.
 // n  : the same order in vector form.
 // Returns the series value; Inf is added to the lanes where x was zero.
 static inline AA1 case_1(AA1 & x,  int32_t sn, const AA1 & n)
 {
   typedef typename meta::scalar_of<AA1>::type sA1;
   /*		Power series expansion		*/
   // Remember the x == 0 lanes once: the series loop is infinite for x == 0,
   // so those lanes are bumped to 1 here and patched when returning.
   const AA1 eqzx = is_eqz(x);
   x = seladd(eqzx, x, One<A1>());
   // psi1 = sum of 1/i for i = sn-1 down to 1.
   // The "i > 0" guard keeps the loop finite when sn <= 0.
   sA1 psi1 = Zero<sA1>();
   for( int32_t i = sn-1; i > 0; --i )  psi1 += rec(static_cast<sA1>(i));
   AA1 psi = -Euler<A1>()-nt2::log(x)+splat<A1>(psi1);
   AA1 t;
   AA1 z = -x;
   AA1 xk = Zero<A1>();
   AA1 yk = One<A1>();
   AA1 pk = One<A1>() - n;
   AA1 ans = ( sn == 1 ) ? Zero<A1>() : rec(pk);
   do
   {
     xk += One<AA1>();
     yk *= z/xk;                                   // next (-x)^k / k! term
     pk += One<AA1>();
     ans = seladd(is_nez(pk), ans, yk/pk);         // skip the term whose divisor is zero
     // Relative size of the current term; lanes with ans == 0 keep iterating.
     t = select(is_nez(ans), nt2::abs(yk/ans), One<AA1>());
   }
   while( nt2::bitwise_any(gt(t, Halfeps<A1>())));
   return seladd(eqzx,(nt2::powi(z, sn-1) * psi / nt2::gamma(n)) - ans, Inf<A1>());
   //TO DO pow->powi and gamma splatted from scalar or mere factorial call
 }
Exemple #2
0
 // Branch-free search for the table index matching x (antilog table A[]),
 // then reduction of x relative to the selected entry.
 // x is updated in place; the selected index is returned.
 static BOOST_FORCEINLINE i_t select(A0& x)
 {
   // Start at 9 when x is at or below entry 9, otherwise at 1.
   i_t idx = if_else(x <= twomio16(Nine<i_t>()), Nine<i_t>(), One<i_t>());
   // Refine with steps of 4 and then 2.
   idx = seladd(x <= twomio16(idx + Four<i_t>()), idx, Four<i_t>());
   idx = seladd(x <= twomio16(idx + Two<i_t>()),  idx, Two<i_t>());
   // Lanes at or above entry 1 restart from -1, so the increment lands on 0.
   idx = if_else(x >= twomio16(One<i_t>()), Mone<i_t>(), idx);
   idx = inc(idx);

   // Reduce x: subtract the entry and its continuation, then scale by the entry.
   A0 entry = twomio16(idx);
   x -= entry;
   x -= continuation(shr(idx, 1));
   x /= entry;
   return idx;
 }
Exemple #3
0
 // Arctangent kernel on |a0_n|: three-interval range reduction followed by a
 // rational (degree 5 over degree 6) approximation in the reduced variable.
 // The sign of the input is not reapplied here.
 static inline A0_n kernel_atan(const A0_n a0_n)
 {
   typedef typename meta::scalar_of<A0>::type sA0;
   const A0 a0 = {a0_n};
   const A0 ax = nt2::abs(a0);

   // Interval boundaries for the reduction.
   const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
   const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
   const bA0 below_hi = lt(ax, tan3pio8);
   const bA0 in_mid   = logical_and(ge(ax, tanpio8), below_hi);

   // Additive offset per interval: Pi/2 above the upper bound, Pi/4 in the
   // middle interval, zero below.
   A0 offset = if_zero_else(below_hi, Pio_2<A0>());
   offset = select(in_mid, Pio_4<A0>(), offset);

   // Reduced argument per interval: -1/x, (x-1)/(x+1), or x itself.
   A0 r = select(below_hi, ax, -rec(ax));
   r = select(in_mid, minusone(ax)/oneplus(ax), r);

   // Rational approximation evaluated in r^2.
   const A0 r2 = sqr(r);
   const A0 num = r2*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                (0xbfec007fa1f72594ll,
                                 0xc03028545b6b807all,
                                 0xc052c08c36880273ll,
                                 0xc05eb8bf2d05ba25ll,
                                 0xc0503669fd28ec8ell)
                                )>(r2);
   const A0 den = horner< NT2_HORNER_COEFF_T(sA0, 6,
                                (0x3ff0000000000000ll,
                                 0x4038dbc45b14603cll,
                                 0x4064a0dd43b8fa25ll,
                                 0x407b0e18d2e2be3bll,
                                 0x407e563f13b049eall,
                                 0x4068519efbbd62ecll)
                                )>(r2);
   A0 res = fma(r, num/den, r);

   // Low-order correction bits matching the offset chosen above
   // (half of them in the middle interval, all of them in the upper one).
   const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();
   res = seladd(in_mid, res, mul(Half<A0>(), morebits));
   res = res+if_zero_else(below_hi, morebits);
   return offset + res;
 }
Exemple #4
0
 // Base-10 logarithm built from the pieces produced by kernel_log.
 static inline A0 log10(const A0& a0)
 {
     A0 dk, hfsq, s, R, f;
     kernel_log(a0, dk, hfsq, s, R, f);

     // Fractional contribution scaled by Invlog_10, plus the exponent
     // contribution dk scaled by Log_2olog_10.
     const A0 frac = (hfsq-(s*(hfsq+R)))-f;
     const A0 finite_part = (-frac)*Invlog_10<A0>()+dk*Log_2olog_10<A0>();

     // a0 - 1/|a0| is infinite exactly for a0 == 0 and a0 == +/-Inf, which
     // lets one seladd patch those inputs (trick to reduce selection testing).
     const A0 edge = a0-rec(abs(a0));

     // Negative or NaN inputs get their result OR-ed with the test mask.
     return seladd(is_inf(edge),
                   b_or(finite_part, b_or(is_ltz(a0), is_nan(a0))),
                   edge);
 }
Exemple #5
0
 // Natural logarithm built from the pieces produced by kernel_log.
 static inline A0 log(const A0& a0)
 {
     // ln(2) split in two parts:
     // ln(2)hi  =  6.93147180369123816490e-01  or  0x3fe62e42fee00000
     // ln(2)lo  =  1.90821492927058770002e-10  or  0x3dea39ef35793c76
     const A0 ln2_hi = double_constant<A0, 0x3fe62e42fee00000ll>();
     const A0 ln2_lo = double_constant<A0, 0x3dea39ef35793c76ll>();

     A0 dk, hfsq, s, R, f;
     kernel_log(a0, dk, hfsq, s, R, f);

     // dk*ln2_hi minus the correction term (which folds in dk*ln2_lo).
     const A0 correction = (hfsq-(s*(hfsq+R)+mul(dk, ln2_lo)))-f;
     const A0 finite_part = mul(dk, ln2_hi)-correction;

     // a0 - 1/|a0| is infinite exactly for a0 == 0 and a0 == +/-Inf
     // (trick to reduce selection testing).
     const A0 edge = a0-rec(abs(a0));

     // Negative or NaN inputs get their result OR-ed with the test mask.
     return seladd(is_inf(edge),
                   b_or(finite_part, b_or(is_ltz(a0), is_nan(a0))),
                   edge);
 }
Exemple #6
0
	// Base-2 logarithm (single precision) built from kernel_log outputs.
	static inline A0 log2(const A0& a0)
	{
	  // 0.44269504088896340735992, i.e. log2(e) - 1
	  const A0 log2e_m1 = single_constant<A0, 0x3ee2a8ed>();

	  A0 x, fe, x2, y;
	  kernel_log(a0, fe, x, x2, y);
	  y = madd(Mhalf<A0>(), x2, y);

	  // multiply log of fraction by log2(e): (x + y)*(log2(e) - 1) + y + x,
	  // then add the exponent part fe
	  const A0 scaled = madd(x, log2e_m1, mul(y, log2e_m1));
	  const A0 finite_part = ((scaled+y)+x)+fe;

	  // a0 - 1/|a0| is infinite exactly for a0 == 0 and a0 == +/-Inf
	  // (trick to reduce selection testing)
	  const A0 edge = a0-rec(abs(a0));

	  // Negative or NaN inputs get their result OR-ed with the test mask.
	  return seladd(is_inf(edge),
	                b_or(finite_part, b_or(is_ltz(a0), is_nan(a0))),
	                edge);
	}
Exemple #7
0
 // Natural logarithm (single precision) built from kernel_log outputs.
 static inline A0 log(const A0& a0)
 {
   // Two-part multiplier applied to the exponent term fe
   // (small negative part first, then the large part).
   const A0 ln2_lo = single_constant<A0, 0xb95e8083>();
   const A0 ln2_hi = single_constant<A0, 0x3f318000>();

   A0 x, fe, x2, y;
   kernel_log(a0, fe, x, x2, y);
   y = madd(fe, ln2_lo, y);
   y = madd(Mhalf<A0>(), x2, y);
   const A0 frac = x + y;

   // a0 - 1/|a0| is infinite exactly for a0 == 0 and a0 == +/-Inf
   // (trick to reduce selection testing).
   const A0 edge = a0-rec(abs(a0));

   // Finite result, forced to NaN for negative or NaN inputs.
   A0 res = madd(ln2_hi, fe, frac);
   res = if_nan_else(logical_or(is_ltz(a0), is_nan(a0)), res);
   return seladd(is_inf(edge), res, edge);
 }
Exemple #8
0
	// Base-10 logarithm (single precision) built from kernel_log outputs.
	static inline A0 log10(const A0& a0)
	{
	  // Split constants (decimal values decoded from the bit patterns).
	  const A0 log10e_lo  = single_constant<A0, 0x3a37b152>(); // 7.00731903251827651129E-4f  log10(e)lo
	  const A0 log10e_hi  = single_constant<A0, 0x3ede0000>(); // 4.3359375E-1f               log10(e)hi
	  const A0 log10_2_lo = single_constant<A0, 0x39826a14>(); // 2.48745663981195213739E-4f  log10(2)lo
	  const A0 log10_2_hi = single_constant<A0, 0x3e9a0000>(); // 3.0078125E-1f               log10(2)hi

	  A0 x, fe, x2, y;
	  kernel_log(a0, fe, x, x2, y);
	  y = amul(y, -Half<A0>(), x2);

	  // multiply log of fraction by log10(e) and base 2 exponent by log10(2),
	  // accumulating the small contributions before the large ones
	  A0 acc = mul(x+y, log10e_lo);
	  acc = amul(acc, y,  log10e_hi);
	  acc = amul(acc, x,  log10e_hi);
	  acc = amul(acc, fe, log10_2_lo);
	  acc = amul(acc, fe, log10_2_hi);

	  // a0 - 1/|a0| is infinite exactly for a0 == 0 and a0 == +/-Inf
	  // (trick to reduce selection testing)
	  const A0 edge = a0-rec(abs(a0));

	  // Negative or NaN inputs get their result OR-ed with the test mask.
	  return seladd(is_inf(edge),
	                b_or(acc, b_or(is_ltz(a0), is_nan(a0))),
	                edge);
	}
Exemple #9
0
 // Arc cosine expressed through asin.
 static inline A0_n acos(const A0_n a0_n)
 {
   // 2130706432 values computed.
   // 1968272987 values (92.38%) within 0.0 ULPs
   //  162433445 values (7.62%)  within 0.5 ULPs
   // 8.5 cycles/element SSE4.2 g++-4.8
   const A0 a0 = a0_n;
   const A0 ax = nt2::abs(a0);
   const bA0 x_larger_05 = gt(ax, nt2::Half<A0>());

   // |a0| > 0.5: feed sqrt((1 - |a0|)/2) to asin; otherwise feed a0 itself.
   A0 r = if_else(x_larger_05,
                  nt2::sqrt(fma(nt2::Mhalf<A0>(), ax, nt2::Half<A0>())),
                  a0);
   r = asin(r);

   // Large-magnitude lanes: double the asin value ...
   r = seladd(x_larger_05, r, r);
   // ... and reflect around Pi when a0 < -0.5.
   r = nt2::if_else(lt(a0, nt2::Mhalf<A0>()), nt2::Pi<A0>()-r, r);

   // Small-magnitude lanes: Pi/2 - asin(a0).
   return nt2::if_else(x_larger_05, r, nt2::Pio_2<A0>()-r);
 }
Exemple #10
0
	// Natural logarithm (single precision) built from kernel_log outputs.
	static inline A0 log(const A0& a0)
	{
	  // Two-part multiplier applied to the exponent term fe
	  // (small negative part first, then the large part).
	  const A0 ln2_lo = single_constant<A0, 0xb95e8083>();
	  const A0 ln2_hi = single_constant<A0, 0x3f318000>();

	  A0 x, fe, x2, y;
	  kernel_log(a0, fe, x, x2, y);
	  y = madd(fe, ln2_lo, y);
	  y = madd(Mhalf<A0>(), x2, y);
	  const A0 frac = x + y;

	  // a0 - 1/|a0| is infinite exactly for a0 == 0 and a0 == +/-Inf
	  // (trick to reduce selection testing)
	  const A0 edge = a0-rec(abs(a0));
	  const A0 finite_part = madd(ln2_hi, fe, frac);

	  // Negative or NaN inputs get their result OR-ed with the test mask.
	  return seladd(is_inf(edge),
	                b_or(finite_part, b_or(is_ltz(a0), is_nan(a0))),
	                edge);
	}
Exemple #11
0
 // Iterative evaluation of a ratio pk/qk by a two-term recurrence, scaled by
 // exp(-x) on return.
 // NOTE(review): looks like the continued-fraction branch that pairs with
 // case_1 (exponential integral for larger x) -- confirm against the caller.
 //
 // x : argument (vector).
 // n : order (vector).
 // The scalar order parameter is accepted for signature symmetry but unused.
 // Comments below assume seladd(c, a, b) yields a + b where c holds and a
 // elsewhere, and sel(c, a, b) picks a where c holds -- TODO confirm.
 static inline AA1 case_2(const AA1 & x,  int32_t /*sn*/, const AA1 & n)
 {
   typedef typename meta::scalar_of<AA1>::type sAA1; // NOTE(review): not used in this function
   int32_t sk = 1;             // step counter, pre-incremented at the top of each pass
   AA1 t;                      // relative change between successive ratios
   // Seed values for the numerator (p) and denominator (q) recurrences.
   AA1 pkm2 = One<AA1>();
   AA1 qkm2 = x;
   AA1 pkm1 = One<AA1>();
   AA1 qkm1 = x + n;
   AA1 ans = pkm1/qkm1;
   do
   {
     // Odd and even steps use different coefficients.
     AA1 test =  is_nez(splat<AA1>(is_odd(++sk)));
     AA1 k_2 =  splat<AA1>(sk >> 1); 
     AA1 yk = sel(test, One<AA1>(), x);     // 1 on odd steps, x on even steps
     AA1 xk = seladd(test, k_2, n);         // sk/2 + n on odd steps, sk/2 on even steps
     AA1 pk = pkm1 * yk  +  pkm2 * xk;
     AA1 qk = qkm1 * yk  +  qkm2 * xk;
     AA1 r = pk/qk;
     // Only lanes with a non-zero denominator update the answer; the others
     // keep t at One so the loop keeps running for them.
     test = is_nez(qk); 
     t = sel(test,nt2::abs((ans-r)/r),One<AA1>());
     ans = sel(test, r, ans);
     // Shift the recurrence state.
     pkm2 = pkm1;
     pkm1 = pk;
     qkm2 = qkm1;
     qkm1 = qk;
     // Rescale all four state values by Halfeps where |pk| exceeds Expnibig
     // (presumably to avoid overflow; the ratio pk/qk is unaffected).
     test = gt(nt2::abs(pk), Expnibig<AA1>());
     AA1 fac = sel(test, Halfeps<AA1>(), One<AA1>()); 
     pkm2 *= fac;
     pkm1 *= fac;
     qkm2 *= fac;
     qkm1 *= fac;
   }
   // Continue while any lane still changes by more than Halfeps.
   while( nt2::bitwise_any(gt(t, Halfeps<AA1>())) );
   return ans*nt2::exp(-x);
 }
Exemple #12
0
 // Arctangent: three-interval range reduction on |a0|, a rational
 // (degree 5 over degree 6) approximation in the reduced variable, then the
 // sign of a0 is reapplied.
 //
 // a0 : input value(s).
 // Returns atan(a0) with the sign bit of a0.
 static inline A0 atan(const  A0& a0)
 {
     typedef typename meta::scalar_of<A0>::type sA0;
     // Interval boundaries and low-order correction bits, each named once.
     // (The former function-local statics were never read and are gone.)
     const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
     const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
     const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

     const A0 x =  abs(a0);
     const A0 flag1 = lt(x, tan3pio8);                  // below the upper boundary
     const A0 flag2 = b_and(ge(x, tanpio8), flag1);     // inside the middle interval

     // Additive offset per interval: Pi/2 above the upper boundary, Pi/4 in
     // the middle interval, zero below.
     A0 yy =  b_notand(flag1, Pio_2<A0>());
     yy =  select(flag2, Pio_4<A0>(), yy);

     // Reduced argument per interval: -1/x, (x-1)/(x+1), or x itself.
     A0 xx =   select(flag1, x, -rec(x));
     xx =  select(flag2, minusone(x)/oneplus(x),xx);

     // Rational approximation evaluated in xx^2.
     A0 z = sqr(xx);
     z = z*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                      (0xbfec007fa1f72594ll,
                                       0xc03028545b6b807all,
                                       0xc052c08c36880273ll,
                                       0xc05eb8bf2d05ba25ll,
                                       0xc0503669fd28ec8ell)
                                     )>(z)/
         horner< NT2_HORNER_COEFF_T(sA0, 6,
                                    (0x3ff0000000000000ll,
                                     0x4038dbc45b14603cll,
                                     0x4064a0dd43b8fa25ll,
                                     0x407b0e18d2e2be3bll,
                                     0x407e563f13b049eall,
                                     0x4068519efbbd62ecll)
                                   )>(z);
     z = fma(xx, z, xx);

     // Correction bits matching the offset: half of them in the middle
     // interval, all of them above the upper boundary.
     z = seladd(flag2, z, mul(Half<A0>(), morebits));
     z = z+b_notand(flag1, morebits);
     yy = yy + z;

     // Reapply the sign of the input.
     return b_xor(yy, bitofsign(a0));
 }
Exemple #13
0
  {
    // Result-type protocol: for a call signature This(A0) the result type is
    // whatever meta::strip<A0> yields (presumably A0 without reference/cv
    // qualifiers -- confirm against meta::strip).
    template<class Sig> struct result;
    template<class This,class A0>
    struct result<This(A0)>  : meta::strip<A0>{};//

    // NOTE(review): the enclosing functor's header is not visible here; the
    // arithmetic looks like an integer square root (initial guess, one
    // averaging step, then a downward correction) -- confirm.
    // Comments assume seladd(c, a, b) yields a + b where c holds, a elsewhere.
    NT2_FUNCTOR_CALL(1)
    {
      A0 const na  = isnez(a0);                    // lanes with a non-zero input
      A0 n   = add(shri(a0, 4), Four<A0>());       // initial guess: a0/16 + 4
      A0 n1  = shri(n+a0/n, 1);                    // averaging step: (n + a0/n) / 2
      A0 msk = b_and(isle(n1,n), na);              // accept n1 only if it did not grow

      n   = select(msk,n1,n);
      n1  = sqr(n);
      // Step down by one where n*n exceeds a0, or where n*n is zero for a
      // non-zero input (presumably wrap-around of the square).
      msk = b_or(isgt(n1,a0), b_and(iseqz(n1), na));
      n   = seladd( msk, n, Mone<A0>());

      // Zero inputs return zero; every other lane returns n.
      return seladd(na, Zero<A0>(), n);
    }
  };
} }

/////////////////////////////////////////////////////////////////////////////
// Implementation when type A0 is arithmetic_
/////////////////////////////////////////////////////////////////////////////
// Registers a dispatch entry for tag::sqrt_ on tag::cpu_ with one template
// parameter (A0) and one argument category: simd_<arithmetic_<A0>, tag::xop_>.
// NOTE(review): the meaning of each macro argument is inferred from its
// spelling -- confirm against the NT2_REGISTER_DISPATCH definition.
NT2_REGISTER_DISPATCH(tag::sqrt_, tag::cpu_,
                       (A0),
                       ((simd_<arithmetic_<A0>,tag::xop_>))
                      );

namespace nt2 { namespace ext