Ejemplo n.º 1
0
 /// Splits the 32-bit lanes of a0 into two registers of 64-bit lanes by
 /// interleaving every lane with the matching lane of is_ltz(a0).
 /// r0 receives the interleaved low half, r1 the interleaved high half.
 /// NOTE(review): presumably is_ltz yields an all-ones lane for negative
 /// values, which would make the interleave a sign extension - confirm.
 template<class R0,class R1> inline void
 eval(A0 const& a0, R0& r0, R1& r1, const simd::native<typename  boost::simd::meta::int64_t_<A0>::type,boost::simd::tag::sse_ > &)const
 {
   typedef simd::native<typename  boost::simd::meta::int64_t_<A0>::type,boost::simd::tag::sse_> wide_type;

   // Convert both operands to raw registers once and reuse them for both halves.
   const __m128i lanes = a0;
   const __m128i mask  = is_ltz(a0);

   r1 = bitwise_cast<wide_type>(_mm_unpackhi_epi32(lanes, mask));
   r0 = bitwise_cast<wide_type>(_mm_unpacklo_epi32(lanes, mask));
 }
Ejemplo n.º 2
0
 /// Raises a to the real scalar power b and stores the result in r.
 ///
 /// |b| is split into an integer part m (nt2::trunc) and a fractional part f.
 /// The integer power is accumulated by repeated squaring of a; the fractional
 /// power is accumulated from successive nt2::sqrtm roots of a, one per binary
 /// digit of f. If b was negative, the final product is inverted with nt2::inv.
 ///
 /// @param a  operand to be raised to the power b
 /// @param b  exponent; replaced by its absolute value when it is negative
 /// @param r  output; resized to extent(a) and overwritten with the result
 BOOST_FORCEINLINE static void doit2(const T& a, value_type& b, Out0& r)
 {
   r.resize(extent(a));
   typedef typename A0::index_type       index_type;
   typedef table<value_type, index_type> result_type;

   // Remember the sign once, then work on |b|.
   const bool is_ltz_b = is_ltz(b);
   if (is_ltz_b) b = -b;
   value_type m = nt2::trunc(b);
   value_type f = b-m;

   // NOTE: the former Schur/diagonal shortcut was guarded by `false` and its
   // inputs were never computed, so it could never run; only the iterative
   // method is kept.
   r = nt2::eye(nt2::size(a), meta::as_<value_type>());
   result_type rf = r; // accumulator for the fractional power, starts as identity

   if (m)
   {
     // Integer part: binary exponentiation, a00 holds a^(2^i).
     result_type a00 = a;
     while (m >= nt2::One<value_type>())
     {
       if (nt2::is_odd(m))
       {
         r = nt2::mtimes(a00, r);
       }
       a00 = nt2::mtimes(a00, a00);
       m = nt2::trunc(m/2);
     }
   }

   if (f)
   {
     // Fractional part: a00 holds a^(1/2), a^(1/4), ... and thresh the
     // matching exponent; a root is multiplied in whenever f still contains it.
     result_type a00 = nt2::sqrtm(a);
     value_type thresh = nt2::Half<value_type>();
     while (f > Zero<value_type>())
     {
       if (f >= thresh)
       {
         rf = nt2::mtimes(rf, a00);
         f -= thresh;
       }
       thresh *= nt2::Half<value_type>();
       a00 = nt2::sqrtm(a00);
     }
     r = nt2::mtimes(r, rf);
   }

   // A negative exponent means the inverse of the positive power.
   if (is_ltz_b) r = nt2::inv(r);
 }
Ejemplo n.º 3
0
 /// Splits the 32-bit lanes of a0 into two outputs by interleaving every
 /// lane with the matching lane of is_ltz(a0): r0 gets the interleaved low
 /// half and r1 the interleaved high half, each reinterpreted as R0 / R1.
 /// NOTE(review): presumably is_ltz yields an all-ones lane for negative
 /// values, which would make the interleave a sign extension - confirm.
 template<class R0,class R1> BOOST_FORCEINLINE void
 eval( A0 const& a0, R0& r0, R1& r1
     , dispatch::meta::as_<int64_t> const&
     ) const
 {
   // Convert both operands to raw registers once and reuse them for both halves.
   const __m128i lanes = a0;
   const __m128i mask  = is_ltz(a0);

   r1 = bitwise_cast<R1>(_mm_unpackhi_epi32(lanes, mask));
   r0 = bitwise_cast<R0>(_mm_unpacklo_epi32(lanes, mask));
 }
Ejemplo n.º 4
0
 /// Base-10 logarithm of a0, branch-free variant.
 /// kernel_log fills five work values which are recombined with the
 /// Invlog_10 and Log_2olog_10 constants; special inputs are patched
 /// afterwards without branching.
 static inline A0 log10(const A0& a0)
 {
     // Work values produced by kernel_log (names follow the fdlibm log
     // kernel - NOTE(review): confirm against kernel_log).
     A0 k, half_f2, s, poly, f;
     kernel_log(a0, k, half_f2, s, poly, f);

     A0 regular = -(half_f2-(s*(half_f2+poly))-f)*Invlog_10<A0>()
                + k*Log_2olog_10<A0>();

     // a0 - 1/|a0| is infinite only when a0 is zero or infinite, so a single
     // is_inf test flags both special inputs.
     A0 special = a0-rec(abs(a0));

     // Negative or NaN inputs get every bit set through b_or; seladd then
     // presumably adds `special` where it is infinite - confirm seladd.
     return seladd( is_inf(special)
                  , b_or(regular, b_or(is_ltz(a0), is_nan(a0)))
                  , special
                  );
 }
Ejemplo n.º 5
0
Archivo: d_log.hpp Proyecto: faldah/nt2
	/// Base-10 logarithm of a0, branching variant.
	/// Returns a0 for +Inf, Minf for zero and Nan for NaN or negative input;
	/// every other input goes through kernel_log.
	static inline A0 log10(const A0& a0)
	{
	  // Special inputs are answered before running the kernel.
	  if (a0 == Inf<A0>())
	    return a0;
	  if (is_eqz(a0))
	    return Minf<A0>();
	  if (nt2::is_nan(a0))
	    return Nan<A0>();
	  if (is_ltz(a0))
	    return Nan<A0>();

	  // Work values produced by kernel_log (names follow the fdlibm log
	  // kernel - NOTE(review): confirm against kernel_log).
	  A0 k, half_f2, s, poly, f;
	  kernel_log(a0, k, half_f2, s, poly, f);

	  return -(half_f2-(s*(half_f2+poly))-f)*Invlog_10<A0>()
	         + k*Log_2olog_10<A0>();
	}
Ejemplo n.º 6
0
	/// Base-2 logarithm of a0, branch-free variant.
	/// kernel_log fills four work values; the result is rebuilt from them
	/// and special inputs are patched afterwards without branching.
	static inline A0 log2(const A0& a0)
	{
	  A0 fe, x, x2, y;
	  kernel_log(a0, fe, x, x2, y);

	  // 0x3ee2a8ed is 0.44269504088896340735992f, i.e. log2(e) - 1.
	  const A0 log2e_m1 = single_constant<A0, 0x3ee2a8ed>();

	  y = madd(Mhalf<A0>(), x2, y);

	  // Multiply the log of the fraction by log2(e): the "- 1" part comes
	  // from the constant, the remaining 1*(x+y) is added back below.
	  A0 scaled  = madd(x, log2e_m1, mul(y, log2e_m1));
	  A0 regular = ((scaled+y)+x)+fe;

	  // a0 - 1/|a0| is infinite only when a0 is zero or infinite, so a single
	  // is_inf test flags both special inputs.
	  A0 special = a0-rec(abs(a0));

	  // Negative or NaN inputs get every bit set through b_or; seladd then
	  // presumably adds `special` where it is infinite - confirm seladd.
	  return seladd( is_inf(special)
	               , b_or(regular, b_or(is_ltz(a0), is_nan(a0)))
	               , special
	               );
	}
Ejemplo n.º 7
0
 /// Natural logarithm of a0, branch-free variant.
 /// kernel_log fills five work values which are recombined with a
 /// two-part (hi/lo) ln(2); special inputs are patched afterwards
 /// without branching.
 static inline A0 log(const A0& a0)
 {
     // ln(2) split in two parts:
     //   hi = 6.93147180369123816490e-01 (0x3fe62e42fee00000)
     //   lo = 1.90821492927058770002e-10 (0x3dea39ef35793c76)
     const A0 ln2_hi = double_constant<A0, 0x3fe62e42fee00000ll>();
     const A0 ln2_lo = double_constant<A0, 0x3dea39ef35793c76ll>();

     // Work values produced by kernel_log (names follow the fdlibm log
     // kernel - NOTE(review): confirm against kernel_log).
     A0 k, half_f2, s, poly, f;
     kernel_log(a0, k, half_f2, s, poly, f);

     A0 regular = mul(k, ln2_hi)
                - ((half_f2-(s*(half_f2+poly)+mul(k, ln2_lo)))-f);

     // a0 - 1/|a0| is infinite only when a0 is zero or infinite, so a single
     // is_inf test flags both special inputs.
     A0 special = a0-rec(abs(a0));

     // Negative or NaN inputs get every bit set through b_or; seladd then
     // presumably adds `special` where it is infinite - confirm seladd.
     return seladd( is_inf(special)
                  , b_or(regular, b_or(is_ltz(a0), is_nan(a0)))
                  , special
                  );
 }
Ejemplo n.º 8
0
Archivo: d_log.hpp Proyecto: faldah/nt2
	/// Natural logarithm of a0, branching variant.
	/// Returns a0 for +Inf, Minf for zero and Nan for NaN or negative input;
	/// every other input goes through kernel_log and a two-part ln(2).
	static inline A0 log(const A0& a0)
	{
	  // Special inputs are answered before running the kernel.
	  if (a0 == Inf<A0>())
	    return a0;
	  if (is_eqz(a0))
	    return Minf<A0>();
	  if (nt2::is_nan(a0))
	    return Nan<A0>();
	  if (is_ltz(a0))
	    return Nan<A0>();

	  // ln(2) split in two parts:
	  //   hi = 6.93147180369123816490e-01 (0x3fe62e42fee00000)
	  //   lo = 1.90821492927058770002e-10 (0x3dea39ef35793c76)
	  const A0 ln2_hi = double_constant<A0, 0x3fe62e42fee00000ll>();
	  const A0 ln2_lo = double_constant<A0, 0x3dea39ef35793c76ll>();

	  // Work values produced by kernel_log (names follow the fdlibm log
	  // kernel - NOTE(review): confirm against kernel_log).
	  A0 k, half_f2, s, poly, f;
	  kernel_log(a0, k, half_f2, s, poly, f);

	  return mul(k, ln2_hi)
	       - ((half_f2-(s*(half_f2+poly)+mul(k, ln2_lo)))-f);
	}
Ejemplo n.º 9
0
 /// Natural logarithm of a0, branch-free single-precision variant.
 /// kernel_log fills four work values which are recombined with a
 /// two-part ln(2); special inputs are patched afterwards without branching.
 static inline A0 log(const A0& a0)
 {
   // ln(2) split in two parts: 0x3f318000 = 0.693359375f (hi) and
   // 0xb95e8083 = -2.12194440e-4f (lo correction).
   const A0 ln2_lo = single_constant<A0, 0xb95e8083>();
   const A0 ln2_hi = single_constant<A0, 0x3f318000>();

   A0 fe, x, x2, y;
   kernel_log(a0, fe, x, x2, y);

   y = madd(fe, ln2_lo, y);
   y = madd(Mhalf<A0>(), x2, y);
   A0 partial = x + y;

   // a0 - 1/|a0| is infinite only when a0 is zero or infinite, so a single
   // is_inf test flags both special inputs.
   A0 special = a0-rec(abs(a0));

   A0 regular = madd(ln2_hi, fe, partial);
   // Negative or NaN inputs are replaced by NaN.
   regular = if_nan_else(logical_or(is_ltz(a0), is_nan(a0)), regular);

   // seladd presumably adds `special` where it is infinite - confirm seladd.
   return seladd(is_inf(special), regular, special);
 }
Ejemplo n.º 10
0
 /// Natural logarithm of a float, branching variant.
 /// Returns a0 for +Inf, Minf for zero and Nan for NaN or negative input;
 /// every other input goes through kernel_log and a two-part ln(2).
 inline float log(const float& a0)
 {
   typedef float A0;

   // Special inputs are answered before running the kernel.
   if (a0 == Inf<A0>())
     return a0;
   if (iseqz(a0))
     return Minf<A0>();
   if (nt2::is_nan(a0))
     return Nan<A0>();
   if (is_ltz(a0))
     return Nan<A0>();

   // ln(2) split in two parts: 0x3f318000 = 0.693359375f (hi) and
   // 0xb95e8083 = -2.12194440e-4f (lo correction).
   const float ln2_lo = Const<float, 0xb95e8083>();
   const float ln2_hi = Const<float, 0x3f318000>();

   float fe, x, x2, y;
   kernel_log(a0, fe, x, x2, y);

   y = fma(fe, ln2_lo, y);
   y = fma(Mhalf<A0>(), x2, y);
   A0 partial = x + y;
   return fma(ln2_hi, fe, partial);
 }
Ejemplo n.º 11
0
	/// Natural logarithm of a0, branching single-precision variant.
	/// Returns a0 for +Inf, Minf for zero and Nan for NaN or negative input;
	/// every other input goes through kernel_log and a two-part ln(2).
	static inline A0 log(const A0& a0)
	{
	  typedef typename meta::strip<A0>::type stA0;

	  // Special inputs are answered before running the kernel.
	  if (a0 == Inf<stA0>())
	    return a0;
	  if (is_eqz(a0))
	    return Minf<stA0>();
	  if (nt2::is_nan(a0))
	    return Nan<stA0>();
	  if (is_ltz(a0))
	    return Nan<stA0>();

	  // ln(2) split in two parts: 0x3f318000 = 0.693359375f (hi) and
	  // 0xb95e8083 = -2.12194440e-4f (lo correction).
	  const stA0 ln2_lo = single_constant<stA0, 0xb95e8083>();
	  const stA0 ln2_hi = single_constant<stA0, 0x3f318000>();

	  A0 fe, x, x2, y;
	  kernel_log(a0, fe, x, x2, y);

	  y = madd(fe, ln2_lo, y);
	  y = madd(Mhalf<stA0>(), x2, y);
	  A0 partial = x + y;
	  return madd(ln2_hi, fe, partial);
	}
Ejemplo n.º 12
0
	/// Base-10 logarithm of a0, branch-free single-precision variant.
	/// kernel_log fills four work values; the log of the fraction is scaled
	/// by a two-part log10(e) and the exponent by a two-part log10(2).
	/// Special inputs are patched afterwards without branching.
	static inline A0 log10(const A0& a0)
	{
	  // Two-part constants (hi + lo):
	  const A0 log10e_lo = single_constant<A0, 0x3a37b152>(); // 7.00731903251827651129E-4f
	  const A0 log10e_hi = single_constant<A0, 0x3ede0000>(); // 4.3359375E-1f
	  const A0 log10_2lo = single_constant<A0, 0x39826a14>(); // 2.48745663981195213739E-4f
	  const A0 log10_2hi = single_constant<A0, 0x3e9a0000>(); // 3.0078125E-1f

	  A0 fe, x, x2, y;
	  kernel_log(a0, fe, x, x2, y);

	  y = amul(y, -Half<A0>(), x2);

	  // Accumulate the low-order products first, the high-order ones last.
	  // NOTE(review): amul(a, b, c) presumably computes a + b*c - confirm.
	  A0 acc = mul(x+y, log10e_lo);
	  acc = amul(acc, y,  log10e_hi);
	  acc = amul(acc, x,  log10e_hi);
	  acc = amul(acc, fe, log10_2lo);
	  acc = amul(acc, fe, log10_2hi);

	  // a0 - 1/|a0| is infinite only when a0 is zero or infinite, so a single
	  // is_inf test flags both special inputs.
	  A0 special = a0-rec(abs(a0));

	  // Negative or NaN inputs get every bit set through b_or; seladd then
	  // presumably adds `special` where it is infinite - confirm seladd.
	  return seladd( is_inf(special)
	               , b_or(acc, b_or(is_ltz(a0), is_nan(a0)))
	               , special
	               );
	}
Ejemplo n.º 13
0
	/// Base-2 logarithm of a0, branching single-precision variant.
	/// Returns a0 for +Inf, Minf for zero and Nan for NaN or negative input;
	/// every other input goes through kernel_log.
	static inline A0 log2(const A0& a0)
	{
	  typedef typename meta::strip<A0>::type stA0;

	  // Special inputs are answered before running the kernel.
	  if (a0 == Inf<stA0>())
	    return a0;
	  if (is_eqz(a0))
	    return Minf<stA0>();
	  if (nt2::is_nan(a0))
	    return Nan<stA0>();
	  if (is_ltz(a0))
	    return Nan<stA0>();

	  // 0x3ee2a8ed is 0.44269504088896340735992f, i.e. log2(e) - 1.
	  const stA0 log2e_m1 = single_constant<stA0, 0x3ee2a8ed>();

	  A0 fe, x, x2, y;
	  kernel_log(a0, fe, x, x2, y);

	  y = madd(Mhalf<stA0>(), x2, y);

	  // Multiply the log of the fraction by log2(e): the "- 1" part comes
	  // from the constant, the remaining 1*(x+y) is added back in the return.
	  A0 scaled = madd(x, log2e_m1, mul(y, log2e_m1));
	  return ((scaled+y)+x)+fe;
	}
Ejemplo n.º 14
0
	/// Base-10 logarithm of a0, branching single-precision variant.
	/// Returns a0 for +Inf, Minf for zero and Nan for NaN or negative input;
	/// otherwise the log of the fraction is scaled by a two-part log10(e)
	/// and the exponent by a two-part log10(2).
	static inline A0 log10(const A0& a0)
	{
	  typedef typename meta::strip<A0>::type stA0;

	  // Special inputs are answered before running the kernel.
	  if (a0 == Inf<stA0>())
	    return a0;
	  if (is_eqz(a0))
	    return Minf<stA0>();
	  if (nt2::is_nan(a0))
	    return Nan<stA0>();
	  if (is_ltz(a0))
	    return Nan<stA0>();

	  // Two-part constants (hi + lo):
	  const stA0 log10e_lo = single_constant<stA0, 0x3a37b152>(); // 7.00731903251827651129E-4f
	  const stA0 log10e_hi = single_constant<stA0, 0x3ede0000>(); // 4.3359375E-1f
	  const stA0 log10_2lo = single_constant<stA0, 0x39826a14>(); // 2.48745663981195213739E-4f
	  const stA0 log10_2hi = single_constant<stA0, 0x3e9a0000 >(); // 3.0078125E-1f

	  A0 fe, x, x2, y;
	  kernel_log(a0, fe, x, x2, y);

	  y = amul(y, Mhalf<stA0>(), x2);

	  // Accumulate the low-order products first, the high-order ones last.
	  // NOTE(review): amul(a, b, c) presumably computes a + b*c - confirm.
	  A0 acc = mul(x+y, log10e_lo);
	  acc = amul(acc, y,  log10e_hi);
	  acc = amul(acc, x,  log10e_hi);
	  acc = amul(acc, fe, log10_2lo);
	  return amul(acc, fe, log10_2hi);
	}
Ejemplo n.º 15
0
 /// Base-2 logarithm of a float, branching variant.
 /// Returns a0 for +Inf, Minf for zero and Nan for NaN or negative input;
 /// every other input goes through kernel_log.
 inline float log2(const float& a0)
 {
   typedef float A0;

   // Special inputs are answered before running the kernel.
   if (a0 == Inf<A0>())
     return a0;
   if (iseqz(a0))
     return Minf<A0>();
   if (nt2::is_nan(a0))
     return Nan<A0>();
   if (is_ltz(a0))
     return Nan<A0>();

   // 0x3ee2a8ed is 0.44269504088896340735992f, i.e. log2(e) - 1.
   const float log2e_m1 = Const<float, 0x3ee2a8ed>();

   A0 fe, x, x2, y;
   kernel_log(a0, fe, x, x2, y);

   y = fma(Mhalf<A0>(), x2, y);

   // Multiply the log of the fraction by log2(e): the "- 1" part comes
   // from the constant, the remaining 1*(x+y) is added back in the return.
   A0 scaled = fma(x, log2e_m1, mul(y, log2e_m1));
   return ((scaled+y)+x)+fe;
 }
Ejemplo n.º 16
0
	/// Arc tangent of a scalar a0.
	/// Zero is returned unchanged and infinities map to +/- Pio_2. Any other
	/// input is reduced on |a0| to one of three ranges, evaluated with a
	/// rational approximation (two horner polynomials in the squared reduced
	/// argument), corrected with a small extra term, and given the sign of a0.
	static inline A0 atan(const  A0& a0)
	{
	  if (is_eqz(a0))  return a0;
	  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);

	  A0 t =  nt2::abs(a0);   // reduced argument
	  A0 base;                // angle offset of the selected range
	  // Weight applied to `morebits` below: 1 in the large range, 0.5 in the
	  // middle range, 0 in the small range.
	  A0 weight = (t >  double_constant<double,0x4003504f333f9de6ll>());
	  if (weight)
	    {
	      // Large range (threshold is about 2.414): use -1/t around Pio_2.
	      base =  Pio_2<A0>();
	      t =  -rec(t);
	    }
	  else if ((t <=  double_constant<double,0x3fe51eb851eb851fll>()))
	    {
	      // Small range (threshold is about 0.66): no reduction needed.
	      base = Zero<A0>();
	    }
	  else
	    {
	      // Middle range: use (t-1)/(t+1) around Pio_4.
	      base = Pio_4<A0>();
	      weight = Half<A0>();
	      t = minusone(t)/oneplus(t);
	    }

	  // Rational approximation in t*t.
	  A0 r = sqr(t);
	  r = r*horner< NT2_HORNER_COEFF_T(stype, 5,
                     (0xbfec007fa1f72594ll,
                0xc03028545b6b807all,
                0xc052c08c36880273ll,
                0xc05eb8bf2d05ba25ll,
                0xc0503669fd28ec8ell)
                     )>(r)/
              horner< NT2_HORNER_COEFF_T(stype, 6,
                     (0x3ff0000000000000ll,
                0x4038dbc45b14603cll,
                0x4064a0dd43b8fa25ll,
                0x407b0e18d2e2be3bll,
                0x407e563f13b049eall,
                0x4068519efbbd62ecll)
                     )>(r);
	  r = madd(t, r, t);

	  // Tiny correction term (about 6.1e-17) scaled by the range weight.
	  static const A0 morebits = double_constant<double,0x3c91a62633145c07ll>();
	  r += weight * morebits;
	  base = base + r;

	  // The computation ran on |a0|; restore the sign of the input.
	  if( is_ltz(a0) ) return -base;
	  return base;
	}
Ejemplo n.º 17
0
	/// Natural logarithm of a0, branch-free single-precision variant.
	/// kernel_log fills four work values which are recombined with a
	/// two-part ln(2); special inputs are patched afterwards without branching.
	static inline A0 log(const A0& a0)
	{
	  // ln(2) split in two parts: 0x3f318000 = 0.693359375f (hi) and
	  // 0xb95e8083 = -2.12194440e-4f (lo correction).
	  const A0 ln2_lo = single_constant<A0, 0xb95e8083>();
	  const A0 ln2_hi = single_constant<A0, 0x3f318000>();

	  A0 fe, x, x2, y;
	  kernel_log(a0, fe, x, x2, y);

	  y = madd(fe, ln2_lo, y);
	  y = madd(Mhalf<A0>(), x2, y);
	  A0 partial = x + y;

	  // a0 - 1/|a0| is infinite only when a0 is zero or infinite, so a single
	  // is_inf test flags both special inputs.
	  A0 special = a0-rec(abs(a0));
	  A0 regular = madd(ln2_hi, fe, partial);

	  // Negative or NaN inputs get every bit set through b_or; seladd then
	  // presumably adds `special` where it is infinite - confirm seladd.
	  return seladd( is_inf(special)
	               , b_or(regular, b_or(is_ltz(a0), is_nan(a0)))
	               , special
	               );
	}
Ejemplo n.º 18
0
                          );

namespace nt2 { namespace ext
{
  // Implementation of tag::negation_ on tag::cpu_ for two SIMD arithmetic
  // arguments of the same extension X.
  template<class X, class Dummy>
  struct call<tag::negation_(tag::simd_<tag::arithmetic_, X> ,
                             tag::simd_<tag::arithmetic_, X> ),
              tag::cpu_, Dummy> : callable
  {
    // Result type: the stripped type of the first argument.
    template<class Sig> struct result;
    template<class This,class A0>
    struct result<This(A0,A0)> : meta::strip<A0>{};

    // Where a1 is negative the result is -a0; elsewhere it is a0 bitwise-anded
    // with is_nez(a1).
    // NOTE(review): presumably is_nez yields an all-ones/all-zeros mask, so the
    // result would be 0 where a1 == 0 and a0 where a1 > 0 - confirm.
    NT2_FUNCTOR_CALL(2)
    {
      return  sel(is_ltz(a1),-a0,b_and(is_nez(a1), a0));
    }
  };
} }

/////////////////////////////////////////////////////////////////////////////
// Implementation when type A0 is unsigned
// Registers tag::negation_ on tag::cpu_ for two SIMD arguments of unsigned
// element type A0 and extension X.
/////////////////////////////////////////////////////////////////////////////
NT2_REGISTER_DISPATCH(tag::negation_, tag::cpu_,
                           (A0)(X),
                           ((simd_<unsigned_<A0>,X>))
                           ((simd_<unsigned_<A0>,X>))
                          );

namespace nt2 { namespace ext
{
Ejemplo n.º 19
0
namespace nt2 { namespace ext
{
  // Implementation of tag::negate_ on tag::cpu_ for two SIMD arithmetic
  // arguments of the same extension X.
  template<class X, class Dummy>
  struct call<tag::negate_(tag::simd_<tag::arithmetic_, X> ,
                           tag::simd_<tag::arithmetic_, X> ),
              tag::cpu_, Dummy> : callable
  {
    // Result type: the stripped type of the first argument.
    template<class Sig> struct result;
    template<class This,class A0>
    struct result<This(A0,A0)>
      : meta::strip<A0>{};

    // Where a1 is negative the result is -a0; elsewhere it is a0 bitwise-anded
    // with is_nez(a1).
    // NOTE(review): presumably is_nez yields an all-ones/all-zeros mask, so the
    // result would be 0 where a1 == 0 and a0 where a1 > 0 - confirm.
    NT2_FUNCTOR_CALL(2)
    {
        return  sel(is_ltz(a1),-a0,is_nez(a1)&a0);
    }
  };
} }

/////////////////////////////////////////////////////////////////////////////
// Implementation when type A0 is unsigned
// Registers tag::negate_ on tag::cpu_ for two SIMD arguments of unsigned
// element type A0 and extension X.
/////////////////////////////////////////////////////////////////////////////
NT2_REGISTER_DISPATCH(tag::negate_, tag::cpu_,
                         (A0)(X),
                         ((simd_<unsigned_<A0>,X>))
                         ((simd_<unsigned_<A0>,X>))
                        );

namespace nt2 { namespace ext
{