// Widens the 32-bit lanes of a0 into two SSE registers of 64-bit lanes:
// r0 is built from the low half of a0, r1 from the high half.
// The trailing unnamed parameter is never read; it only selects this overload.
template<class R0,class R1>
inline void eval( A0 const& a0, R0& r0, R1& r1
                , const simd::native<typename boost::simd::meta::int64_t_<A0>::type,boost::simd::tag::sse_ > &
                ) const
{
  typedef simd::native< typename boost::simd::meta::int64_t_<A0>::type
                      , boost::simd::tag::sse_
                      > wide_type;

  // Each source lane is interleaved with its is_ltz() result, which supplies
  // the upper 32 bits of the widened lane (presumably all ones for negative
  // lanes and zero otherwise, i.e. a sign extension - confirm).
  r1 = bitwise_cast<wide_type>(_mm_unpackhi_epi32(a0, is_ltz(a0)));
  r0 = bitwise_cast<wide_type>(_mm_unpacklo_epi32(a0, is_ltz(a0)));
}
// Computes r = a^b for a matrix a and a scalar exponent b.
//
// |b| is split into an integer part m and a fractional part f:
//   - a^m is accumulated by repeated squaring;
//   - a^f is accumulated from successive matrix square roots (sqrtm), one
//     factor per binary digit of f that is set.
// When b is negative the result is inverted before returning.
//
// a : input matrix.
// b : exponent. NOTE: taken by non-const reference and overwritten with -b
//     when it is negative.
// r : output; resized to extent(a) and overwritten.
//
// NOTE(review): the fractional loop performs one sqrtm per binary digit of f
// down to its lowest set bit, so a very small fractional part means many
// iterations.
BOOST_FORCEINLINE static void doit2(const T& a, value_type& b, Out0& r)
{
  r.resize(extent(a));
  typedef typename A0::index_type index_type;
  typedef table<value_type, index_type> result_type;

  const bool is_ltz_b = is_ltz(b);
  if(is_ltz_b) b = -b;
  value_type m = nt2::trunc(b);
  value_type f = b-m;

  // TODO: a shortcut through the complex Schur form (tie(q, t) = schur(a,'N'))
  // for the case where t is diagonal was sketched here but never enabled;
  // only the iterative method below is used.
  r = nt2::eye(nt2::size(a), meta::as_<value_type>());
  result_type rf = r;

  // Integer part: binary exponentiation.
  if (m)
  {
    result_type a00 = a;
    while (m >= nt2::One<value_type>())
    {
      if (nt2::is_odd(m))
      {
        r = nt2::mtimes(a00, r);
      }
      a00 = nt2::mtimes(a00, a00);
      m = nt2::trunc(m/2);
    }
  }

  // No fractional part: the integer power is the answer.
  if(!f)
  {
    if(is_ltz_b) r = nt2::inv(r);
    return;
  }

  // Fractional part: a00 runs through a^(1/2), a^(1/4), ... while thresh runs
  // through 1/2, 1/4, ...; a factor is multiplied in whenever the remaining
  // fraction is at least thresh.
  result_type a00 = nt2::sqrtm(a);
  value_type thresh = nt2::Half<value_type>();
  while (f > Zero<value_type>())
  {
    if (f >= thresh)
    {
      rf = nt2::mtimes(rf, a00);
      f -= thresh;
    }
    thresh *= nt2::Half<value_type>();
    a00 = nt2::sqrtm(a00);
  }

  r = nt2::mtimes(r, rf);
  if(is_ltz_b) r = nt2::inv(r);
}
// Widens the 32-bit lanes of a0 into two registers of 64-bit lanes:
// r0 is built from the low half of a0, r1 from the high half.
// The as_<int64_t> parameter is never read; it only selects this overload.
template<class R0,class R1>
BOOST_FORCEINLINE void eval( A0 const& a0
                           , R0& r0
                           , R1& r1
                           , dispatch::meta::as_<int64_t> const&
                           ) const
{
  // Each source lane is interleaved with its is_ltz() result, which supplies
  // the upper 32 bits of the widened lane (presumably all ones for negative
  // lanes and zero otherwise, i.e. a sign extension - confirm).
  r1 = bitwise_cast<R1>( _mm_unpackhi_epi32(a0, is_ltz(a0)) );
  r0 = bitwise_cast<R0>( _mm_unpacklo_epi32(a0, is_ltz(a0)) );
}
// Base-10 logarithm built on kernel_log, written without branches.
//
// Special inputs are handled by selection instead of tests:
//   - the regular result is OR-ed with the is_ltz / is_nan masks, which
//     presumably turns those lanes into NaN (all bits set) - confirm;
//   - edge = a0 - 1/|a0| is added to the result wherever edge is infinite,
//     which is the case for zero and infinite a0.
static inline A0 log10(const A0& a0)
{
  A0 edge = a0-rec(abs(a0)); // trick to reduce selection testing

  A0 dk, hfsq, s, R, f;
  kernel_log(a0, dk, hfsq, s, R, f);

  // Combine the kernel outputs and rescale to base 10.
  A0 core = -(hfsq-(s*(hfsq+R))-f)*Invlog_10<A0>()+dk*Log_2olog_10<A0>();

  A0 guarded = b_or(core, b_or(is_ltz(a0), is_nan(a0)));
  return seladd(is_inf(edge), guarded, edge);
}
// Base-10 logarithm with explicit handling of special inputs.
//
// Returns a0 itself for +infinity, Minf for zero, Nan for NaN or negative
// input; otherwise the value assembled from the kernel_log outputs.
static inline A0 log10(const A0& a0)
{
  // Special inputs, checked in the same order as before.
  if (a0 == Inf<A0>())  return a0;
  if (is_eqz(a0))       return Minf<A0>();
  if (nt2::is_nan(a0))  return Nan<A0>();
  if (is_ltz(a0))       return Nan<A0>();

  A0 dk, hfsq, s, R, f;
  kernel_log(a0, dk, hfsq, s, R, f);

  // Combine the kernel outputs and rescale to base 10.
  return -(hfsq-(s*(hfsq+R))-f)*Invlog_10<A0>()+dk*Log_2olog_10<A0>();
}
// Base-2 logarithm built on kernel_log, written without branches.
//
// Special inputs are handled by selection instead of tests:
//   - the regular result is OR-ed with the is_ltz / is_nan masks, which
//     presumably turns those lanes into NaN (all bits set) - confirm;
//   - edge = a0 - 1/|a0| is added to the result wherever edge is infinite,
//     which is the case for zero and infinite a0.
static inline A0 log2(const A0& a0)
{
  A0 edge = a0-rec(abs(a0)); // trick to reduce selection testing

  A0 x, fe, x2, y;
  kernel_log(a0, fe, x, x2, y);
  y = madd(Mhalf<A0>(), x2, y);

  // Multiply log of fraction by log2(e). Only log2(e) - 1
  // (0.44269504088896340735992) is applied as a factor; x and y themselves
  // are added back in the sum below.
  A0 scaled = madd( x
                  , single_constant<A0, 0x3ee2a8ed>()
                  , mul(y, single_constant<A0, 0x3ee2a8ed>())
                  );
  A0 regular = ((scaled+y)+x)+fe;

  return seladd(is_inf(edge), b_or(regular, b_or(is_ltz(a0), is_nan(a0))), edge);
}
// Natural logarithm built on kernel_log, written without branches.
//
// ln(2) is applied as a high and a low part:
//   ln(2)hi = 6.93147180369123816490e-01 or 0x3fe62e42fee00000
//   ln(2)lo = 1.90821492927058770002e-10 or 0x3dea39ef35793c76
//
// Special inputs are handled by selection instead of tests:
//   - the regular result is OR-ed with the is_ltz / is_nan masks, which
//     presumably turns those lanes into NaN (all bits set) - confirm;
//   - edge = a0 - 1/|a0| is added to the result wherever edge is infinite,
//     which is the case for zero and infinite a0.
static inline A0 log(const A0& a0)
{
  A0 edge = a0-rec(abs(a0)); // trick to reduce selection testing

  A0 dk, hfsq, s, R, f;
  kernel_log(a0, dk, hfsq, s, R, f);

  A0 core = mul(dk, double_constant<A0, 0x3fe62e42fee00000ll>())-
            ((hfsq-(s*(hfsq+R)+mul(dk,double_constant<A0, 0x3dea39ef35793c76ll>())))-f);

  A0 guarded = b_or(core, b_or(is_ltz(a0), is_nan(a0)));
  return seladd(is_inf(edge), guarded, edge);
}
// Natural logarithm with explicit handling of special inputs.
//
// Returns a0 itself for +infinity, Minf for zero, Nan for NaN or negative
// input; otherwise the value assembled from the kernel_log outputs.
//
// ln(2) is applied as a high and a low part:
//   ln(2)hi = 6.93147180369123816490e-01 or 0x3fe62e42fee00000
//   ln(2)lo = 1.90821492927058770002e-10 or 0x3dea39ef35793c76
static inline A0 log(const A0& a0)
{
  // Special inputs, checked in the same order as before.
  if (a0 == Inf<A0>())  return a0;
  if (is_eqz(a0))       return Minf<A0>();
  if (nt2::is_nan(a0))  return Nan<A0>();
  if (is_ltz(a0))       return Nan<A0>();

  A0 dk, hfsq, s, R, f;
  kernel_log(a0, dk, hfsq, s, R, f);

  return mul(dk, double_constant<A0, 0x3fe62e42fee00000ll>())-
         ((hfsq-(s*(hfsq+R)+mul(dk,double_constant<A0, 0x3dea39ef35793c76ll>())))-f);
}
// Natural logarithm (single precision constants) built on kernel_log,
// written without branches.
//
// fe is scaled by two constants whose sum approximates ln(2):
//   0x3f318000 =  0.693359375
//   0xb95e8083 = -2.12194440e-4
//
// Negative and NaN inputs are replaced by NaN through if_nan_else; then
// edge = a0 - 1/|a0| is added wherever edge is infinite, which is the case
// for zero and infinite a0.
static inline A0 log(const A0& a0)
{
  A0 edge = a0-rec(abs(a0)); // trick to reduce selection testing

  A0 x, fe, x2, y;
  kernel_log(a0, fe, x, x2, y);
  y = madd(fe, single_constant<A0, 0xb95e8083>(), y);
  y = madd(Mhalf<A0>(), x2, y);
  A0 z = x + y;

  A0 regular = madd(single_constant<A0, 0x3f318000>(), fe, z);
  regular = if_nan_else(logical_or(is_ltz(a0), is_nan(a0)), regular);

  return seladd(is_inf(edge), regular, edge);
}
inline float log(const float& a0) { typedef float A0; if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>(); float x, fe, x2, y; kernel_log(a0, fe, x, x2, y); y = fma(fe, Const<float, 0xb95e8083>(), y); y = fma(Mhalf<A0>(), x2, y); A0 z = x + y; return fma(Const<float, 0x3f318000>(), fe, z); }
// Natural logarithm (single precision constants) with explicit handling of
// special inputs.
//
// Returns a0 itself for +infinity, Minf for zero, Nan for NaN or negative
// input; otherwise the value assembled from the kernel_log outputs.
// Constants are requested for the stripped type of A0.
//
// fe is scaled by two constants whose sum approximates ln(2):
//   0x3f318000 =  0.693359375
//   0xb95e8083 = -2.12194440e-4
static inline A0 log(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  // Special inputs, checked in the same order as before.
  if (a0 == Inf<stA0>())  return a0;
  if (is_eqz(a0))         return Minf<stA0>();
  if (nt2::is_nan(a0))    return Nan<stA0>();
  if (is_ltz(a0))         return Nan<stA0>();

  A0 x, fe, x2, y;
  kernel_log(a0, fe, x, x2, y);
  y = madd(fe, single_constant<stA0, 0xb95e8083>(), y);
  y = madd(Mhalf<stA0>(), x2, y);
  A0 z = x + y;

  return madd(single_constant<stA0, 0x3f318000>(), fe, z);
}
// Base-10 logarithm (single precision constants) built on kernel_log,
// written without branches.
//
// Special inputs are handled by selection instead of tests:
//   - the regular result is OR-ed with the is_ltz / is_nan masks, which
//     presumably turns those lanes into NaN (all bits set) - confirm;
//   - edge = a0 - 1/|a0| is added to the result wherever edge is infinite,
//     which is the case for zero and infinite a0.
static inline A0 log10(const A0& a0)
{
  A0 edge = a0-rec(abs(a0)); // trick to reduce selection testing

  A0 x, fe, x2, y;
  kernel_log(a0, fe, x, x2, y);
  y = amul(y, -Half<A0>(), x2);

  // multiply log of fraction by log10(e) and base 2 exponent by log10(2);
  // both factors are applied as a high and a low part.
  A0 z = mul(x+y, single_constant<A0, 0x3a37b152>()); // log10(e)lo = 7.00731903251827651129E-4f
  z = amul(z, y,  single_constant<A0, 0x3ede0000>()); // log10(e)hi = 4.3359375E-1f
  z = amul(z, x,  single_constant<A0, 0x3ede0000>()); // log10(e)hi = 4.3359375E-1f
  z = amul(z, fe, single_constant<A0, 0x39826a14>()); // log10(2)lo = 2.48745663981195213739E-4f
  z = amul(z, fe, single_constant<A0, 0x3e9a0000>()); // log10(2)hi = 3.0078125E-1f

  return seladd(is_inf(edge), b_or(z, b_or(is_ltz(a0), is_nan(a0))), edge);
}
// Base-2 logarithm (single precision constants) with explicit handling of
// special inputs.
//
// Returns a0 itself for +infinity, Minf for zero, Nan for NaN or negative
// input; otherwise the value assembled from the kernel_log outputs.
// Constants are requested for the stripped type of A0.
static inline A0 log2(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  // Special inputs, checked in the same order as before.
  if (a0 == Inf<stA0>())  return a0;
  if (is_eqz(a0))         return Minf<stA0>();
  if (nt2::is_nan(a0))    return Nan<stA0>();
  if (is_ltz(a0))         return Nan<stA0>();

  A0 x, fe, x2, y;
  kernel_log(a0, fe, x, x2, y);
  y = madd(Mhalf<stA0>(), x2, y);

  // Multiply log of fraction by log2(e). Only log2(e) - 1
  // (0.44269504088896340735992) is applied as a factor; x and y themselves
  // are added back in the returned sum.
  A0 z = madd( x
             , single_constant<stA0, 0x3ee2a8ed>()
             , mul(y, single_constant<stA0, 0x3ee2a8ed>())
             );

  return ((z+y)+x)+fe;
}
// Base-10 logarithm (single precision constants) with explicit handling of
// special inputs.
//
// Returns a0 itself for +infinity, Minf for zero, Nan for NaN or negative
// input; otherwise the value assembled from the kernel_log outputs.
// Constants are requested for the stripped type of A0.
static inline A0 log10(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  // Special inputs, checked in the same order as before.
  if (a0 == Inf<stA0>())  return a0;
  if (is_eqz(a0))         return Minf<stA0>();
  if (nt2::is_nan(a0))    return Nan<stA0>();
  if (is_ltz(a0))         return Nan<stA0>();

  A0 x, fe, x2, y;
  kernel_log(a0, fe, x, x2, y);
  y = amul(y, Mhalf<stA0>(), x2);

  // multiply log of fraction by log10(e) and base 2 exponent by log10(2);
  // both factors are applied as a high and a low part.
  A0 z = mul(x+y, single_constant<stA0, 0x3a37b152>()); // log10(e)lo = 7.00731903251827651129E-4f
  z = amul(z, y,  single_constant<stA0, 0x3ede0000>()); // log10(e)hi = 4.3359375E-1f
  z = amul(z, x,  single_constant<stA0, 0x3ede0000>()); // log10(e)hi = 4.3359375E-1f
  z = amul(z, fe, single_constant<stA0, 0x39826a14>()); // log10(2)lo = 2.48745663981195213739E-4f

  return amul(z, fe, single_constant<stA0, 0x3e9a0000 >()); // log10(2)hi = 3.0078125E-1f
}
inline float log2(const float& a0) { typedef float A0; if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>(); A0 x, fe, x2, y; kernel_log(a0, fe, x, x2, y); y = fma(Mhalf<A0>(),x2, y); // multiply log of fraction by log2(e) A0 z = fma( x , Const<float, 0x3ee2a8ed>() , mul(y,Const<float, 0x3ee2a8ed>())// 0.44269504088896340735992 ); return ((z+y)+x)+fe; }
// Arc tangent of a0.
//
// Zero is returned unchanged and infinities map to Pio_2 with the sign of a0.
// Otherwise |a0| is reduced into one of three ranges, each with its own
// offset, a rational approximation in the squared reduced argument is
// evaluated, and the sign of a0 is restored on the result.
static inline A0 atan(const A0& a0)
{
  if (is_eqz(a0)) return a0;
  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);

  A0 t = nt2::abs(a0);
  A0 offset;

  // corr scales the small correction term added below:
  // 1 in the upper range, Half in the middle range, 0 in the lower range.
  A0 corr = (t > double_constant<double,0x4003504f333f9de6ll>());
  if (corr)
  {
    // |a0| above 0x4003504f333f9de6 (about 2.4142)
    offset = Pio_2<A0>();
    t = -rec(t);
  }
  else if ((t <= double_constant<double,0x3fe51eb851eb851fll>()))
  {
    // |a0| up to 0x3fe51eb851eb851f (about 0.66): no reduction
    offset = Zero<A0>();
  }
  else
  {
    offset = Pio_4<A0>();
    corr = Half<A0>();
    t = minusone(t)/oneplus(t);
  }

  // Rational approximation in w = t*t.
  A0 w = sqr(t);
  w = w*horner< NT2_HORNER_COEFF_T(stype, 5,
                                   (0xbfec007fa1f72594ll,
                                    0xc03028545b6b807all,
                                    0xc052c08c36880273ll,
                                    0xc05eb8bf2d05ba25ll,
                                    0xc0503669fd28ec8ell)
                                  )>(w)/
        horner< NT2_HORNER_COEFF_T(stype, 6,
                                   (0x3ff0000000000000ll,
                                    0x4038dbc45b14603cll,
                                    0x4064a0dd43b8fa25ll,
                                    0x407b0e18d2e2be3bll,
                                    0x407e563f13b049eall,
                                    0x4068519efbbd62ecll)
                                  )>(w);
  w = madd(t, w, t);

  // Small correction (about 6.12e-17), weighted by corr.
  static const A0 morebits = double_constant<double,0x3c91a62633145c07ll>();
  w += corr * morebits;
  offset = offset + w;

  return is_ltz(a0) ? -offset : offset;
}
// Natural logarithm (single precision constants) built on kernel_log,
// written without branches.
//
// fe is scaled by two constants whose sum approximates ln(2):
//   0x3f318000 =  0.693359375
//   0xb95e8083 = -2.12194440e-4
//
// Special inputs are handled by selection instead of tests:
//   - the regular result is OR-ed with the is_ltz / is_nan masks, which
//     presumably turns those lanes into NaN (all bits set) - confirm;
//   - edge = a0 - 1/|a0| is added to the result wherever edge is infinite,
//     which is the case for zero and infinite a0.
static inline A0 log(const A0& a0)
{
  A0 edge = a0-rec(abs(a0)); // trick to reduce selection testing

  A0 x, fe, x2, y;
  kernel_log(a0, fe, x, x2, y);
  y = madd(fe, single_constant<A0, 0xb95e8083>(), y);
  y = madd(Mhalf<A0>(), x2, y);
  A0 z = x + y;

  A0 regular = madd(single_constant<A0, 0x3f318000>(), fe, z);
  A0 guarded = b_or(regular, b_or(is_ltz(a0), is_nan(a0)));

  return seladd(is_inf(edge), guarded, edge);
}
); namespace nt2 { namespace ext { template<class X, class Dummy> struct call<tag::negation_(tag::simd_<tag::arithmetic_, X> , tag::simd_<tag::arithmetic_, X> ), tag::cpu_, Dummy> : callable { template<class Sig> struct result; template<class This,class A0> struct result<This(A0,A0)> : meta::strip<A0>{};// NT2_FUNCTOR_CALL(2) { return sel(is_ltz(a1),-a0,b_and(is_nez(a1), a0)); } }; } } ///////////////////////////////////////////////////////////////////////////// // Implementation when type A0 is unsigned ///////////////////////////////////////////////////////////////////////////// NT2_REGISTER_DISPATCH(tag::negation_, tag::cpu_, (A0)(X), ((simd_<unsigned_<A0>,X>)) ((simd_<unsigned_<A0>,X>)) ); namespace nt2 { namespace ext {
namespace nt2 { namespace ext { template<class X, class Dummy> struct call<tag::negate_(tag::simd_<tag::arithmetic_, X> , tag::simd_<tag::arithmetic_, X> ), tag::cpu_, Dummy> : callable { template<class Sig> struct result; template<class This,class A0> struct result<This(A0,A0)> : meta::strip<A0>{};// NT2_FUNCTOR_CALL(2) { return sel(is_ltz(a1),-a0,is_nez(a1)&a0); } }; } } ///////////////////////////////////////////////////////////////////////////// // Implementation when type A0 is unsigned ///////////////////////////////////////////////////////////////////////////// NT2_REGISTER_DISPATCH(tag::negate_, tag::cpu_, (A0)(X), ((simd_<unsigned_<A0>,X>)) ((simd_<unsigned_<A0>,X>)) ); namespace nt2 { namespace ext {