// compute log in base a of x, where a is 1, 2 or ten depending on Tag static inline A0 loga(const A0& a0) { if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::isnan(a0)||isltz(a0)) return Nan<A0>(); A0 x, fe, x2, y; approx_t::kernel_log(a0, fe, x, x2, y); return finalize_t::finalize(fe, x, x2, y); }
inline float log(const float& a0) { typedef float A0; if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>(); float x, fe, x2, y; kernel_log(a0, fe, x, x2, y); y = fma(fe, Const<float, 0xb95e8083>(), y); y = fma(Mhalf<A0>(), x2, y); A0 z = x + y; return fma(Const<float, 0x3f318000>(), fe, z); }
inline float log10(const float& a0) { typedef float A0; if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::is_nan(a0)||isltz(a0)) return Nan<A0>(); A0 x, fe, x2, y; kernel_log(a0, fe, x, x2, y); y = amul(y, -Half<A0>(), x2); // multiply log of fraction by log10(e) and base 2 exponent by log10(2) A0 z = mul(x+y, Const<float, 0x3a37b152>());//7.00731903251827651129E-4f // log10(e)lo z = amul(z, y, Const<float, 0x3ede0000>()); //4.3359375E-1f // log10(e)hi z = amul(z, x, Const<float, 0x3ede0000>()); z = amul(z, fe, Const<float, 0x39826a14>());//3.0078125E-1f // log10(2)hi return amul(z, fe, Const<float, 0x3e9a0000 >());//2.48745663981195213739E-4f // log10(2)lo }
inline float log2(const float& a0) { typedef float A0; if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>(); A0 x, fe, x2, y; kernel_log(a0, fe, x, x2, y); y = fma(Mhalf<A0>(),x2, y); // multiply log of fraction by log2(e) A0 z = fma( x , Const<float, 0x3ee2a8ed>() , mul(y,Const<float, 0x3ee2a8ed>())// 0.44269504088896340735992 ); return ((z+y)+x)+fe; }
tag::cpu_, Dummy> : callable { template<class Sig> struct result; template<class This,class A0> struct result<This(A0)> : meta::strip<A0>{};// NT2_FUNCTOR_CALL(1) { A0 const na = isnez(a0); A0 n = add(shri(a0, 4), Four<A0>()); A0 n1 = shri(n+a0/n, 1); A0 msk = b_and(isle(n1,n), na); n = select(msk,n1,n); n1 = sqr(n); msk = b_or(isgt(n1,a0), b_and(iseqz(n1), na)); n = seladd( msk, n, Mone<A0>()); return seladd(na, Zero<A0>(), n); } }; } } ///////////////////////////////////////////////////////////////////////////// // Implementation when type A0 is arithmetic_ ///////////////////////////////////////////////////////////////////////////// NT2_REGISTER_DISPATCH(tag::sqrt_, tag::cpu_, (A0), ((simd_<arithmetic_<A0>,tag::xop_>)) );