template<class Extension,class Info> struct call<nbtrue_,tag::simd_(tag::arithmetic_,Extension),Info> { typedef int32_t result_type; NT2_FUNCTOR_CALL_DISPATCH( 1, typename nt2::meta::scalar_of<A0>::type, (3, (float,double,arithmetic_)) ) NT2_FUNCTOR_CALL_EVAL_IF(1, float) { typedef typename meta::as_real<A0>::type type; int32_t r = _mm256_movemask_ps(isnez(a0)); return (r&1)+((r>>1)&1)+((r>>2)&1)+(r>>3&1)+((r>>4)&1)+((r>>5)&1)+(r>>6&1)+(r>>7); // return __builtin_popcount(_mm_movemask_ps(isnez(cast<type>(a0)))); } NT2_FUNCTOR_CALL_EVAL_IF(1, double) { int32_t r = _mm256_movemask_pd(isnez(a0)); return (r&1)+(r>>1&1)+((r>>2)&1)+(r>>3); } NT2_FUNCTOR_CALL_EVAL_IF(1, arithmetic_) { typedef typename meta::scalar_of<A0>::type sctype; typedef typename simd::native<sctype, tag::sse_ > svtype; svtype a00 = { _mm256_extractf128_si256(a0, 0)}; svtype a01 = { _mm256_extractf128_si256(a0, 1)}; return nbtrue(a00)+nbtrue(a01);
((simd_<uint8_<A0>,tag::xop_>)) ); namespace nt2 { namespace ext { template<class Dummy> struct call<tag::sqrt_(tag::simd_<tag::uint8_, tag::xop_)), tag::cpu_, Dummy> : callable { template<class Sig> struct result; template<class This,class A0> struct result<This(A0)> : meta::strip<A0>{};// NT2_FUNCTOR_CALL(1) { A0 const na = isnez(a0); A0 n = add(shri(a0, 4), Four<A0>()); A0 n1 = shri(n+a0/n, 1); A0 msk = b_and(isle(n1,n), na); n = select(msk,n1,n); n1 = sqr(n); msk = b_or(isgt(n1,a0), b_and(iseqz(n1), na)); n = seladd( msk, n, Mone<A0>()); return seladd(na, Zero<A0>(), n); } }; } } /////////////////////////////////////////////////////////////////////////////