Esempio n. 1
0
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
  // DAG-LABEL: test_mm_cmpeq_pd
  // DAG: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  //
  // ASM-LABEL: test_mm_cmpeq_pd
  // ASM: cmpeqpd
  return _mm_cmpeq_pd(A, B);
}
Esempio n. 2
0
void _SIMD_cmpeq_pd(__SIMDd a, __SIMDd b, void** resultPtr)
{
  __SIMDd* result = (__SIMDd*)malloc(sizeof(__SIMDd));
  *resultPtr = result;
#ifdef  USE_SSE
  *result = _mm_cmpeq_pd(a,b);
#elif defined USE_AVX
  *result = _mm256_cmp(a,b,0);
#elif defined USE_IBM
  *result = vec_cmpeq(a,b);
#endif
}
Esempio n. 3
0
BI_FORCE_INLINE inline sse_double operator==(const sse_double& o1,
    const sse_double& o2) {
  sse_double res;
  res.packed = _mm_cmpeq_pd(o1.packed, o2.packed);
  return res;
}
Esempio n. 4
0
	void blurRemoveMinMax_(const Mat& src, Mat& dest, const int r)
	{
		const Size ksize = Size(2 * r + 1, 2 * r + 1);
		if (src.data != dest.data)src.copyTo(dest);

		Mat xv;
		Mat nv;
		Mat element = Mat::ones(2 * r + 1, 2 * r + 1, CV_8U);
		dilate(src, xv, element);
		erode(src, nv, element);

		Mat mind;
		Mat maxd;
		Mat mask;
		absdiff(src, nv, mind);//can move to loop
		absdiff(src, xv, maxd);//
		min(mind, maxd, mask);//

		T* n = nv.ptr<T>(0);
		T* x = xv.ptr<T>(0);
		T* d = dest.ptr<T>(0);
		T* nd = mind.ptr<T>(0);
		T* mk = mask.ptr<T>(0);

		int remsize = src.size().area();

#if CV_SSE4_1
		if (src.depth() == CV_8U)
		{

			const int ssesize = src.size().area() / 16;
			remsize = src.size().area() - ssesize * 16;
			for (int i = 0; i < ssesize; i++)
			{
				__m128i mmk = _mm_load_si128((__m128i*)mk);
				__m128i mnd = _mm_load_si128((__m128i*)nd);

				__m128i mmn = _mm_load_si128((__m128i*)n);
				__m128i mmx = _mm_load_si128((__m128i*)x);
				__m128i msk = _mm_cmpeq_epi8(mnd, mmk);
				_mm_stream_si128((__m128i*)d, _mm_blendv_epi8(mmx, mmn, msk));
				nd += 16;
				mk += 16;
				d += 16;
				n += 16;
				x += 16;
			}
		}
		else if (src.depth() == CV_16S || src.depth() == CV_16U)
		{

			const int ssesize = src.size().area() / 8;
			remsize = src.size().area() - ssesize * 8;
			for (int i = 0; i < ssesize; i++)
			{
				__m128i mmk = _mm_load_si128((__m128i*)mk);
				__m128i mnd = _mm_load_si128((__m128i*)nd);

				__m128i mmn = _mm_load_si128((__m128i*)n);
				__m128i mmx = _mm_load_si128((__m128i*)x);
				__m128i msk = _mm_cmpeq_epi16(mnd, mmk);
				_mm_stream_si128((__m128i*)d, _mm_blendv_epi8(mmx, mmn, msk));
				nd += 8;
				mk += 8;
				d += 8;
				n += 8;
				x += 8;
			}
		}
		else if (src.depth() == CV_32F)
		{

			const int ssesize = src.size().area() / 4;
			remsize = src.size().area() - ssesize * 4;
			for (int i = 0; i < ssesize; i++)
			{
				__m128 mmk = _mm_load_ps((float*)mk);
				__m128 mnd = _mm_load_ps((float*)nd);

				__m128 mmn = _mm_load_ps((float*)n);
				__m128 mmx = _mm_load_ps((float*)x);
				__m128 msk = _mm_cmpeq_ps(mnd, mmk);
				_mm_stream_ps((float*)d, _mm_blendv_ps(mmx, mmn, msk));
				nd += 4;
				mk += 4;
				d += 4;
				n += 4;
				x += 4;
			}
		}
		else if (src.depth() == CV_64F)
		{
			const int ssesize = src.size().area() / 2;
			remsize = src.size().area() - ssesize * 2;
			for (int i = 0; i < ssesize; i++)
			{
				__m128d mmk = _mm_load_pd((double*)mk);
				__m128d mnd = _mm_load_pd((double*)nd);

				__m128d mmn = _mm_load_pd((double*)n);
				__m128d mmx = _mm_load_pd((double*)x);
				__m128d msk = _mm_cmpeq_pd(mnd, mmk);
				_mm_stream_pd((double*)d, _mm_blendv_pd(mmx, mmn, msk));
				nd += 2;
				mk += 2;
				d += 2;
				n += 2;
				x += 2;
			}
		}
#endif
		for (int i = 0; i < remsize; i++)
		{
			{
				if (nd[i] == mk[i])
				{
					d[i] = n[i];
				}
				else
				{
					d[i] = x[i];
				}
			}
		}
	}
Esempio n. 5
0
{
  template<class Dummy>
  struct  call< tag::is_equal_( tag::simd_<tag::double_,tag::sse_>
                              , tag::simd_<tag::double_,tag::sse_>
                              )
              , tag::cpu_, Dummy
              >
        : callable
  {
    template<class Sig>           struct result;
    template<class This,class A0>
    struct result<This(A0,A0)> : meta::strip<A0> {};

    NT2_FUNCTOR_CALL(2)
    {
      A0 that = { _mm_cmpeq_pd(a0,a1) };
      return that;
    }
  };
} }

////////////////////////////////////////////////////////////////////////////////
// Overloads implementation for float
////////////////////////////////////////////////////////////////////////////////
NT2_REGISTER_DISPATCH ( tag::is_equal_, tag::cpu_, (A0)
                      , ((simd_<float_<A0>,tag::sse_>))
                        ((simd_<float_<A0>,tag::sse_>))
                      );

namespace nt2 { namespace ext
{
Esempio n. 6
0
 static inline __m128d gen_ones(void)
 {
     __m128d x = gen_zero();
     __m128d ones = _mm_cmpeq_pd(x, x);
     return ones;
 }
Esempio n. 7
0
__m128d test_mm_cmpeq_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpeq_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_pd(__a, __b);
}