double UMat::dot(InputArray m) const { CV_Assert(m.sameSize(*this) && m.type() == type()); #ifdef HAVE_OPENCL double r = 0; CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r) #endif return getMat(ACCESS_READ).dot(m); }
void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi, double timestamp, double duration ) { CV_Assert( _silhouette.type() == CV_8UC1 && _mhi.type() == CV_32FC1 ); CV_Assert( _silhouette.sameSize(_mhi) ); float ts = (float)timestamp; float delbound = (float)(timestamp - duration); CV_OCL_RUN(_mhi.isUMat() && _mhi.dims() <= 2, ocl_updateMotionHistory(_silhouette, _mhi, ts, delbound)) Mat silh = _silhouette.getMat(), mhi = _mhi.getMat(); Size size = silh.size(); if( silh.isContinuous() && mhi.isContinuous() ) { size.width *= size.height; size.height = 1; } #if CV_SSE2 volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2); #endif for(int y = 0; y < size.height; y++ ) { const uchar* silhData = silh.ptr<uchar>(y); float* mhiData = mhi.ptr<float>(y); int x = 0; #if CV_SSE2 if( useSIMD ) { __m128 ts4 = _mm_set1_ps(ts), db4 = _mm_set1_ps(delbound); for( ; x <= size.width - 8; x += 8 ) { __m128i z = _mm_setzero_si128(); __m128i s = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(silhData + x)), z); __m128 s0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(s, z)), s1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(s, z)); __m128 v0 = _mm_loadu_ps(mhiData + x), v1 = _mm_loadu_ps(mhiData + x + 4); __m128 fz = _mm_setzero_ps(); v0 = _mm_and_ps(v0, _mm_cmpge_ps(v0, db4)); v1 = _mm_and_ps(v1, _mm_cmpge_ps(v1, db4)); __m128 m0 = _mm_and_ps(_mm_xor_ps(v0, ts4), _mm_cmpneq_ps(s0, fz)); __m128 m1 = _mm_and_ps(_mm_xor_ps(v1, ts4), _mm_cmpneq_ps(s1, fz)); v0 = _mm_xor_ps(v0, m0); v1 = _mm_xor_ps(v1, m1); _mm_storeu_ps(mhiData + x, v0); _mm_storeu_ps(mhiData + x + 4, v1); } } #endif for( ; x < size.width; x++ ) { float val = mhiData[x]; val = silhData[x] ? ts : val < delbound ? 0 : val; mhiData[x] = val; } } }