Пример #1
0
double UMat::dot(InputArray m) const
{
    CV_Assert(m.sameSize(*this) && m.type() == type());

#ifdef HAVE_OPENCL
    double r = 0;
    CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
#endif

    return getMat(ACCESS_READ).dot(m);
}
Пример #2
0
void cv::updateMotionHistory( InputArray _silhouette, InputOutputArray _mhi,
                              double timestamp, double duration )
{
    CV_Assert( _silhouette.type() == CV_8UC1 && _mhi.type() == CV_32FC1 );
    CV_Assert( _silhouette.sameSize(_mhi) );

    float ts = (float)timestamp;
    float delbound = (float)(timestamp - duration);

    CV_OCL_RUN(_mhi.isUMat() && _mhi.dims() <= 2,
               ocl_updateMotionHistory(_silhouette, _mhi, ts, delbound))

    Mat silh = _silhouette.getMat(), mhi = _mhi.getMat();
    Size size = silh.size();

    if( silh.isContinuous() && mhi.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
    }

#if CV_SSE2
    volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2);
#endif

    for(int y = 0; y < size.height; y++ )
    {
        const uchar* silhData = silh.ptr<uchar>(y);
        float* mhiData = mhi.ptr<float>(y);
        int x = 0;

#if CV_SSE2
        if( useSIMD )
        {
            __m128 ts4 = _mm_set1_ps(ts), db4 = _mm_set1_ps(delbound);
            for( ; x <= size.width - 8; x += 8 )
            {
                __m128i z = _mm_setzero_si128();
                __m128i s = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(silhData + x)), z);
                __m128 s0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(s, z)), s1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(s, z));
                __m128 v0 = _mm_loadu_ps(mhiData + x), v1 = _mm_loadu_ps(mhiData + x + 4);
                __m128 fz = _mm_setzero_ps();

                v0 = _mm_and_ps(v0, _mm_cmpge_ps(v0, db4));
                v1 = _mm_and_ps(v1, _mm_cmpge_ps(v1, db4));

                __m128 m0 = _mm_and_ps(_mm_xor_ps(v0, ts4), _mm_cmpneq_ps(s0, fz));
                __m128 m1 = _mm_and_ps(_mm_xor_ps(v1, ts4), _mm_cmpneq_ps(s1, fz));

                v0 = _mm_xor_ps(v0, m0);
                v1 = _mm_xor_ps(v1, m1);

                _mm_storeu_ps(mhiData + x, v0);
                _mm_storeu_ps(mhiData + x + 4, v1);
            }
        }
#endif

        for( ; x < size.width; x++ )
        {
            float val = mhiData[x];
            val = silhData[x] ? ts : val < delbound ? 0 : val;
            mhiData[x] = val;
        }
    }
}