Пример #1
0
void SelfSimDescriptor::compute(const Mat& img, vector<float>& descriptors, Size winStride,
                                const vector<Point>& locations) const
{
    CV_Assert( img.depth() == CV_8U );

    winStride.width = std::max(winStride.width, 1);
    winStride.height = std::max(winStride.height, 1);
    Size gridSize = getGridSize(img.size(), winStride);
    int i, nwindows = locations.empty() ? gridSize.width*gridSize.height : (int)locations.size();
    int border = largeSize/2 + smallSize/2;
    int fsize = (int)getDescriptorSize();
    vector<float> tempFeature(fsize+1);
    descriptors.resize(fsize*nwindows + 1);
    Mat ssd(largeSize, largeSize, CV_32F), mappingMask;
    computeLogPolarMapping(mappingMask);

#if 0 //def _OPENMP
    int nthreads = cvGetNumThreads();
    #pragma omp parallel for num_threads(nthreads)
#endif
    for( i = 0; i < nwindows; i++ )
    {
        Point pt;
        float* feature0 = &descriptors[fsize*i];
        float* feature = &tempFeature[0];
        int x, y, j;

        if( !locations.empty() )
        {
            pt = locations[i];
            if( pt.x < border || pt.x >= img.cols - border ||
                pt.y < border || pt.y >= img.rows - border )
            {
                for( j = 0; j < fsize; j++ )
                    feature0[j] = 0.f;
                continue;
            }
        }
        else
            pt = Point((i % gridSize.width)*winStride.width + border,
                       (i / gridSize.width)*winStride.height + border);

        SSD(img, pt, ssd);

        // Determine in the local neighborhood the largest difference and use for normalization
        float var_noise = 1000.f;
        for( y = -1; y <= 1 ; y++ )
            for( x = -1 ; x <= 1 ; x++ )
                var_noise = std::max(var_noise, ssd.at<float>(largeSize/2+y, largeSize/2+x));

        for( j = 0; j <= fsize; j++ )
            feature[j] = FLT_MAX;

        // Derive feature vector before exp(-x) computation
        // Idea: for all  x,a >= 0, a=const.   we have:
        //       max [ exp( -x / a) ] = exp ( -min(x) / a )
        // Thus, determine min(ssd) and store in feature[...]
        for( y = 0; y < ssd.rows; y++ )
        {
            const schar *mappingMaskPtr = mappingMask.ptr<schar>(y);
            const float *ssdPtr = ssd.ptr<float>(y);
            for( x = 0 ; x < ssd.cols; x++ )
            {
                int index = mappingMaskPtr[x];
                feature[index] = std::min(feature[index], ssdPtr[x]);
            }
        }

        var_noise = -1.f/var_noise;
        for( j = 0; j < fsize; j++ )
            feature0[j] = feature[j]*var_noise;
        Mat _f(1, fsize, CV_32F, feature0);
        cv::exp(_f, _f);
    }
}
void
icvCrossCorr( const CvArr* _img, const CvArr* _templ, CvArr* _corr,
              CvPoint anchor, double delta, int borderType )
{
    // disable OpenMP in the case of Visual Studio,
    // otherwise the performance drops significantly
#undef USE_OPENMP
#if !defined _MSC_VER || defined CV_ICC
    #define USE_OPENMP 1
#endif

    const double block_scale = 4.5;
    const int min_block_size = 256;
    cv::Ptr<CvMat> dft_img[CV_MAX_THREADS];
    cv::Ptr<CvMat> dft_templ;
    std::vector<uchar> buf[CV_MAX_THREADS];
    int k, num_threads = 0;
    
    CvMat istub, *img = (CvMat*)_img;
    CvMat tstub, *templ = (CvMat*)_templ;
    CvMat cstub, *corr = (CvMat*)_corr;
    CvSize dftsize, blocksize;
    int depth, templ_depth, corr_depth, max_depth = CV_32F,
        cn, templ_cn, corr_cn, buf_size = 0,
        tile_count_x, tile_count_y, tile_count;

    img = cvGetMat( img, &istub );
    templ = cvGetMat( templ, &tstub );
    corr = cvGetMat( corr, &cstub );

    if( CV_MAT_DEPTH( img->type ) != CV_8U &&
        CV_MAT_DEPTH( img->type ) != CV_16U &&
        CV_MAT_DEPTH( img->type ) != CV_32F &&
        CV_MAT_DEPTH( img->type ) != CV_64F )
        CV_Error( CV_StsUnsupportedFormat,
        "The function supports only 8u, 16u and 32f data types" );

    if( !CV_ARE_DEPTHS_EQ( img, templ ) && CV_MAT_DEPTH( templ->type ) != CV_32F )
        CV_Error( CV_StsUnsupportedFormat,
        "Template (kernel) must be of the same depth as the input image, or be 32f" );
    
    if( !CV_ARE_DEPTHS_EQ( img, corr ) && CV_MAT_DEPTH( corr->type ) != CV_32F &&
        CV_MAT_DEPTH( corr->type ) != CV_64F )
        CV_Error( CV_StsUnsupportedFormat,
        "The output image must have the same depth as the input image, or be 32f/64f" );

    if( (!CV_ARE_CNS_EQ( img, corr ) || CV_MAT_CN(templ->type) > 1) &&
        (CV_MAT_CN( corr->type ) > 1 || !CV_ARE_CNS_EQ( img, templ)) )
        CV_Error( CV_StsUnsupportedFormat,
        "The output must have the same number of channels as the input (when the template has 1 channel), "
        "or the output must have 1 channel when the input and the template have the same number of channels" );

    depth = CV_MAT_DEPTH(img->type);
    cn = CV_MAT_CN(img->type);
    templ_depth = CV_MAT_DEPTH(templ->type);
    templ_cn = CV_MAT_CN(templ->type);
    corr_depth = CV_MAT_DEPTH(corr->type);
    corr_cn = CV_MAT_CN(corr->type);

    CV_Assert( corr_cn == 1 || delta == 0 );

    max_depth = MAX( max_depth, templ_depth );
    max_depth = MAX( max_depth, depth );
    max_depth = MAX( max_depth, corr_depth );
    if( depth > CV_8U )
        max_depth = CV_64F;

    /*if( img->cols < templ->cols || img->rows < templ->rows )
        CV_Error( CV_StsUnmatchedSizes,
        "Such a combination of image and template/filter size is not supported" );*/

    if( corr->rows > img->rows + templ->rows - 1 ||
        corr->cols > img->cols + templ->cols - 1 )
        CV_Error( CV_StsUnmatchedSizes,
        "output image should not be greater than (W + w - 1)x(H + h - 1)" );

    blocksize.width = cvRound(templ->cols*block_scale);
    blocksize.width = MAX( blocksize.width, min_block_size - templ->cols + 1 );
    blocksize.width = MIN( blocksize.width, corr->cols );
    blocksize.height = cvRound(templ->rows*block_scale);
    blocksize.height = MAX( blocksize.height, min_block_size - templ->rows + 1 );
    blocksize.height = MIN( blocksize.height, corr->rows );

    dftsize.width = cvGetOptimalDFTSize(blocksize.width + templ->cols - 1);
    if( dftsize.width == 1 )
        dftsize.width = 2;
    dftsize.height = cvGetOptimalDFTSize(blocksize.height + templ->rows - 1);
    if( dftsize.width <= 0 || dftsize.height <= 0 )
        CV_Error( CV_StsOutOfRange, "the input arrays are too big" );

    // recompute block size
    blocksize.width = dftsize.width - templ->cols + 1;
    blocksize.width = MIN( blocksize.width, corr->cols );
    blocksize.height = dftsize.height - templ->rows + 1;
    blocksize.height = MIN( blocksize.height, corr->rows );

    dft_templ = cvCreateMat( dftsize.height*templ_cn, dftsize.width, max_depth );

#ifdef USE_OPENMP
    num_threads = cvGetNumThreads();
#else
    num_threads = 1;
#endif

    for( k = 0; k < num_threads; k++ )
        dft_img[k] = cvCreateMat( dftsize.height, dftsize.width, max_depth );

    if( templ_cn > 1 && templ_depth != max_depth )
        buf_size = templ->cols*templ->rows*CV_ELEM_SIZE(templ_depth);

    if( cn > 1 && depth != max_depth )
        buf_size = MAX( buf_size, (blocksize.width + templ->cols - 1)*
            (blocksize.height + templ->rows - 1)*CV_ELEM_SIZE(depth));

    if( (corr_cn > 1 || cn > 1) && corr_depth != max_depth )
        buf_size = MAX( buf_size, blocksize.width*blocksize.height*CV_ELEM_SIZE(corr_depth));

    if( buf_size > 0 )
    {
        for( k = 0; k < num_threads; k++ )
            buf[k].resize(buf_size);
    }

    // compute DFT of each template plane
    for( k = 0; k < templ_cn; k++ )
    {
        CvMat dstub, *src, *dst, temp;
        CvMat* planes[] = { 0, 0, 0, 0 };
        int yofs = k*dftsize.height;

        src = templ;
        dst = cvGetSubRect( dft_templ, &dstub, cvRect(0,yofs,templ->cols,templ->rows));
    
        if( templ_cn > 1 )
        {
            planes[k] = templ_depth == max_depth ? dst :
                cvInitMatHeader( &temp, templ->rows, templ->cols, templ_depth, &buf[0][0] );
            cvSplit( templ, planes[0], planes[1], planes[2], planes[3] );
            src = planes[k];
            planes[k] = 0;
        }

        if( dst != src )
            cvConvert( src, dst );

        if( dft_templ->cols > templ->cols )
        {
            cvGetSubRect( dft_templ, dst, cvRect(templ->cols, yofs,
                          dft_templ->cols - templ->cols, templ->rows) );
            cvZero( dst );
        }
        cvGetSubRect( dft_templ, dst, cvRect(0,yofs,dftsize.width,dftsize.height) );
        cvDFT( dst, dst, CV_DXT_FORWARD + CV_DXT_SCALE, templ->rows );
    }

    tile_count_x = (corr->cols + blocksize.width - 1)/blocksize.width;
    tile_count_y = (corr->rows + blocksize.height - 1)/blocksize.height;
    tile_count = tile_count_x*tile_count_y;

#if defined _OPENMP && defined USE_OPENMP
    #pragma omp parallel for num_threads(num_threads) schedule(dynamic)
#endif
    // calculate correlation by blocks
    for( k = 0; k < tile_count; k++ )
    {
#ifdef USE_OPENMP
        int thread_idx = cvGetThreadNum();
#else
        int thread_idx = 0;
#endif
        int x = (k%tile_count_x)*blocksize.width;
        int y = (k/tile_count_x)*blocksize.height;
        int i, yofs;
        CvMat sstub, dstub, *src, *dst, temp;
        CvMat* planes[] = { 0, 0, 0, 0 };
        CvMat* _dft_img = dft_img[thread_idx];
        uchar* _buf = buf_size > 0 ? &buf[thread_idx][0] : 0;
        CvSize csz = { blocksize.width, blocksize.height }, isz;
        int x0 = x - anchor.x, y0 = y - anchor.y;
        int x1 = MAX( 0, x0 ), y1 = MAX( 0, y0 ), x2, y2;
        csz.width = MIN( csz.width, corr->cols - x );
        csz.height = MIN( csz.height, corr->rows - y );
        isz.width = csz.width + templ->cols - 1;
        isz.height = csz.height + templ->rows - 1;
        x2 = MIN( img->cols, x0 + isz.width );
        y2 = MIN( img->rows, y0 + isz.height );
        
        for( i = 0; i < cn; i++ )
        {
            CvMat dstub1, *dst1;
            yofs = i*dftsize.height;

            src = cvGetSubRect( img, &sstub, cvRect(x1,y1,x2-x1,y2-y1) );
            dst = cvGetSubRect( _dft_img, &dstub,
                cvRect(0,0,isz.width,isz.height) );
            dst1 = dst;
            
            if( x2 - x1 < isz.width || y2 - y1 < isz.height )
                dst1 = cvGetSubRect( _dft_img, &dstub1,
                    cvRect( x1 - x0, y1 - y0, x2 - x1, y2 - y1 ));

            if( cn > 1 )
            {
                planes[i] = dst1;
                if( depth != max_depth )
                    planes[i] = cvInitMatHeader( &temp, y2 - y1, x2 - x1, depth, _buf );
                cvSplit( src, planes[0], planes[1], planes[2], planes[3] );
                src = planes[i];
                planes[i] = 0;
            }

            if( dst1 != src )
                cvConvert( src, dst1 );

            if( dst != dst1 )
                cvCopyMakeBorder( dst1, dst, cvPoint(x1 - x0, y1 - y0), borderType );

            if( dftsize.width > isz.width )
            {
                cvGetSubRect( _dft_img, dst, cvRect(isz.width, 0,
                      dftsize.width - isz.width,dftsize.height) );
                cvZero( dst );
            }

            cvDFT( _dft_img, _dft_img, CV_DXT_FORWARD, isz.height );
            cvGetSubRect( dft_templ, dst,
                cvRect(0,(templ_cn>1?yofs:0),dftsize.width,dftsize.height) );

            cvMulSpectrums( _dft_img, dst, _dft_img, CV_DXT_MUL_CONJ );
            cvDFT( _dft_img, _dft_img, CV_DXT_INVERSE, csz.height );

            src = cvGetSubRect( _dft_img, &sstub, cvRect(0,0,csz.width,csz.height) );
            dst = cvGetSubRect( corr, &dstub, cvRect(x,y,csz.width,csz.height) );

            if( corr_cn > 1 )
            {
                planes[i] = src;
                if( corr_depth != max_depth )
                {
                    planes[i] = cvInitMatHeader( &temp, csz.height, csz.width,
                                                 corr_depth, _buf );
                    cvConvertScale( src, planes[i], 1, delta );
                }
                cvMerge( planes[0], planes[1], planes[2], planes[3], dst );
                planes[i] = 0;                    
            }
            else
            {
                if( i == 0 )
                    cvConvertScale( src, dst, 1, delta );
                else
                {
                    if( max_depth > corr_depth )
                    {
                        cvInitMatHeader( &temp, csz.height, csz.width,
                                         corr_depth, _buf );
                        cvConvert( src, &temp );
                        src = &temp;
                    }
                    cvAcc( src, dst );
                }
            }
        }
    }
}
Пример #3
0
static void
icvTrueDistTrans( const CvMat* src, CvMat* dst )
{
    cv::Ptr<CvMat> buffer = 0;

    int i, m, n;
    int sstep, dstep;
    const float inf = 1e6f;
    int thread_count = cvGetNumThreads();
    int pass1_sz, pass2_sz;

    if( !CV_ARE_SIZES_EQ( src, dst ))
        CV_Error( CV_StsUnmatchedSizes, "" );

    if( CV_MAT_TYPE(src->type) != CV_8UC1 ||
        CV_MAT_TYPE(dst->type) != CV_32FC1 )
        CV_Error( CV_StsUnsupportedFormat,
        "The input image must have 8uC1 type and the output one must have 32fC1 type" );

    m = src->rows;
    n = src->cols;

    // (see stage 1 below):
    // sqr_tab: 2*m, sat_tab: 3*m + 1, d: m*thread_count,
    pass1_sz = src->rows*(5 + thread_count) + 1;
    // (see stage 2):
    // sqr_tab & inv_tab: n each; f & v: n*thread_count each; z: (n+1)*thread_count
    pass2_sz = src->cols*(2 + thread_count*3) + thread_count;
    buffer = cvCreateMat( 1, MAX(pass1_sz, pass2_sz), CV_32FC1 );

    sstep = src->step;
    dstep = dst->step / sizeof(float);

    // stage 1: compute 1d distance transform of each column
    float* sqr_tab = buffer->data.fl;
    int* sat_tab = (int*)(sqr_tab + m*2);
    const int shift = m*2;

    for( i = 0; i < m; i++ )
        sqr_tab[i] = (float)(i*i);
    for( i = m; i < m*2; i++ )
        sqr_tab[i] = inf;
    for( i = 0; i < shift; i++ )
        sat_tab[i] = 0;
    for( ; i <= m*3; i++ )
        sat_tab[i] = i - shift;

#ifdef _OPENMP
    #pragma omp parallel for num_threads(thread_count)
#endif
    for( i = 0; i < n; i++ )
    {
        const uchar* sptr = src->data.ptr + i + (m-1)*sstep;
        float* dptr = dst->data.fl + i;
        int* d = (int*)(sat_tab + m*3+1+m*cvGetThreadNum());
        int j, dist = m-1;

        for( j = m-1; j >= 0; j--, sptr -= sstep )
        {
            dist = (dist + 1) & (sptr[0] == 0 ? 0 : -1);
            d[j] = dist;
        }

        dist = m-1;
        for( j = 0; j < m; j++, dptr += dstep )
        {
            dist = dist + 1 - sat_tab[dist + 1 - d[j] + shift];
            d[j] = dist;
            dptr[0] = sqr_tab[dist];
        }
    }

    // stage 2: compute modified distance transform for each row
    float* inv_tab = buffer->data.fl;
    sqr_tab = inv_tab + n;

    inv_tab[0] = sqr_tab[0] = 0.f;
    for( i = 1; i < n; i++ )
    {
        inv_tab[i] = (float)(0.5/i);
        sqr_tab[i] = (float)(i*i);
    }

#ifdef _OPENMP
    #pragma omp parallel for num_threads(thread_count) schedule(dynamic)
#endif
    for( i = 0; i < m; i++ )
    {
        float* d = (float*)(dst->data.ptr + i*dst->step);
        float* f = sqr_tab + n + (n*3+1)*cvGetThreadNum();
        float* z = f + n;
        int* v = (int*)(z + n + 1);
        int p, q, k;

        v[0] = 0;
        z[0] = -inf;
        z[1] = inf;
        f[0] = d[0];

        for( q = 1, k = 0; q < n; q++ )
        {
            float fq = d[q];
            f[q] = fq;

            for(;;k--)
            {
                p = v[k];
                float s = (fq + sqr_tab[q] - d[p] - sqr_tab[p])*inv_tab[q - p];
                if( s > z[k] )
                {
                    k++;
                    v[k] = q;
                    z[k] = s;
                    z[k+1] = inf;
                    break;
                }
            }
        }

        for( q = 0, k = 0; q < n; q++ )
        {
            while( z[k+1] < q )
                k++;
            p = v[k];
            d[q] = sqr_tab[abs(q - p)] + f[p];
        }
    }

    cvPow( dst, dst, 0.5 );
}