void SelfSimDescriptor::compute(const Mat& img, vector<float>& descriptors, Size winStride, const vector<Point>& locations) const { CV_Assert( img.depth() == CV_8U ); winStride.width = std::max(winStride.width, 1); winStride.height = std::max(winStride.height, 1); Size gridSize = getGridSize(img.size(), winStride); int i, nwindows = locations.empty() ? gridSize.width*gridSize.height : (int)locations.size(); int border = largeSize/2 + smallSize/2; int fsize = (int)getDescriptorSize(); vector<float> tempFeature(fsize+1); descriptors.resize(fsize*nwindows + 1); Mat ssd(largeSize, largeSize, CV_32F), mappingMask; computeLogPolarMapping(mappingMask); #if 0 //def _OPENMP int nthreads = cvGetNumThreads(); #pragma omp parallel for num_threads(nthreads) #endif for( i = 0; i < nwindows; i++ ) { Point pt; float* feature0 = &descriptors[fsize*i]; float* feature = &tempFeature[0]; int x, y, j; if( !locations.empty() ) { pt = locations[i]; if( pt.x < border || pt.x >= img.cols - border || pt.y < border || pt.y >= img.rows - border ) { for( j = 0; j < fsize; j++ ) feature0[j] = 0.f; continue; } } else pt = Point((i % gridSize.width)*winStride.width + border, (i / gridSize.width)*winStride.height + border); SSD(img, pt, ssd); // Determine in the local neighborhood the largest difference and use for normalization float var_noise = 1000.f; for( y = -1; y <= 1 ; y++ ) for( x = -1 ; x <= 1 ; x++ ) var_noise = std::max(var_noise, ssd.at<float>(largeSize/2+y, largeSize/2+x)); for( j = 0; j <= fsize; j++ ) feature[j] = FLT_MAX; // Derive feature vector before exp(-x) computation // Idea: for all x,a >= 0, a=const. we have: // max [ exp( -x / a) ] = exp ( -min(x) / a ) // Thus, determine min(ssd) and store in feature[...] for( y = 0; y < ssd.rows; y++ ) { const schar *mappingMaskPtr = mappingMask.ptr<schar>(y); const float *ssdPtr = ssd.ptr<float>(y); for( x = 0 ; x < ssd.cols; x++ ) { int index = mappingMaskPtr[x]; feature[index] = std::min(feature[index], ssdPtr[x]); } } var_noise = -1.f/var_noise; for( j = 0; j < fsize; j++ ) feature0[j] = feature[j]*var_noise; Mat _f(1, fsize, CV_32F, feature0); cv::exp(_f, _f); } }
void icvCrossCorr( const CvArr* _img, const CvArr* _templ, CvArr* _corr, CvPoint anchor, double delta, int borderType ) { // disable OpenMP in the case of Visual Studio, // otherwise the performance drops significantly #undef USE_OPENMP #if !defined _MSC_VER || defined CV_ICC #define USE_OPENMP 1 #endif const double block_scale = 4.5; const int min_block_size = 256; cv::Ptr<CvMat> dft_img[CV_MAX_THREADS]; cv::Ptr<CvMat> dft_templ; std::vector<uchar> buf[CV_MAX_THREADS]; int k, num_threads = 0; CvMat istub, *img = (CvMat*)_img; CvMat tstub, *templ = (CvMat*)_templ; CvMat cstub, *corr = (CvMat*)_corr; CvSize dftsize, blocksize; int depth, templ_depth, corr_depth, max_depth = CV_32F, cn, templ_cn, corr_cn, buf_size = 0, tile_count_x, tile_count_y, tile_count; img = cvGetMat( img, &istub ); templ = cvGetMat( templ, &tstub ); corr = cvGetMat( corr, &cstub ); if( CV_MAT_DEPTH( img->type ) != CV_8U && CV_MAT_DEPTH( img->type ) != CV_16U && CV_MAT_DEPTH( img->type ) != CV_32F && CV_MAT_DEPTH( img->type ) != CV_64F ) CV_Error( CV_StsUnsupportedFormat, "The function supports only 8u, 16u and 32f data types" ); if( !CV_ARE_DEPTHS_EQ( img, templ ) && CV_MAT_DEPTH( templ->type ) != CV_32F ) CV_Error( CV_StsUnsupportedFormat, "Template (kernel) must be of the same depth as the input image, or be 32f" ); if( !CV_ARE_DEPTHS_EQ( img, corr ) && CV_MAT_DEPTH( corr->type ) != CV_32F && CV_MAT_DEPTH( corr->type ) != CV_64F ) CV_Error( CV_StsUnsupportedFormat, "The output image must have the same depth as the input image, or be 32f/64f" ); if( (!CV_ARE_CNS_EQ( img, corr ) || CV_MAT_CN(templ->type) > 1) && (CV_MAT_CN( corr->type ) > 1 || !CV_ARE_CNS_EQ( img, templ)) ) CV_Error( CV_StsUnsupportedFormat, "The output must have the same number of channels as the input (when the template has 1 channel), " "or the output must have 1 channel when the input and the template have the same number of channels" ); depth = CV_MAT_DEPTH(img->type); cn = CV_MAT_CN(img->type); templ_depth = CV_MAT_DEPTH(templ->type); templ_cn = CV_MAT_CN(templ->type); corr_depth = CV_MAT_DEPTH(corr->type); corr_cn = CV_MAT_CN(corr->type); CV_Assert( corr_cn == 1 || delta == 0 ); max_depth = MAX( max_depth, templ_depth ); max_depth = MAX( max_depth, depth ); max_depth = MAX( max_depth, corr_depth ); if( depth > CV_8U ) max_depth = CV_64F; /*if( img->cols < templ->cols || img->rows < templ->rows ) CV_Error( CV_StsUnmatchedSizes, "Such a combination of image and template/filter size is not supported" );*/ if( corr->rows > img->rows + templ->rows - 1 || corr->cols > img->cols + templ->cols - 1 ) CV_Error( CV_StsUnmatchedSizes, "output image should not be greater than (W + w - 1)x(H + h - 1)" ); blocksize.width = cvRound(templ->cols*block_scale); blocksize.width = MAX( blocksize.width, min_block_size - templ->cols + 1 ); blocksize.width = MIN( blocksize.width, corr->cols ); blocksize.height = cvRound(templ->rows*block_scale); blocksize.height = MAX( blocksize.height, min_block_size - templ->rows + 1 ); blocksize.height = MIN( blocksize.height, corr->rows ); dftsize.width = cvGetOptimalDFTSize(blocksize.width + templ->cols - 1); if( dftsize.width == 1 ) dftsize.width = 2; dftsize.height = cvGetOptimalDFTSize(blocksize.height + templ->rows - 1); if( dftsize.width <= 0 || dftsize.height <= 0 ) CV_Error( CV_StsOutOfRange, "the input arrays are too big" ); // recompute block size blocksize.width = dftsize.width - templ->cols + 1; blocksize.width = MIN( blocksize.width, corr->cols ); blocksize.height = dftsize.height - templ->rows + 1; blocksize.height = MIN( blocksize.height, corr->rows ); dft_templ = cvCreateMat( dftsize.height*templ_cn, dftsize.width, max_depth ); #ifdef USE_OPENMP num_threads = cvGetNumThreads(); #else num_threads = 1; #endif for( k = 0; k < num_threads; k++ ) dft_img[k] = cvCreateMat( dftsize.height, dftsize.width, max_depth ); if( templ_cn > 1 && templ_depth != max_depth ) buf_size = templ->cols*templ->rows*CV_ELEM_SIZE(templ_depth); if( cn > 1 && depth != max_depth ) buf_size = MAX( buf_size, (blocksize.width + templ->cols - 1)* (blocksize.height + templ->rows - 1)*CV_ELEM_SIZE(depth)); if( (corr_cn > 1 || cn > 1) && corr_depth != max_depth ) buf_size = MAX( buf_size, blocksize.width*blocksize.height*CV_ELEM_SIZE(corr_depth)); if( buf_size > 0 ) { for( k = 0; k < num_threads; k++ ) buf[k].resize(buf_size); } // compute DFT of each template plane for( k = 0; k < templ_cn; k++ ) { CvMat dstub, *src, *dst, temp; CvMat* planes[] = { 0, 0, 0, 0 }; int yofs = k*dftsize.height; src = templ; dst = cvGetSubRect( dft_templ, &dstub, cvRect(0,yofs,templ->cols,templ->rows)); if( templ_cn > 1 ) { planes[k] = templ_depth == max_depth ? dst : cvInitMatHeader( &temp, templ->rows, templ->cols, templ_depth, &buf[0][0] ); cvSplit( templ, planes[0], planes[1], planes[2], planes[3] ); src = planes[k]; planes[k] = 0; } if( dst != src ) cvConvert( src, dst ); if( dft_templ->cols > templ->cols ) { cvGetSubRect( dft_templ, dst, cvRect(templ->cols, yofs, dft_templ->cols - templ->cols, templ->rows) ); cvZero( dst ); } cvGetSubRect( dft_templ, dst, cvRect(0,yofs,dftsize.width,dftsize.height) ); cvDFT( dst, dst, CV_DXT_FORWARD + CV_DXT_SCALE, templ->rows ); } tile_count_x = (corr->cols + blocksize.width - 1)/blocksize.width; tile_count_y = (corr->rows + blocksize.height - 1)/blocksize.height; tile_count = tile_count_x*tile_count_y; #if defined _OPENMP && defined USE_OPENMP #pragma omp parallel for num_threads(num_threads) schedule(dynamic) #endif // calculate correlation by blocks for( k = 0; k < tile_count; k++ ) { #ifdef USE_OPENMP int thread_idx = cvGetThreadNum(); #else int thread_idx = 0; #endif int x = (k%tile_count_x)*blocksize.width; int y = (k/tile_count_x)*blocksize.height; int i, yofs; CvMat sstub, dstub, *src, *dst, temp; CvMat* planes[] = { 0, 0, 0, 0 }; CvMat* _dft_img = dft_img[thread_idx]; uchar* _buf = buf_size > 0 ? &buf[thread_idx][0] : 0; CvSize csz = { blocksize.width, blocksize.height }, isz; int x0 = x - anchor.x, y0 = y - anchor.y; int x1 = MAX( 0, x0 ), y1 = MAX( 0, y0 ), x2, y2; csz.width = MIN( csz.width, corr->cols - x ); csz.height = MIN( csz.height, corr->rows - y ); isz.width = csz.width + templ->cols - 1; isz.height = csz.height + templ->rows - 1; x2 = MIN( img->cols, x0 + isz.width ); y2 = MIN( img->rows, y0 + isz.height ); for( i = 0; i < cn; i++ ) { CvMat dstub1, *dst1; yofs = i*dftsize.height; src = cvGetSubRect( img, &sstub, cvRect(x1,y1,x2-x1,y2-y1) ); dst = cvGetSubRect( _dft_img, &dstub, cvRect(0,0,isz.width,isz.height) ); dst1 = dst; if( x2 - x1 < isz.width || y2 - y1 < isz.height ) dst1 = cvGetSubRect( _dft_img, &dstub1, cvRect( x1 - x0, y1 - y0, x2 - x1, y2 - y1 )); if( cn > 1 ) { planes[i] = dst1; if( depth != max_depth ) planes[i] = cvInitMatHeader( &temp, y2 - y1, x2 - x1, depth, _buf ); cvSplit( src, planes[0], planes[1], planes[2], planes[3] ); src = planes[i]; planes[i] = 0; } if( dst1 != src ) cvConvert( src, dst1 ); if( dst != dst1 ) cvCopyMakeBorder( dst1, dst, cvPoint(x1 - x0, y1 - y0), borderType ); if( dftsize.width > isz.width ) { cvGetSubRect( _dft_img, dst, cvRect(isz.width, 0, dftsize.width - isz.width,dftsize.height) ); cvZero( dst ); } cvDFT( _dft_img, _dft_img, CV_DXT_FORWARD, isz.height ); cvGetSubRect( dft_templ, dst, cvRect(0,(templ_cn>1?yofs:0),dftsize.width,dftsize.height) ); cvMulSpectrums( _dft_img, dst, _dft_img, CV_DXT_MUL_CONJ ); cvDFT( _dft_img, _dft_img, CV_DXT_INVERSE, csz.height ); src = cvGetSubRect( _dft_img, &sstub, cvRect(0,0,csz.width,csz.height) ); dst = cvGetSubRect( corr, &dstub, cvRect(x,y,csz.width,csz.height) ); if( corr_cn > 1 ) { planes[i] = src; if( corr_depth != max_depth ) { planes[i] = cvInitMatHeader( &temp, csz.height, csz.width, corr_depth, _buf ); cvConvertScale( src, planes[i], 1, delta ); } cvMerge( planes[0], planes[1], planes[2], planes[3], dst ); planes[i] = 0; } else { if( i == 0 ) cvConvertScale( src, dst, 1, delta ); else { if( max_depth > corr_depth ) { cvInitMatHeader( &temp, csz.height, csz.width, corr_depth, _buf ); cvConvert( src, &temp ); src = &temp; } cvAcc( src, dst ); } } } } }
static void icvTrueDistTrans( const CvMat* src, CvMat* dst ) { cv::Ptr<CvMat> buffer = 0; int i, m, n; int sstep, dstep; const float inf = 1e6f; int thread_count = cvGetNumThreads(); int pass1_sz, pass2_sz; if( !CV_ARE_SIZES_EQ( src, dst )) CV_Error( CV_StsUnmatchedSizes, "" ); if( CV_MAT_TYPE(src->type) != CV_8UC1 || CV_MAT_TYPE(dst->type) != CV_32FC1 ) CV_Error( CV_StsUnsupportedFormat, "The input image must have 8uC1 type and the output one must have 32fC1 type" ); m = src->rows; n = src->cols; // (see stage 1 below): // sqr_tab: 2*m, sat_tab: 3*m + 1, d: m*thread_count, pass1_sz = src->rows*(5 + thread_count) + 1; // (see stage 2): // sqr_tab & inv_tab: n each; f & v: n*thread_count each; z: (n+1)*thread_count pass2_sz = src->cols*(2 + thread_count*3) + thread_count; buffer = cvCreateMat( 1, MAX(pass1_sz, pass2_sz), CV_32FC1 ); sstep = src->step; dstep = dst->step / sizeof(float); // stage 1: compute 1d distance transform of each column float* sqr_tab = buffer->data.fl; int* sat_tab = (int*)(sqr_tab + m*2); const int shift = m*2; for( i = 0; i < m; i++ ) sqr_tab[i] = (float)(i*i); for( i = m; i < m*2; i++ ) sqr_tab[i] = inf; for( i = 0; i < shift; i++ ) sat_tab[i] = 0; for( ; i <= m*3; i++ ) sat_tab[i] = i - shift; #ifdef _OPENMP #pragma omp parallel for num_threads(thread_count) #endif for( i = 0; i < n; i++ ) { const uchar* sptr = src->data.ptr + i + (m-1)*sstep; float* dptr = dst->data.fl + i; int* d = (int*)(sat_tab + m*3+1+m*cvGetThreadNum()); int j, dist = m-1; for( j = m-1; j >= 0; j--, sptr -= sstep ) { dist = (dist + 1) & (sptr[0] == 0 ? 0 : -1); d[j] = dist; } dist = m-1; for( j = 0; j < m; j++, dptr += dstep ) { dist = dist + 1 - sat_tab[dist + 1 - d[j] + shift]; d[j] = dist; dptr[0] = sqr_tab[dist]; } } // stage 2: compute modified distance transform for each row float* inv_tab = buffer->data.fl; sqr_tab = inv_tab + n; inv_tab[0] = sqr_tab[0] = 0.f; for( i = 1; i < n; i++ ) { inv_tab[i] = (float)(0.5/i); sqr_tab[i] = (float)(i*i); } #ifdef _OPENMP #pragma omp parallel for num_threads(thread_count) schedule(dynamic) #endif for( i = 0; i < m; i++ ) { float* d = (float*)(dst->data.ptr + i*dst->step); float* f = sqr_tab + n + (n*3+1)*cvGetThreadNum(); float* z = f + n; int* v = (int*)(z + n + 1); int p, q, k; v[0] = 0; z[0] = -inf; z[1] = inf; f[0] = d[0]; for( q = 1, k = 0; q < n; q++ ) { float fq = d[q]; f[q] = fq; for(;;k--) { p = v[k]; float s = (fq + sqr_tab[q] - d[p] - sqr_tab[p])*inv_tab[q - p]; if( s > z[k] ) { k++; v[k] = q; z[k] = s; z[k+1] = inf; break; } } } for( q = 0, k = 0; q < n; q++ ) { while( z[k+1] < q ) k++; p = v[k]; d[q] = sqr_tab[abs(q - p)] + f[p]; } } cvPow( dst, dst, 0.5 ); }