/*
 * Shared worker for the corner-response computations.
 *
 * For every pixel of src it forms the three products dx*dx, dx*dy, dy*dy of the
 * (scaled) first-order x and y derivatives, averages them with a
 * block_size x block_size box filter, and hands the averaged rows to one of
 * icvCalcMinEigenVal / icvCalcHarris / icvCalcEigenValsVecs, which write into eigenv.
 * The image is processed in horizontal stripes so that only small temporary
 * buffers are needed.
 *
 * src            - input matrix; a CV_8U depth yields CV_16S derivative buffers,
 *                  any other depth yields CV_32F buffers.
 * eigenv         - output matrix; rows are addressed as float* starting at row dst_y.
 * block_size     - averaging window size; must be odd and >= 3.
 * aperture_size  - derivative aperture; must be odd and >= 3, or CV_SCHARR.
 * op_type        - ICV_MINEIGENVAL, ICV_HARRIS or ICV_EIGENVALSVECS. Any other value
 *                  matches no branch, so nothing is written to eigenv.
 * k              - forwarded to icvCalcHarris only.
 *
 * Errors are reported through CV_ERROR / CV_CALL / IPPI_CALL; all temporaries are
 * released after __END__ regardless of how the body was left.
 */
static void icvCornerEigenValsVecs( const CvMat* src, CvMat* eigenv, int block_size, int aperture_size, int op_type, double k=0. )
{
    CvSepFilter dx_filter, dy_filter;
    CvBoxFilter blur_filter;
    CvMat *tempsrc = 0;
    CvMat *Dx = 0, *Dy = 0, *cov = 0;
    CvMat *sqrt_buf = 0;
    int buf_size = 1 << 12;

    CV_FUNCNAME( "icvCornerEigenValsVecs" );

    __BEGIN__;

    int i, j, y, dst_y = 0, max_dy, delta = 0;
    /* remember the caller's value: aperture_size itself is rewritten to 3 for CV_SCHARR below */
    int aperture_size0 = aperture_size;
    int temp_step = 0, d_step;
    uchar* shifted_ptr = 0;
    int depth, d_depth;
    int stage = CV_START;
    CvSobelFixedIPPFunc ipp_sobel_vert = 0, ipp_sobel_horiz = 0;
    CvFilterFixedIPPFunc ipp_scharr_vert = 0, ipp_scharr_horiz = 0;
    CvSize el_size, size, stripe_size;
    int aligned_width;
    CvPoint el_anchor;
    double factorx, factory;
    bool use_ipp = false;

    /* argument validation */
    if( block_size < 3 || !(block_size & 1) )
        CV_ERROR( CV_StsOutOfRange, "averaging window size must be an odd number >= 3" );

    if( aperture_size < 3 && aperture_size != CV_SCHARR || !(aperture_size & 1) )
        CV_ERROR( CV_StsOutOfRange, "Derivative filter aperture size must be a positive odd number >=3 or CV_SCHARR" );

    depth = CV_MAT_DEPTH(src->type);
    /* 8-bit input -> 16-bit signed derivatives, everything else -> float derivatives */
    d_depth = depth == CV_8U ? CV_16S : CV_32F;

    size = cvGetMatSize(src);
    aligned_width = cvAlign(size.width, 4);

    /* from here on aperture_size is the numeric kernel size (3 for Scharr) */
    aperture_size = aperture_size == CV_SCHARR ? 3 : aperture_size;
    el_size = cvSize( aperture_size, aperture_size );
    el_anchor = cvPoint( aperture_size/2, aperture_size/2 );

    /* pick IPP derivative functions when the function pointers are set and the aperture is <= 5 */
    if( aperture_size <= 5 && icvFilterSobelVert_8u16s_C1R_p )
    {
        if( depth == CV_8U && aperture_size0 == CV_SCHARR )
        {
            ipp_scharr_vert = icvFilterScharrVert_8u16s_C1R_p;
            ipp_scharr_horiz = icvFilterScharrHoriz_8u16s_C1R_p;
        }
        else if( depth == CV_32F && aperture_size0 == CV_SCHARR )
        {
            ipp_scharr_vert = icvFilterScharrVert_32f_C1R_p;
            ipp_scharr_horiz = icvFilterScharrHoriz_32f_C1R_p;
        }
        else if( depth == CV_8U )
        {
            ipp_sobel_vert = icvFilterSobelVert_8u16s_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_8u16s_C1R_p;
        }
        else if( depth == CV_32F )
        {
            ipp_sobel_vert = icvFilterSobelVert_32f_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_32f_C1R_p;
        }
    }

    if( ipp_sobel_vert && ipp_sobel_horiz || ipp_scharr_vert && ipp_scharr_horiz )
    {
        /* IPP path: stripes of src are staged in tempsrc; shifted_ptr skips the
           el_anchor rows/columns at the top-left of that staging buffer */
        CV_CALL( tempsrc = icvIPPFilterInit( src, buf_size, cvSize(el_size.width,el_size.height + block_size)));
        shifted_ptr = tempsrc->data.ptr + el_anchor.y*tempsrc->step + el_anchor.x*CV_ELEM_SIZE(depth);
        temp_step = tempsrc->step ? tempsrc->step : CV_STUB_STEP;
        max_dy = tempsrc->rows - aperture_size + 1;
        use_ipp = true;
    }
    else
    {
        /* fallback path: clear any half-selected IPP pointers and use the separable filters */
        ipp_sobel_vert = ipp_sobel_horiz = 0;
        ipp_scharr_vert = ipp_scharr_horiz = 0;

        CV_CALL( dx_filter.init_deriv( size.width, depth, d_depth, 1, 0, aperture_size0 ));
        CV_CALL( dy_filter.init_deriv( size.width, depth, d_depth, 0, 1, aperture_size0 ));
        /* stripe height derived from the buf_size budget, but never below aperture_size + block_size */
        max_dy = buf_size / src->cols;
        max_dy = MAX( max_dy, aperture_size + block_size );
    }

    /* derivative buffers are allocated aligned_width wide ... */
    CV_CALL( Dx = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( Dy = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( cov = cvCreateMat( max_dy + block_size + 1, size.width, CV_32FC3 ));
    CV_CALL( sqrt_buf = cvCreateMat( 2, size.width, CV_32F ));
    /* ... and then their headers are narrowed back to the image width */
    Dx->cols = Dy->cols = size.width;

    /* non-IPP path: rows requested per stripe are aperture_size-1 fewer than the buffer height
       NOTE(review): presumably headroom for rows the filter emits with a delay - confirm */
    if( !use_ipp )
        max_dy -= aperture_size - 1;
    d_step = Dx->step ? Dx->step : CV_STUB_STEP;

    CV_CALL(blur_filter.init(size.width, CV_32FC3, CV_32FC3, 0, cvSize(block_size,block_size)));
    stripe_size = size;

    /* scale applied to raw derivatives: 1 / (2^(aperture_size-1) * block_size),
       with an extra factor 2 for Scharr and 255 for 8-bit input */
    factorx = (double)(1 << (aperture_size - 1)) * block_size;
    if( aperture_size0 == CV_SCHARR )
        factorx *= 2;
    if( depth == CV_8U )
        factorx *= 255.;
    factory = factorx = 1./factorx;
    /* the y scale is negated on the IPP Sobel path
       NOTE(review): presumably compensates for an opposite sign convention in IPP - confirm */
    if( ipp_sobel_vert )
        factory = -factory;

    /* main loop: one stripe of delta source rows per iteration */
    for( y = 0; y < size.height; y += delta )
    {
        if( !use_ipp )
        {
            delta = MIN( size.height - y, max_dy );
            /* last stripe: raise CV_END (keeping CV_START if it is also the first stripe) */
            if( y + delta == size.height )
                stage = stage & CV_START ? CV_START + CV_END : CV_END;
            dx_filter.process( src, Dx, cvRect(0,y,-1,delta), cvPoint(0,0), stage );
            /* the number of derivative rows actually produced comes from process() */
            stripe_size.height = dy_filter.process( src, Dy, cvRect(0,y,-1,delta), cvPoint(0,0), stage );
        }
        else
        {
            delta = icvIPPFilterNextStripe( src, tempsrc, y, el_size, el_anchor );
            stripe_size.height = delta;

            if( ipp_sobel_vert )
            {
                /* last argument is 33 for a 3x3 aperture, 55 for 5x5
                   NOTE(review): presumably the IPP mask-size code - confirm */
                IPPI_CALL( ipp_sobel_vert( shifted_ptr, temp_step, Dx->data.ptr, d_step, stripe_size, aperture_size*10 + aperture_size ));
                IPPI_CALL( ipp_sobel_horiz( shifted_ptr, temp_step, Dy->data.ptr, d_step, stripe_size, aperture_size*10 + aperture_size ));
            }
            else /*if( ipp_scharr_vert )*/
            {
                IPPI_CALL( ipp_scharr_vert( shifted_ptr, temp_step, Dx->data.ptr, d_step, stripe_size ));
                IPPI_CALL( ipp_scharr_horiz( shifted_ptr, temp_step, Dy->data.ptr, d_step, stripe_size ));
            }
        }

        /* fill cov with the interleaved triple (dx*dx, dx*dy, dy*dy) for each pixel of the stripe */
        for( i = 0; i < stripe_size.height; i++ )
        {
            float* cov_data = (float*)(cov->data.ptr + i*cov->step);
            if( d_depth == CV_16S )
            {
                const short* dxdata = (const short*)(Dx->data.ptr + i*Dx->step);
                const short* dydata = (const short*)(Dy->data.ptr + i*Dy->step);

                for( j = 0; j < size.width; j++ )
                {
                    double dx = dxdata[j]*factorx;
                    double dy = dydata[j]*factory;

                    cov_data[j*3] = (float)(dx*dx);
                    cov_data[j*3+1] = (float)(dx*dy);
                    cov_data[j*3+2] = (float)(dy*dy);
                }
            }
            else
            {
                const float* dxdata = (const float*)(Dx->data.ptr + i*Dx->step);
                const float* dydata = (const float*)(Dy->data.ptr + i*Dy->step);

                for( j = 0; j < size.width; j++ )
                {
                    double dx = dxdata[j]*factorx;
                    double dy = dydata[j]*factory;

                    cov_data[j*3] = (float)(dx*dx);
                    cov_data[j*3+1] = (float)(dx*dy);
                    cov_data[j*3+2] = (float)(dy*dy);
                }
            }
        }

        /* tell the box filter when the derivative rows have reached the bottom of the image */
        if( y + stripe_size.height >= size.height )
            stage = stage & CV_START ? CV_START + CV_END : CV_END;

        /* in-place box averaging of cov; the return value is the number of rows ready for output */
        stripe_size.height = blur_filter.process(cov,cov, cvRect(0,0,-1,stripe_size.height),cvPoint(0,0),stage+CV_ISOLATED_ROI);

        /* convert the averaged rows into the requested response, written at output row dst_y */
        if( op_type == ICV_MINEIGENVAL )
            icvCalcMinEigenVal( cov->data.fl, cov->step, (float*)(eigenv->data.ptr + dst_y*eigenv->step), eigenv->step, stripe_size, sqrt_buf );
        else if( op_type == ICV_HARRIS )
            icvCalcHarris( cov->data.fl, cov->step, (float*)(eigenv->data.ptr + dst_y*eigenv->step), eigenv->step, stripe_size, sqrt_buf, k );
        else if( op_type == ICV_EIGENVALSVECS )
            icvCalcEigenValsVecs( cov->data.fl, cov->step, (float*)(eigenv->data.ptr + dst_y*eigenv->step), eigenv->step, stripe_size, sqrt_buf );

        dst_y += stripe_size.height;
        stage = CV_MIDDLE;
    }

    __END__;

    /* cleanup runs on both the normal and the error path */
    cvReleaseMat( &Dx );
    cvReleaseMat( &Dy );
    cvReleaseMat( &cov );
    cvReleaseMat( &sqrt_buf );
    cvReleaseMat( &tempsrc );
}
/*
 * Computes, for every pixel, the value
 *
 *     factor * ( Dx^2 * Dyy + Dy^2 * Dxx - 2 * Dx * Dy * Dxy )
 *
 * where Dx, Dy are the first-order, Dxx, Dyy the second-order and Dxy the mixed
 * derivatives of the source, and factor = 1 / s^3 with s = 2^(aperture_size-1)
 * (multiplied by 255 for 8-bit input).
 *
 * srcarr         - input array; must resolve to an 8uC1 or 32fC1 matrix.
 * dstarr         - output array; must resolve to a 32fC1 matrix of the same size.
 * aperture_size  - derivative aperture: 3, 5 or 7 (CV_SCHARR is rejected).
 *
 * The image is processed in horizontal stripes. When the IPP Sobel function
 * pointers are available and aperture_size <= 5 they are used; otherwise five
 * CvSepFilter objects compute the derivatives.
 *
 * Errors are raised through CV_ERROR / CV_CALL / IPPI_CALL; temporaries are
 * released after __END__ on every path.
 */
CV_IMPL void
cvPreCornerDetect( const void* srcarr, void* dstarr, int aperture_size )
{
    CvSepFilter dx_filter, dy_filter, d2x_filter, d2y_filter, dxy_filter;
    CvMat *Dx = 0, *Dy = 0, *D2x = 0, *D2y = 0, *Dxy = 0;
    CvMat *tempsrc = 0;

    int buf_size = 1 << 12;

    CV_FUNCNAME( "cvPreCornerDetect" );

    __BEGIN__;

    int i, j, y, dst_y = 0, max_dy, delta = 0;
    int temp_step = 0, d_step;
    uchar* shifted_ptr = 0;
    int depth, d_depth;
    int stage = CV_START;
    CvSobelFixedIPPFunc ipp_sobel_vert = 0, ipp_sobel_horiz = 0,
                        ipp_sobel_vert_second = 0, ipp_sobel_horiz_second = 0,
                        ipp_sobel_cross = 0;
    CvSize el_size, size, stripe_size;
    int aligned_width;
    CvPoint el_anchor;
    double factor;
    CvMat stub, *src = (CvMat*)srcarr;
    CvMat dststub, *dst = (CvMat*)dstarr;
    bool use_ipp = false;

    CV_CALL( src = cvGetMat( srcarr, &stub ));
    CV_CALL( dst = cvGetMat( dstarr, &dststub ));

    /* argument validation */
    if( CV_MAT_TYPE(src->type) != CV_8UC1 && CV_MAT_TYPE(src->type) != CV_32FC1 ||
        CV_MAT_TYPE(dst->type) != CV_32FC1 )
        CV_ERROR( CV_StsUnsupportedFormat, "Input must be 8uC1 or 32fC1, output must be 32fC1" );

    if( !CV_ARE_SIZES_EQ( src, dst ))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    if( aperture_size == CV_SCHARR )
        CV_ERROR( CV_StsOutOfRange, "CV_SCHARR is not supported by this function" );

    if( aperture_size < 3 || aperture_size > 7 || !(aperture_size & 1) )
        CV_ERROR( CV_StsOutOfRange, "Derivative filter aperture size must be 3, 5 or 7" );

    depth = CV_MAT_DEPTH(src->type);
    /* 8-bit input -> 16-bit signed derivatives, float input -> float derivatives */
    d_depth = depth == CV_8U ? CV_16S : CV_32F;

    size = cvGetMatSize(src);
    aligned_width = cvAlign(size.width, 4);

    el_size = cvSize( aperture_size, aperture_size );
    el_anchor = cvPoint( aperture_size/2, aperture_size/2 );

    /* pick the IPP Sobel family matching the source depth, if IPP is loaded */
    if( aperture_size <= 5 && icvFilterSobelVert_8u16s_C1R_p )
    {
        if( depth == CV_8U )
        {
            ipp_sobel_vert = icvFilterSobelVert_8u16s_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_8u16s_C1R_p;
            ipp_sobel_vert_second = icvFilterSobelVertSecond_8u16s_C1R_p;
            ipp_sobel_horiz_second = icvFilterSobelHorizSecond_8u16s_C1R_p;
            ipp_sobel_cross = icvFilterSobelCross_8u16s_C1R_p;
        }
        else if( depth == CV_32F )
        {
            ipp_sobel_vert = icvFilterSobelVert_32f_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_32f_C1R_p;
            ipp_sobel_vert_second = icvFilterSobelVertSecond_32f_C1R_p;
            ipp_sobel_horiz_second = icvFilterSobelHorizSecond_32f_C1R_p;
            ipp_sobel_cross = icvFilterSobelCross_32f_C1R_p;
        }
    }

    if( ipp_sobel_vert && ipp_sobel_horiz && ipp_sobel_vert_second &&
        ipp_sobel_horiz_second && ipp_sobel_cross )
    {
        /* IPP path: stripes of src are staged in tempsrc; shifted_ptr skips the
           el_anchor rows/columns at the top-left of that staging buffer */
        CV_CALL( tempsrc = icvIPPFilterInit( src, buf_size, el_size ));
        shifted_ptr = tempsrc->data.ptr + el_anchor.y*tempsrc->step +
                      el_anchor.x*CV_ELEM_SIZE(depth);
        temp_step = tempsrc->step ? tempsrc->step : CV_STUB_STEP;
        max_dy = tempsrc->rows - aperture_size + 1;
        use_ipp = true;
    }
    else
    {
        /* fallback path: all five IPP pointers must be usable together, so clear them all */
        ipp_sobel_vert = ipp_sobel_horiz = 0;
        ipp_sobel_vert_second = ipp_sobel_horiz_second = ipp_sobel_cross = 0;

        /* Wrapped in CV_CALL (as icvCornerEigenValsVecs does) so that a failed
           filter initialisation jumps to cleanup instead of continuing with an
           unusable filter. */
        CV_CALL( dx_filter.init_deriv( size.width, depth, d_depth, 1, 0, aperture_size ));
        CV_CALL( dy_filter.init_deriv( size.width, depth, d_depth, 0, 1, aperture_size ));
        CV_CALL( d2x_filter.init_deriv( size.width, depth, d_depth, 2, 0, aperture_size ));
        CV_CALL( d2y_filter.init_deriv( size.width, depth, d_depth, 0, 2, aperture_size ));
        CV_CALL( dxy_filter.init_deriv( size.width, depth, d_depth, 1, 1, aperture_size ));

        /* stripe height derived from the buf_size budget, but never below aperture_size */
        max_dy = buf_size / src->cols;
        max_dy = MAX( max_dy, aperture_size );
    }

    /* derivative buffers are allocated aligned_width wide ... */
    CV_CALL( Dx = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( Dy = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( D2x = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( D2y = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( Dxy = cvCreateMat( max_dy, aligned_width, d_depth ));
    /* ... and then their headers are narrowed back to the image width */
    Dx->cols = Dy->cols = D2x->cols = D2y->cols = Dxy->cols = size.width;

    /* non-IPP path: rows requested per stripe are aperture_size-1 fewer than the buffer height */
    if( !use_ipp )
        max_dy -= aperture_size - 1;
    d_step = Dx->step ? Dx->step : CV_STUB_STEP;

    stripe_size = size;

    /* the output is cubic in the derivative scale, hence 1/s^3 */
    factor = 1 << (aperture_size - 1);
    if( depth == CV_8U )
        factor *= 255;
    factor = 1./(factor * factor * factor);

    /* From here on aperture_size holds 33 / 55 / 77 and is only passed to the IPP calls.
       NOTE(review): presumably the IPP mask-size code - confirm. */
    aperture_size = aperture_size * 10 + aperture_size;

    /* main loop: one stripe of delta source rows per iteration */
    for( y = 0; y < size.height; y += delta )
    {
        if( !use_ipp )
        {
            delta = MIN( size.height - y, max_dy );
            CvRect roi = cvRect(0,y,size.width,delta);
            CvPoint origin=cvPoint(0,0);

            /* last stripe: raise CV_END (keeping CV_START if it is also the first stripe) */
            if( y + delta == size.height )
                stage = stage & CV_START ? CV_START + CV_END : CV_END;

            dx_filter.process(src,Dx,roi,origin,stage);
            dy_filter.process(src,Dy,roi,origin,stage);
            d2x_filter.process(src,D2x,roi,origin,stage);
            d2y_filter.process(src,D2y,roi,origin,stage);
            /* the number of rows actually produced comes from process() */
            stripe_size.height = dxy_filter.process(src,Dxy,roi,origin,stage);
        }
        else
        {
            delta = icvIPPFilterNextStripe( src, tempsrc, y, el_size, el_anchor );
            stripe_size.height = delta;

            IPPI_CALL( ipp_sobel_vert( shifted_ptr, temp_step,
                Dx->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_horiz( shifted_ptr, temp_step,
                Dy->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_vert_second( shifted_ptr, temp_step,
                D2x->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_horiz_second( shifted_ptr, temp_step,
                D2y->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_cross( shifted_ptr, temp_step,
                Dxy->data.ptr, d_step, stripe_size, aperture_size ));
        }

        /* combine the five derivative rows into one output row; dst_y runs over the whole image */
        for( i = 0; i < stripe_size.height; i++, dst_y++ )
        {
            float* dstdata = (float*)(dst->data.ptr + dst_y*dst->step);

            if( d_depth == CV_16S )
            {
                const short* dxdata = (const short*)(Dx->data.ptr + i*Dx->step);
                const short* dydata = (const short*)(Dy->data.ptr + i*Dy->step);
                const short* d2xdata = (const short*)(D2x->data.ptr + i*D2x->step);
                const short* d2ydata = (const short*)(D2y->data.ptr + i*D2y->step);
                const short* dxydata = (const short*)(Dxy->data.ptr + i*Dxy->step);

                for( j = 0; j < stripe_size.width; j++ )
                {
                    double dx = dxdata[j];
                    double dx2 = dx * dx;
                    double dy = dydata[j];
                    double dy2 = dy * dy;

                    dstdata[j] = (float)(factor*(dx2*d2ydata[j] + dy2*d2xdata[j] - 2*dx*dy*dxydata[j]));
                }
            }
            else
            {
                const float* dxdata = (const float*)(Dx->data.ptr + i*Dx->step);
                const float* dydata = (const float*)(Dy->data.ptr + i*Dy->step);
                const float* d2xdata = (const float*)(D2x->data.ptr + i*D2x->step);
                const float* d2ydata = (const float*)(D2y->data.ptr + i*D2y->step);
                const float* dxydata = (const float*)(Dxy->data.ptr + i*Dxy->step);

                for( j = 0; j < stripe_size.width; j++ )
                {
                    double dx = dxdata[j];
                    double dy = dydata[j];

                    dstdata[j] = (float)(factor*(dx*dx*d2ydata[j] + dy*dy*d2xdata[j] - 2*dx*dy*dxydata[j]));
                }
            }
        }

        stage = CV_MIDDLE;
    }

    __END__;

    /* cleanup runs on both the normal and the error path */
    cvReleaseMat( &Dx );
    cvReleaseMat( &Dy );
    cvReleaseMat( &D2x );
    cvReleaseMat( &D2y );
    cvReleaseMat( &Dxy );
    cvReleaseMat( &tempsrc );
}
/*
 * Applies the 2D kernel _kernel to _src and stores the result in _dst.
 *
 * _src, _dst  - arrays of equal size and type; a COI on either one is rejected.
 * _kernel     - single-channel CV_32F or CV_64F matrix.
 * anchor      - anchor position inside the kernel; (-1,-1) selects the kernel
 *               centre (cols/2, rows/2). It must lie inside the kernel.
 *
 * If the IPP filter function pointers are set, the image has at least one
 * dimension >= ipp_lower_limit and the type has an IPP entry, the work is done
 * stripe by stripe through IPP on a 180-degree-rotated private copy of the
 * kernel. In every other case the table-driven filter function for the source
 * depth is used. Two-channel data is rejected on that second path.
 *
 * Errors are raised through CV_ERROR / CV_CALL / IPPI_CALL; temporaries are
 * released after __END__ on every path.
 */
CV_IMPL void
cvFilter2D( const CvArr* _src, CvArr* _dst, const CvMat* _kernel, CvPoint anchor )
{
    // IPP is considered only when rows or cols reach this value;
    // below that approximate size OpenCV is faster
    const int ipp_lower_limit = 20;

    static CvFuncTable filter_tab;
    static int inittab = 0;

    CvMat* temp = 0;
    CvFilterState* state = 0;
    float* kernel_data = 0;
    int local_alloc = 1;    // stays 1 while kernel_data is NOT heap memory

    CV_FUNCNAME( "cvFilter2D" );

    __BEGIN__;

    CvMat srcstub, dststub, kernel_hdr;
    CvMat* src = (CvMat*)_src;
    CvMat* dst = (CvMat*)_dst;
    const CvMat* kernel = _kernel;
    CvFilterFunc func = 0;
    CvSize size;
    int src_coi = 0, dst_coi = 0;
    int type, depth, kernel_type;
    int src_step, dst_step;

    // one-time fill of the per-depth function table
    if( !inittab )
    {
        icvInitFilterTab( &filter_tab );
        inittab = 1;
    }

    CV_CALL( src = cvGetMat( src, &srcstub, &src_coi ));
    CV_CALL( dst = cvGetMat( dst, &dststub, &dst_coi ));

    if( src_coi != 0 || dst_coi != 0 )
    {
        CV_ERROR( CV_BadCOI, "" );
    }

    type = CV_MAT_TYPE( src->type );

    if( !CV_ARE_SIZES_EQ( src, dst ))
    {
        CV_ERROR( CV_StsUnmatchedSizes, "" );
    }

    if( !CV_ARE_TYPES_EQ( src, dst ))
    {
        CV_ERROR( CV_StsUnmatchedFormats, "" );
    }

    // the kernel header is validated before its type field is read
    if( !CV_IS_MAT(kernel) )
    {
        CV_ERROR( CV_StsBadArg, "kernel must be single-channel floating-point matrix" );
    }

    kernel_type = CV_MAT_TYPE(kernel->type);
    if( kernel_type != CV_32F && kernel_type != CV_64F )
    {
        CV_ERROR( CV_StsBadArg, "kernel must be single-channel floating-point matrix" );
    }

    // default anchor: kernel centre
    if( anchor.x == -1 && anchor.y == -1 )
        anchor = cvPoint(kernel->cols/2,kernel->rows/2);

    // the unsigned comparison also rejects negative coordinates
    if( (unsigned)anchor.x >= (unsigned)kernel->cols ||
        (unsigned)anchor.y >= (unsigned)kernel->rows )
    {
        CV_ERROR( CV_StsOutOfRange, "anchor point is out of kernel" );
    }

    // Work on a private continuous 32f copy of the kernel when the caller's one
    // is not continuous 32f, or whenever IPP is present (the IPP path rewrites
    // the coefficients in place).
    if( icvFilter_8u_C1R_p || kernel_type != CV_32FC1 || !CV_IS_MAT_CONT(kernel->type) )
    {
        int kernel_bytes = kernel->rows*kernel->cols*sizeof(kernel_data[0]);

        if( kernel_bytes >= CV_MAX_LOCAL_SIZE )
        {
            CV_CALL( kernel_data = (float*)cvAlloc( kernel_bytes ));
            local_alloc = 0;
        }
        else
        {
            kernel_data = (float*)cvStackAlloc( kernel_bytes );
        }

        kernel_hdr = cvMat( kernel->rows, kernel->cols, CV_32F, kernel_data );

        if( kernel_type == CV_32FC1 )
            cvCopy( kernel, &kernel_hdr );
        else
            cvConvertScale( kernel, &kernel_hdr, 1, 0 );

        kernel = &kernel_hdr;
    }

    depth = CV_MAT_DEPTH(type);
    size = cvGetMatSize( src );
    src_step = src->step;
    dst_step = dst->step;
    if( dst_step == 0 )
        dst_step = CV_STUB_STEP;

    if( icvFilter_8u_C1R_p &&
        (src->rows >= ipp_lower_limit || src->cols >= ipp_lower_limit) )
    {
        // look up the IPP entry point for this pixel type (0 if there is none)
        CvFilterIPPFunc ipp_func = 0;

        switch( type )
        {
        case CV_8UC1:  ipp_func = (CvFilterIPPFunc)icvFilter_8u_C1R_p;  break;
        case CV_8UC3:  ipp_func = (CvFilterIPPFunc)icvFilter_8u_C3R_p;  break;
        case CV_8UC4:  ipp_func = (CvFilterIPPFunc)icvFilter_8u_C4R_p;  break;
        case CV_16SC1: ipp_func = (CvFilterIPPFunc)icvFilter_16s_C1R_p; break;
        case CV_16SC3: ipp_func = (CvFilterIPPFunc)icvFilter_16s_C3R_p; break;
        case CV_16SC4: ipp_func = (CvFilterIPPFunc)icvFilter_16s_C4R_p; break;
        case CV_32FC1: ipp_func = (CvFilterIPPFunc)icvFilter_32f_C1R_p; break;
        case CV_32FC3: ipp_func = (CvFilterIPPFunc)icvFilter_32f_C3R_p; break;
        case CV_32FC4: ipp_func = (CvFilterIPPFunc)icvFilter_32f_C4R_p; break;
        default:       break;
        }

        if( ipp_func )
        {
            CvSize ksize = cvSize( kernel->cols, kernel->rows );
            // anchor measured from the opposite corner, matching the rotated kernel
            CvPoint flipped_anchor = cvPoint( ksize.width - anchor.x - 1,
                                              ksize.height - anchor.y - 1 );
            // the optimal value may depend on CPU cache,
            // overhead of current IPP code etc.
            int stripe_size = 1 << 16;
            const uchar* shifted_ptr;
            float* coeffs = kernel->data.fl;
            int lo = 0, hi = ksize.width*ksize.height - 1;
            int y = 0, rows_done = 0;
            int temp_step;

            // Mirror the kernel around its centre. The coefficients are stored
            // contiguously row by row, so reversing the flat array is the same
            // 180-degree rotation.
            for( ; lo < hi; lo++, hi-- )
            {
                float t = coeffs[lo];
                coeffs[lo] = coeffs[hi];
                coeffs[hi] = t;
            }

            CV_CALL( temp = icvIPPFilterInit( src, stripe_size, ksize ));
            shifted_ptr = temp->data.ptr +
                anchor.y*temp->step + anchor.x*CV_ELEM_SIZE(type);
            temp_step = temp->step ? temp->step : CV_STUB_STEP;

            // filter one staged stripe at a time; each call reports how many rows it covered
            while( y < src->rows )
            {
                rows_done = icvIPPFilterNextStripe( src, temp, y, ksize, anchor );
                IPPI_CALL( ipp_func( shifted_ptr, temp_step,
                    dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, rows_done),
                    kernel->data.fl, ksize, flipped_anchor ));
                y += rows_done;
            }

            EXIT;
        }
    }

    // generic path: table-driven filter selected by depth
    CV_CALL( state = icvFilterInitAlloc( src->cols, cv32f, CV_MAT_CN(type),
        cvSize(kernel->cols, kernel->rows), anchor, kernel->data.ptr, ICV_GENERIC_KERNEL ));

    if( CV_MAT_CN(type) == 2 )
    {
        CV_ERROR( CV_BadNumChannels, "Unsupported number of channels" );
    }

    func = (CvFilterFunc)(filter_tab.fn_2d[depth]);
    if( !func )
    {
        CV_ERROR( CV_StsUnsupportedFormat, "" );
    }

    // single-row images use the stub step for both source and destination
    if( size.height == 1 )
        src_step = dst_step = CV_STUB_STEP;

    IPPI_CALL( func( src->data.ptr, src_step, dst->data.ptr,
                     dst_step, &size, state, 0 ));

    __END__;

    // cleanup runs on both the normal and the error path
    cvReleaseMat( &temp );
    icvFilterFree( &state );
    if( !local_alloc )
        cvFree( (void**)&kernel_data );
}