예제 #1
0
파일: cvcorner.cpp 프로젝트: cybertk/opencv
static void
icvCornerEigenValsVecs( const CvMat* src, CvMat* eigenv, int block_size,
                        int aperture_size, int op_type, double k=0. )
{
    CvSepFilter dx_filter, dy_filter;
    CvBoxFilter blur_filter;
    CvMat *tempsrc = 0;
    CvMat *Dx = 0, *Dy = 0, *cov = 0;
    CvMat *sqrt_buf = 0;

    int buf_size = 1 << 12;

    CV_FUNCNAME( "icvCornerEigenValsVecs" );

    __BEGIN__;

    int i, j, y, dst_y = 0, max_dy, delta = 0;
    int aperture_size0 = aperture_size;
    int temp_step = 0, d_step;
    uchar* shifted_ptr = 0;
    int depth, d_depth;
    int stage = CV_START;
    CvSobelFixedIPPFunc ipp_sobel_vert = 0, ipp_sobel_horiz = 0;
    CvFilterFixedIPPFunc ipp_scharr_vert = 0, ipp_scharr_horiz = 0;
    CvSize el_size, size, stripe_size;
    int aligned_width;
    CvPoint el_anchor;
    double factorx, factory;
    bool use_ipp = false;

    if( block_size < 3 || !(block_size & 1) )
        CV_ERROR( CV_StsOutOfRange, "averaging window size must be an odd number >= 3" );

    if( aperture_size < 3 && aperture_size != CV_SCHARR || !(aperture_size & 1) )
        CV_ERROR( CV_StsOutOfRange,
                  "Derivative filter aperture size must be a positive odd number >=3 or CV_SCHARR" );

    depth = CV_MAT_DEPTH(src->type);
    d_depth = depth == CV_8U ? CV_16S : CV_32F;

    size = cvGetMatSize(src);
    aligned_width = cvAlign(size.width, 4);

    aperture_size = aperture_size == CV_SCHARR ? 3 : aperture_size;
    el_size = cvSize( aperture_size, aperture_size );
    el_anchor = cvPoint( aperture_size/2, aperture_size/2 );

    if( aperture_size <= 5 && icvFilterSobelVert_8u16s_C1R_p )
    {
        if( depth == CV_8U && aperture_size0 == CV_SCHARR )
        {
            ipp_scharr_vert = icvFilterScharrVert_8u16s_C1R_p;
            ipp_scharr_horiz = icvFilterScharrHoriz_8u16s_C1R_p;
        }
        else if( depth == CV_32F && aperture_size0 == CV_SCHARR )
        {
            ipp_scharr_vert = icvFilterScharrVert_32f_C1R_p;
            ipp_scharr_horiz = icvFilterScharrHoriz_32f_C1R_p;
        }
        else if( depth == CV_8U )
        {
            ipp_sobel_vert = icvFilterSobelVert_8u16s_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_8u16s_C1R_p;
        }
        else if( depth == CV_32F )
        {
            ipp_sobel_vert = icvFilterSobelVert_32f_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_32f_C1R_p;
        }
    }

    if( ipp_sobel_vert && ipp_sobel_horiz ||
            ipp_scharr_vert && ipp_scharr_horiz )
    {
        CV_CALL( tempsrc = icvIPPFilterInit( src, buf_size,
                                             cvSize(el_size.width,el_size.height + block_size)));
        shifted_ptr = tempsrc->data.ptr + el_anchor.y*tempsrc->step +
                      el_anchor.x*CV_ELEM_SIZE(depth);
        temp_step = tempsrc->step ? tempsrc->step : CV_STUB_STEP;
        max_dy = tempsrc->rows - aperture_size + 1;
        use_ipp = true;
    }
    else
    {
        ipp_sobel_vert = ipp_sobel_horiz = 0;
        ipp_scharr_vert = ipp_scharr_horiz = 0;

        CV_CALL( dx_filter.init_deriv( size.width, depth, d_depth, 1, 0, aperture_size0 ));
        CV_CALL( dy_filter.init_deriv( size.width, depth, d_depth, 0, 1, aperture_size0 ));
        max_dy = buf_size / src->cols;
        max_dy = MAX( max_dy, aperture_size + block_size );
    }

    CV_CALL( Dx = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( Dy = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( cov = cvCreateMat( max_dy + block_size + 1, size.width, CV_32FC3 ));
    CV_CALL( sqrt_buf = cvCreateMat( 2, size.width, CV_32F ));
    Dx->cols = Dy->cols = size.width;

    if( !use_ipp )
        max_dy -= aperture_size - 1;
    d_step = Dx->step ? Dx->step : CV_STUB_STEP;

    CV_CALL(blur_filter.init(size.width, CV_32FC3, CV_32FC3, 0, cvSize(block_size,block_size)));
    stripe_size = size;

    factorx = (double)(1 << (aperture_size - 1)) * block_size;
    if( aperture_size0 == CV_SCHARR )
        factorx *= 2;
    if( depth == CV_8U )
        factorx *= 255.;
    factory = factorx = 1./factorx;
    if( ipp_sobel_vert )
        factory = -factory;

    for( y = 0; y < size.height; y += delta )
    {
        if( !use_ipp )
        {
            delta = MIN( size.height - y, max_dy );
            if( y + delta == size.height )
                stage = stage & CV_START ? CV_START + CV_END : CV_END;
            dx_filter.process( src, Dx, cvRect(0,y,-1,delta), cvPoint(0,0), stage );
            stripe_size.height = dy_filter.process( src, Dy, cvRect(0,y,-1,delta),
                                                    cvPoint(0,0), stage );
        }
        else
        {
            delta = icvIPPFilterNextStripe( src, tempsrc, y, el_size, el_anchor );
            stripe_size.height = delta;

            if( ipp_sobel_vert )
            {
                IPPI_CALL( ipp_sobel_vert( shifted_ptr, temp_step,
                                           Dx->data.ptr, d_step, stripe_size,
                                           aperture_size*10 + aperture_size ));
                IPPI_CALL( ipp_sobel_horiz( shifted_ptr, temp_step,
                                            Dy->data.ptr, d_step, stripe_size,
                                            aperture_size*10 + aperture_size ));
            }
            else /*if( ipp_scharr_vert )*/
            {
                IPPI_CALL( ipp_scharr_vert( shifted_ptr, temp_step,
                                            Dx->data.ptr, d_step, stripe_size ));
                IPPI_CALL( ipp_scharr_horiz( shifted_ptr, temp_step,
                                             Dy->data.ptr, d_step, stripe_size ));
            }
        }

        for( i = 0; i < stripe_size.height; i++ )
        {
            float* cov_data = (float*)(cov->data.ptr + i*cov->step);
            if( d_depth == CV_16S )
            {
                const short* dxdata = (const short*)(Dx->data.ptr + i*Dx->step);
                const short* dydata = (const short*)(Dy->data.ptr + i*Dy->step);

                for( j = 0; j < size.width; j++ )
                {
                    double dx = dxdata[j]*factorx;
                    double dy = dydata[j]*factory;

                    cov_data[j*3] = (float)(dx*dx);
                    cov_data[j*3+1] = (float)(dx*dy);
                    cov_data[j*3+2] = (float)(dy*dy);
                }
            }
            else
            {
                const float* dxdata = (const float*)(Dx->data.ptr + i*Dx->step);
                const float* dydata = (const float*)(Dy->data.ptr + i*Dy->step);

                for( j = 0; j < size.width; j++ )
                {
                    double dx = dxdata[j]*factorx;
                    double dy = dydata[j]*factory;

                    cov_data[j*3] = (float)(dx*dx);
                    cov_data[j*3+1] = (float)(dx*dy);
                    cov_data[j*3+2] = (float)(dy*dy);
                }
            }
        }

        if( y + stripe_size.height >= size.height )
            stage = stage & CV_START ? CV_START + CV_END : CV_END;

        stripe_size.height = blur_filter.process(cov,cov,
                             cvRect(0,0,-1,stripe_size.height),cvPoint(0,0),stage+CV_ISOLATED_ROI);

        if( op_type == ICV_MINEIGENVAL )
            icvCalcMinEigenVal( cov->data.fl, cov->step,
                                (float*)(eigenv->data.ptr + dst_y*eigenv->step), eigenv->step,
                                stripe_size, sqrt_buf );
        else if( op_type == ICV_HARRIS )
            icvCalcHarris( cov->data.fl, cov->step,
                           (float*)(eigenv->data.ptr + dst_y*eigenv->step), eigenv->step,
                           stripe_size, sqrt_buf, k );
        else if( op_type == ICV_EIGENVALSVECS )
            icvCalcEigenValsVecs( cov->data.fl, cov->step,
                                  (float*)(eigenv->data.ptr + dst_y*eigenv->step), eigenv->step,
                                  stripe_size, sqrt_buf );

        dst_y += stripe_size.height;
        stage = CV_MIDDLE;
    }

    __END__;

    cvReleaseMat( &Dx );
    cvReleaseMat( &Dy );
    cvReleaseMat( &cov );
    cvReleaseMat( &sqrt_buf );
    cvReleaseMat( &tempsrc );
}
예제 #2
0
파일: cvcorner.cpp 프로젝트: cybertk/opencv
CV_IMPL void
cvPreCornerDetect( const void* srcarr, void* dstarr, int aperture_size )
{
    CvSepFilter dx_filter, dy_filter, d2x_filter, d2y_filter, dxy_filter;
    CvMat *Dx = 0, *Dy = 0, *D2x = 0, *D2y = 0, *Dxy = 0;
    CvMat *tempsrc = 0;

    int buf_size = 1 << 12;

    CV_FUNCNAME( "cvPreCornerDetect" );

    __BEGIN__;

    int i, j, y, dst_y = 0, max_dy, delta = 0;
    int temp_step = 0, d_step;
    uchar* shifted_ptr = 0;
    int depth, d_depth;
    int stage = CV_START;
    CvSobelFixedIPPFunc ipp_sobel_vert = 0, ipp_sobel_horiz = 0,
                        ipp_sobel_vert_second = 0, ipp_sobel_horiz_second = 0,
                        ipp_sobel_cross = 0;
    CvSize el_size, size, stripe_size;
    int aligned_width;
    CvPoint el_anchor;
    double factor;
    CvMat stub, *src = (CvMat*)srcarr;
    CvMat dststub, *dst = (CvMat*)dstarr;
    bool use_ipp = false;

    CV_CALL( src = cvGetMat( srcarr, &stub ));
    CV_CALL( dst = cvGetMat( dst, &dststub ));

    if( CV_MAT_TYPE(src->type) != CV_8UC1 && CV_MAT_TYPE(src->type) != CV_32FC1 ||
            CV_MAT_TYPE(dst->type) != CV_32FC1 )
        CV_ERROR( CV_StsUnsupportedFormat, "Input must be 8uC1 or 32fC1, output must be 32fC1" );

    if( !CV_ARE_SIZES_EQ( src, dst ))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    if( aperture_size == CV_SCHARR )
        CV_ERROR( CV_StsOutOfRange, "CV_SCHARR is not supported by this function" );

    if( aperture_size < 3 || aperture_size > 7 || !(aperture_size & 1) )
        CV_ERROR( CV_StsOutOfRange,
                  "Derivative filter aperture size must be 3, 5 or 7" );

    depth = CV_MAT_DEPTH(src->type);
    d_depth = depth == CV_8U ? CV_16S : CV_32F;

    size = cvGetMatSize(src);
    aligned_width = cvAlign(size.width, 4);

    el_size = cvSize( aperture_size, aperture_size );
    el_anchor = cvPoint( aperture_size/2, aperture_size/2 );

    if( aperture_size <= 5 && icvFilterSobelVert_8u16s_C1R_p )
    {
        if( depth == CV_8U )
        {
            ipp_sobel_vert = icvFilterSobelVert_8u16s_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_8u16s_C1R_p;
            ipp_sobel_vert_second = icvFilterSobelVertSecond_8u16s_C1R_p;
            ipp_sobel_horiz_second = icvFilterSobelHorizSecond_8u16s_C1R_p;
            ipp_sobel_cross = icvFilterSobelCross_8u16s_C1R_p;
        }
        else if( depth == CV_32F )
        {
            ipp_sobel_vert = icvFilterSobelVert_32f_C1R_p;
            ipp_sobel_horiz = icvFilterSobelHoriz_32f_C1R_p;
            ipp_sobel_vert_second = icvFilterSobelVertSecond_32f_C1R_p;
            ipp_sobel_horiz_second = icvFilterSobelHorizSecond_32f_C1R_p;
            ipp_sobel_cross = icvFilterSobelCross_32f_C1R_p;
        }
    }

    if( ipp_sobel_vert && ipp_sobel_horiz && ipp_sobel_vert_second &&
            ipp_sobel_horiz_second && ipp_sobel_cross )
    {
        CV_CALL( tempsrc = icvIPPFilterInit( src, buf_size, el_size ));
        shifted_ptr = tempsrc->data.ptr + el_anchor.y*tempsrc->step +
                      el_anchor.x*CV_ELEM_SIZE(depth);
        temp_step = tempsrc->step ? tempsrc->step : CV_STUB_STEP;
        max_dy = tempsrc->rows - aperture_size + 1;
        use_ipp = true;
    }
    else
    {
        ipp_sobel_vert = ipp_sobel_horiz = 0;
        ipp_sobel_vert_second = ipp_sobel_horiz_second = ipp_sobel_cross = 0;
        dx_filter.init_deriv( size.width, depth, d_depth, 1, 0, aperture_size );
        dy_filter.init_deriv( size.width, depth, d_depth, 0, 1, aperture_size );
        d2x_filter.init_deriv( size.width, depth, d_depth, 2, 0, aperture_size );
        d2y_filter.init_deriv( size.width, depth, d_depth, 0, 2, aperture_size );
        dxy_filter.init_deriv( size.width, depth, d_depth, 1, 1, aperture_size );
        max_dy = buf_size / src->cols;
        max_dy = MAX( max_dy, aperture_size );
    }

    CV_CALL( Dx = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( Dy = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( D2x = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( D2y = cvCreateMat( max_dy, aligned_width, d_depth ));
    CV_CALL( Dxy = cvCreateMat( max_dy, aligned_width, d_depth ));
    Dx->cols = Dy->cols = D2x->cols = D2y->cols = Dxy->cols = size.width;

    if( !use_ipp )
        max_dy -= aperture_size - 1;
    d_step = Dx->step ? Dx->step : CV_STUB_STEP;

    stripe_size = size;

    factor = 1 << (aperture_size - 1);
    if( depth == CV_8U )
        factor *= 255;
    factor = 1./(factor * factor * factor);

    aperture_size = aperture_size * 10 + aperture_size;

    for( y = 0; y < size.height; y += delta )
    {
        if( !use_ipp )
        {
            delta = MIN( size.height - y, max_dy );
            CvRect roi = cvRect(0,y,size.width,delta);
            CvPoint origin=cvPoint(0,0);

            if( y + delta == size.height )
                stage = stage & CV_START ? CV_START + CV_END : CV_END;

            dx_filter.process(src,Dx,roi,origin,stage);
            dy_filter.process(src,Dy,roi,origin,stage);
            d2x_filter.process(src,D2x,roi,origin,stage);
            d2y_filter.process(src,D2y,roi,origin,stage);
            stripe_size.height = dxy_filter.process(src,Dxy,roi,origin,stage);
        }
        else
        {
            delta = icvIPPFilterNextStripe( src, tempsrc, y, el_size, el_anchor );
            stripe_size.height = delta;

            IPPI_CALL( ipp_sobel_vert( shifted_ptr, temp_step,
                                       Dx->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_horiz( shifted_ptr, temp_step,
                                        Dy->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_vert_second( shifted_ptr, temp_step,
                                              D2x->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_horiz_second( shifted_ptr, temp_step,
                                               D2y->data.ptr, d_step, stripe_size, aperture_size ));
            IPPI_CALL( ipp_sobel_cross( shifted_ptr, temp_step,
                                        Dxy->data.ptr, d_step, stripe_size, aperture_size ));
        }

        for( i = 0; i < stripe_size.height; i++, dst_y++ )
        {
            float* dstdata = (float*)(dst->data.ptr + dst_y*dst->step);

            if( d_depth == CV_16S )
            {
                const short* dxdata = (const short*)(Dx->data.ptr + i*Dx->step);
                const short* dydata = (const short*)(Dy->data.ptr + i*Dy->step);
                const short* d2xdata = (const short*)(D2x->data.ptr + i*D2x->step);
                const short* d2ydata = (const short*)(D2y->data.ptr + i*D2y->step);
                const short* dxydata = (const short*)(Dxy->data.ptr + i*Dxy->step);

                for( j = 0; j < stripe_size.width; j++ )
                {
                    double dx = dxdata[j];
                    double dx2 = dx * dx;
                    double dy = dydata[j];
                    double dy2 = dy * dy;

                    dstdata[j] = (float)(factor*(dx2*d2ydata[j] + dy2*d2xdata[j] - 2*dx*dy*dxydata[j]));
                }
            }
            else
            {
                const float* dxdata = (const float*)(Dx->data.ptr + i*Dx->step);
                const float* dydata = (const float*)(Dy->data.ptr + i*Dy->step);
                const float* d2xdata = (const float*)(D2x->data.ptr + i*D2x->step);
                const float* d2ydata = (const float*)(D2y->data.ptr + i*D2y->step);
                const float* dxydata = (const float*)(Dxy->data.ptr + i*Dxy->step);

                for( j = 0; j < stripe_size.width; j++ )
                {
                    double dx = dxdata[j];
                    double dy = dydata[j];
                    dstdata[j] = (float)(factor*(dx*dx*d2ydata[j] + dy*dy*d2xdata[j] - 2*dx*dy*dxydata[j]));
                }
            }
        }

        stage = CV_MIDDLE;
    }

    __END__;

    cvReleaseMat( &Dx );
    cvReleaseMat( &Dy );
    cvReleaseMat( &D2x );
    cvReleaseMat( &D2y );
    cvReleaseMat( &Dxy );
    cvReleaseMat( &tempsrc );
}
예제 #3
0
CV_IMPL void
cvFilter2D( const CvArr* _src, CvArr* _dst, const CvMat* _kernel, CvPoint anchor )
{
    // below that approximate size OpenCV is faster
    const int ipp_lower_limit = 20;
    
    static CvFuncTable filter_tab;
    static int inittab = 0;
    CvFilterState *state = 0;
    float* kernel_data = 0;
    int local_alloc = 1;
    CvMat* temp = 0;

    CV_FUNCNAME( "cvFilter2D" );

    __BEGIN__;

    CvFilterFunc func = 0;
    int coi1 = 0, coi2 = 0;
    CvMat srcstub, *src = (CvMat*)_src;
    CvMat dststub, *dst = (CvMat*)_dst;
    CvSize size;
    int type, depth;
    int src_step, dst_step;
    CvMat kernel_hdr;
    const CvMat* kernel = _kernel;

    if( !inittab )
    {
        icvInitFilterTab( &filter_tab );
        inittab = 1;
    }

    CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
    CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));

    if( coi1 != 0 || coi2 != 0 )
        CV_ERROR( CV_BadCOI, "" );

    type = CV_MAT_TYPE( src->type );

    if( !CV_ARE_SIZES_EQ( src, dst ))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    if( !CV_ARE_TYPES_EQ( src, dst ))
        CV_ERROR( CV_StsUnmatchedFormats, "" );

    if( !CV_IS_MAT(kernel) ||
        (CV_MAT_TYPE(kernel->type) != CV_32F &&
        CV_MAT_TYPE(kernel->type) != CV_64F ))
        CV_ERROR( CV_StsBadArg, "kernel must be single-channel floating-point matrix" );

    if( anchor.x == -1 && anchor.y == -1 )
        anchor = cvPoint(kernel->cols/2,kernel->rows/2);

    if( (unsigned)anchor.x >= (unsigned)kernel->cols ||
        (unsigned)anchor.y >= (unsigned)kernel->rows )
        CV_ERROR( CV_StsOutOfRange, "anchor point is out of kernel" );

    if( CV_MAT_TYPE(kernel->type) != CV_32FC1 || !CV_IS_MAT_CONT(kernel->type) || icvFilter_8u_C1R_p )
    {
        int sz = kernel->rows*kernel->cols*sizeof(kernel_data[0]);
        if( sz < CV_MAX_LOCAL_SIZE )
            kernel_data = (float*)cvStackAlloc( sz );
        else
        {
            CV_CALL( kernel_data = (float*)cvAlloc( sz ));
            local_alloc = 0;
        }
        kernel_hdr = cvMat( kernel->rows, kernel->cols, CV_32F, kernel_data );
        if( CV_MAT_TYPE(kernel->type) == CV_32FC1 )
            cvCopy( kernel, &kernel_hdr );
        else
            cvConvertScale( kernel, &kernel_hdr, 1, 0 );
        kernel = &kernel_hdr;
    }

    size = cvGetMatSize( src );
    depth = CV_MAT_DEPTH(type);
    src_step = src->step;
    dst_step = dst->step ? dst->step : CV_STUB_STEP;

    if( icvFilter_8u_C1R_p && (src->rows >= ipp_lower_limit || src->cols >= ipp_lower_limit) )
    {
        CvFilterIPPFunc ipp_func = 
                type == CV_8UC1 ? (CvFilterIPPFunc)icvFilter_8u_C1R_p :
                type == CV_8UC3 ? (CvFilterIPPFunc)icvFilter_8u_C3R_p :
                type == CV_8UC4 ? (CvFilterIPPFunc)icvFilter_8u_C4R_p :
                type == CV_16SC1 ? (CvFilterIPPFunc)icvFilter_16s_C1R_p :
                type == CV_16SC3 ? (CvFilterIPPFunc)icvFilter_16s_C3R_p :
                type == CV_16SC4 ? (CvFilterIPPFunc)icvFilter_16s_C4R_p :
                type == CV_32FC1 ? (CvFilterIPPFunc)icvFilter_32f_C1R_p :
                type == CV_32FC3 ? (CvFilterIPPFunc)icvFilter_32f_C3R_p :
                type == CV_32FC4 ? (CvFilterIPPFunc)icvFilter_32f_C4R_p : 0;
        
        if( ipp_func )
        {
            CvSize el_size = { kernel->cols, kernel->rows };
            CvPoint el_anchor = { el_size.width - anchor.x - 1, el_size.height - anchor.y - 1 };
            int stripe_size = 1 << 16; // the optimal value may depend on CPU cache,
                                       // overhead of current IPP code etc.
            const uchar* shifted_ptr;
            int i, j, y, dy = 0;
            int temp_step;

            // mirror the kernel around the center
            for( i = 0; i < (el_size.height+1)/2; i++ )
            {
                float* top_row = kernel->data.fl + el_size.width*i;
                float* bottom_row = kernel->data.fl + el_size.width*(el_size.height - i - 1);

                for( j = 0; j < (el_size.width+1)/2; j++ )
                {
                    float a = top_row[j], b = top_row[el_size.width - j - 1];
                    float c = bottom_row[j], d = bottom_row[el_size.width - j - 1];
                    top_row[j] = d;
                    top_row[el_size.width - j - 1] = c;
                    bottom_row[j] = b;
                    bottom_row[el_size.width - j - 1] = a;
                }
            }

            CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
            
            shifted_ptr = temp->data.ptr +
                anchor.y*temp->step + anchor.x*CV_ELEM_SIZE(type);
            temp_step = temp->step ? temp->step : CV_STUB_STEP;

            for( y = 0; y < src->rows; y += dy )
            {
                dy = icvIPPFilterNextStripe( src, temp, y, el_size, anchor );
                IPPI_CALL( ipp_func( shifted_ptr, temp_step,
                    dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
                    kernel->data.fl, el_size, el_anchor ));
            }
            EXIT;
        }
    }

    CV_CALL( state = icvFilterInitAlloc( src->cols, cv32f, CV_MAT_CN(type),
                                   cvSize(kernel->cols, kernel->rows), anchor,
                                   kernel->data.ptr, ICV_GENERIC_KERNEL ));

    if( CV_MAT_CN(type) == 2 )
        CV_ERROR( CV_BadNumChannels, "Unsupported number of channels" );

    func = (CvFilterFunc)(filter_tab.fn_2d[depth]);

    if( !func )
        CV_ERROR( CV_StsUnsupportedFormat, "" );

    if( size.height == 1 )
        src_step = dst_step = CV_STUB_STEP;

    IPPI_CALL( func( src->data.ptr, src_step, dst->data.ptr,
                     dst_step, &size, state, 0 ));

    __END__;

    cvReleaseMat( &temp );
    icvFilterFree( &state );
    if( !local_alloc )
        cvFree( (void**)&kernel_data );
}