int CvMorphology::fill_cyclic_buffer( const uchar* src, int src_step, int y0, int y1, int y2 ) { int i, y = y0, bsz1 = border_tab_sz1, bsz = border_tab_sz; int pix_size = CV_ELEM_SIZE(src_type); int width_n = (prev_x_range.end_index - prev_x_range.start_index)*pix_size; if( CV_MAT_DEPTH(src_type) != CV_32F ) return CvBaseImageFilter::fill_cyclic_buffer( src, src_step, y0, y1, y2 ); // fill the cyclic buffer for( ; buf_count < buf_max_count && y < y2; buf_count++, y++, src += src_step ) { uchar* trow = is_separable ? buf_end : buf_tail; for( i = 0; i < width_n; i += sizeof(int) ) { int t = *(int*)(src + i); *(int*)(trow + i + bsz1) = CV_TOGGLE_FLT(t); } if( border_mode != IPL_BORDER_CONSTANT ) { for( i = 0; i < bsz1; i++ ) { int j = border_tab[i]; trow[i] = trow[j]; } for( ; i < bsz; i++ ) { int j = border_tab[i]; trow[i + width_n] = trow[j]; } } else { const uchar *bt = (uchar*)border_tab; for( i = 0; i < bsz1; i++ ) trow[i] = bt[i]; for( ; i < bsz; i++ ) trow[i + width_n] = bt[i]; } if( is_separable ) x_func( trow, buf_tail, this ); buf_tail += buf_step; if( buf_tail >= buf_end ) buf_tail = buf_start; } return y - y0; }
int icvIPPSepFilter( const CvMat* src, CvMat* dst, const CvMat* kernelX, const CvMat* kernelY, CvPoint anchor ) { int result = 0; CvMat* top_bottom = 0; CvMat* vout_hin = 0; CvMat* dst_buf = 0; CV_FUNCNAME( "icvIPPSepFilter" ); __BEGIN__; CvSize ksize; CvPoint el_anchor; CvSize size; int type, depth, pix_size; int i, x, y, dy = 0, prev_dy = 0, max_dy; CvMat vout; CvCopyNonConstBorderFunc copy_border_func; CvIPPSepFilterFunc x_func = 0, y_func = 0; int src_step, top_bottom_step; float *kx, *ky; int align, stripe_size; if( !icvFilterRow_8u_C1R_p ) EXIT; if( !CV_ARE_TYPES_EQ( src, dst ) || !CV_ARE_SIZES_EQ( src, dst ) || !CV_IS_MAT_CONT(kernelX->type & kernelY->type) || CV_MAT_TYPE(kernelX->type) != CV_32FC1 || CV_MAT_TYPE(kernelY->type) != CV_32FC1 || kernelX->cols != 1 && kernelX->rows != 1 || kernelY->cols != 1 && kernelY->rows != 1 || (unsigned)anchor.x >= (unsigned)(kernelX->cols + kernelX->rows - 1) || (unsigned)anchor.y >= (unsigned)(kernelY->cols + kernelY->rows - 1) ) CV_ERROR( CV_StsError, "Internal Error: incorrect parameters" ); ksize.width = kernelX->cols + kernelX->rows - 1; ksize.height = kernelY->cols + kernelY->rows - 1; /*if( ksize.width <= 5 && ksize.height <= 5 ) { float* ker = (float*)cvStackAlloc( ksize.width*ksize.height*sizeof(ker[0])); CvMat kernel = cvMat( ksize.height, ksize.width, CV_32F, ker ); for( y = 0, i = 0; y < ksize.height; y++ ) for( x = 0; x < ksize.width; x++, i++ ) ker[i] = kernelY->data.fl[y]*kernelX->data.fl[x]; CV_CALL( cvFilter2D( src, dst, &kernel, anchor )); EXIT; }*/ type = CV_MAT_TYPE(src->type); depth = CV_MAT_DEPTH(type); pix_size = CV_ELEM_SIZE(type); if( type == CV_8UC1 ) x_func = icvFilterRow_8u_C1R_p, y_func = icvFilterColumn_8u_C1R_p; else if( type == CV_8UC3 ) x_func = icvFilterRow_8u_C3R_p, y_func = icvFilterColumn_8u_C3R_p; else if( type == CV_8UC4 ) x_func = icvFilterRow_8u_C4R_p, y_func = icvFilterColumn_8u_C4R_p; else if( type == CV_16SC1 ) x_func = icvFilterRow_16s_C1R_p, y_func = icvFilterColumn_16s_C1R_p; else if( type == CV_16SC3 ) x_func = icvFilterRow_16s_C3R_p, y_func = icvFilterColumn_16s_C3R_p; else if( type == CV_16SC4 ) x_func = icvFilterRow_16s_C4R_p, y_func = icvFilterColumn_16s_C4R_p; else if( type == CV_32FC1 ) x_func = icvFilterRow_32f_C1R_p, y_func = icvFilterColumn_32f_C1R_p; else if( type == CV_32FC3 ) x_func = icvFilterRow_32f_C3R_p, y_func = icvFilterColumn_32f_C3R_p; else if( type == CV_32FC4 ) x_func = icvFilterRow_32f_C4R_p, y_func = icvFilterColumn_32f_C4R_p; else EXIT; size = cvGetMatSize(src); stripe_size = src->data.ptr == dst->data.ptr ? 1 << 15 : 1 << 16; max_dy = MAX( ksize.height - 1, stripe_size/(size.width + ksize.width - 1)); max_dy = MIN( max_dy, size.height + ksize.height - 1 ); align = 8/CV_ELEM_SIZE(depth); CV_CALL( top_bottom = cvCreateMat( ksize.height*2, cvAlign(size.width,align), type )); CV_CALL( vout_hin = cvCreateMat( max_dy + ksize.height, cvAlign(size.width + ksize.width - 1, align), type )); if( src->data.ptr == dst->data.ptr && size.height ) CV_CALL( dst_buf = cvCreateMat( max_dy + ksize.height, cvAlign(size.width, align), type )); kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) ); ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) ); // mirror the kernels for( i = 0; i < ksize.width; i++ ) kx[i] = kernelX->data.fl[ksize.width - i - 1]; for( i = 0; i < ksize.height; i++ ) ky[i] = kernelY->data.fl[ksize.height - i - 1]; el_anchor = cvPoint( ksize.width - anchor.x - 1, ksize.height - anchor.y - 1 ); cvGetCols( vout_hin, &vout, anchor.x, anchor.x + size.width ); copy_border_func = icvGetCopyNonConstBorderFunc( pix_size, IPL_BORDER_REPLICATE ); src_step = src->step ? src->step : CV_STUB_STEP; top_bottom_step = top_bottom->step ? top_bottom->step : CV_STUB_STEP; vout.step = vout.step ? vout.step : CV_STUB_STEP; for( y = 0; y < size.height; y += dy ) { const CvMat *vin = src, *hout = dst; int src_y = y, dst_y = y; dy = MIN( max_dy, size.height - (ksize.height - anchor.y - 1) - y ); if( y < anchor.y || dy < anchor.y ) { int ay = anchor.y; CvSize src_stripe_size = size; if( y < anchor.y ) { src_y = 0; dy = MIN( anchor.y, size.height ); src_stripe_size.height = MIN( dy + ksize.height - anchor.y - 1, size.height ); } else { src_y = MAX( y - anchor.y, 0 ); dy = size.height - y; src_stripe_size.height = MIN( dy + anchor.y, size.height ); ay = MAX( anchor.y - y, 0 ); } copy_border_func( src->data.ptr + src_y*src_step, src_step, src_stripe_size, top_bottom->data.ptr, top_bottom_step, cvSize(size.width, dy + ksize.height - 1), ay, 0 ); vin = top_bottom; src_y = anchor.y; } // do vertical convolution IPPI_CALL( y_func( vin->data.ptr + src_y*vin->step, vin->step ? vin->step : CV_STUB_STEP, vout.data.ptr, vout.step, cvSize(size.width, dy), ky, ksize.height, el_anchor.y )); // now it's time to copy the previously processed stripe to the input/output image if( src->data.ptr == dst->data.ptr ) { for( i = 0; i < prev_dy; i++ ) memcpy( dst->data.ptr + (y - prev_dy + i)*dst->step, dst_buf->data.ptr + i*dst_buf->step, size.width*pix_size ); if( y + dy < size.height ) { hout = dst_buf; dst_y = 0; } } // create a border for every line by replicating the left-most/right-most elements for( i = 0; i < dy; i++ ) { uchar* ptr = vout.data.ptr + i*vout.step; for( x = -1; x >= -anchor.x*pix_size; x-- ) ptr[x] = ptr[x + pix_size]; for( x = size.width*pix_size; x < (size.width+ksize.width-anchor.x-1)*pix_size; x++ ) ptr[x] = ptr[x - pix_size]; } // do horizontal convolution IPPI_CALL( x_func( vout.data.ptr, vout.step, hout->data.ptr + dst_y*hout->step, hout->step ? hout->step : CV_STUB_STEP, cvSize(size.width, dy), kx, ksize.width, el_anchor.x )); prev_dy = dy; } result = 1; __END__; cvReleaseMat( &vout_hin ); cvReleaseMat( &dst_buf ); cvReleaseMat( &top_bottom ); return result; }