示例#1
0
// Default allocator built on malloc.
//
// Returns a pointer aligned to CV_MALLOC_ALIGN with at least `size` usable
// bytes, or 0 when the request cannot be satisfied. The address originally
// returned by malloc is stored in the pointer-sized slot immediately below the
// returned pointer (presumably read back by the matching free routine, which
// is not part of this block). The second parameter is unused.
static void*
icvDefaultAlloc( size_t size, void* )
{
    // Worst-case padding: alignment slack (doubled for requests of 4096 bytes
    // or more) plus one pointer slot for the saved malloc address.
    const size_t overhead = CV_MALLOC_ALIGN*((size >= 4096) + 1) + sizeof(char*);

    // Refuse requests whose padded size would wrap around size_t: malloc would
    // otherwise be asked for a tiny block and the caller would write past it.
    if( size > (size_t)-1 - overhead )
        return 0;

    char* ptr0 = (char*)malloc( size + overhead );
    if( !ptr0 )
        return 0;

    // align the pointer, keeping room for the saved address just below it
    char* ptr = (char*)cvAlignPtr(ptr0 + sizeof(char*) + 1, CV_MALLOC_ALIGN);
    *(char**)(ptr - sizeof(char*)) = ptr0;

    return ptr;
}
示例#2
0
// malloc-based allocator returning a pointer aligned to CV_MALLOC_ALIGN.
//
// Returns a block with at least `size` usable bytes, or 0 when the request
// cannot be satisfied. The address originally returned by malloc is stored in
// the pointer-sized slot immediately below the returned pointer (presumably
// read back by the matching free routine, which is not part of this block).
void*
cvAlloc( size_t size)
{
    char *ptr, *ptr0;
    size_t overhead;
    CV_FUNCNAME( "cvAlloc" );
    __BEGIN__;

    // Worst-case padding: alignment slack (doubled for requests of 4096 bytes
    // or more) plus one pointer slot for the saved malloc address.
    overhead = CV_MALLOC_ALIGN*((size >= 4096) + 1) + sizeof(char*);

    // Refuse requests whose padded size would wrap around size_t: malloc would
    // otherwise be asked for a tiny block and the caller would write past it.
    if( size > (size_t)-1 - overhead )
        return 0;

    ptr0 = (char*)malloc( size + overhead );

    if( !ptr0 )
        return 0;

    // align the pointer, keeping room for the saved address just below it
    ptr = (char*)cvAlignPtr(ptr0 + sizeof(char*) + 1, CV_MALLOC_ALIGN);
    *(char**)(ptr - sizeof(char*)) = ptr0;

    __END__;
    return ptr;
}
示例#3
0
/*F///////////////////////////////////////////////////////////////////////////////////////
//    Name: icvCalcPGH
//    Purpose:
//      Calculates PGH (pairwise geometric histogram) for the given contour.
//    Context:
//    Parameters:
//      contour   - pointer to input contour object (must be a point-set sequence).
//      pgh       - output histogram, angle_dim rows by dist_dim columns of float.
//                  It is zeroed, filled in place as int counters and converted
//                  to float at the end (relies on sizeof(int) == sizeof(float)).
//      angle_dim - number of angle bins (vertical size of histogram), 1..180
//      dist_dim  - number of distance bins (horizontal size of histogram), > 0
//    Returns:
//      CV_OK on success, otherwise:
//        CV_NULLPTR_ERR  - contour or pgh is NULL
//        CV_BADRANGE_ERR - angle_dim or dist_dim is out of range
//        CV_BADFLAG_ERR  - contour is not a point set
//        CV_OUTOFMEM_ERR - the temporary buffer could not be allocated
//    Notes:
//      Edges of zero length are skipped as reference edges.
//      NOTE(review): zero-length edges still take part as the *second* edge of
//      a pair, where their stored inverse length is infinite - confirm whether
//      callers can pass contours with repeated points.
//F*/
static CvStatus
icvCalcPGH( const CvSeq * contour, float *pgh, int angle_dim, int dist_dim )
{
    /* small contours use this stack buffer; larger ones fall back to cvAlloc */
    char local_buffer[(1 << 14) + 32];
    float *local_buffer_ptr = (float *)cvAlignPtr(local_buffer,32);
    float *buffer = local_buffer_ptr;
    double angle_scale = (angle_dim - 0.51) / icv_acos_table[0];
    double dist_scale = DBL_EPSILON;
    int buffer_size;
    int i, count, pass;
    int *pghi = (int *) pgh;            /* histogram viewed as int counters */
    int hist_size = angle_dim * dist_dim;
    CvSeqReader reader1, reader2;       /* external and internal readers */

    if( !contour || !pgh )
        return CV_NULLPTR_ERR;

    if( angle_dim <= 0 || angle_dim > 180 || dist_dim <= 0 )
        return CV_BADRANGE_ERR;

    if( !CV_IS_SEQ_POINT_SET( contour ))
        return CV_BADFLAG_ERR;

    memset( pgh, 0, hist_size * sizeof( pgh[0] ));

    count = contour->total;

    /* allocate buffer for inverse edge lengths */
    buffer_size = count * sizeof( float );

    if( buffer_size > (int)sizeof(local_buffer) - 32 )
    {
        buffer = (float *) cvAlloc( buffer_size );
        if( !buffer )
            return CV_OUTOFMEM_ERR;
    }

    cvStartReadSeq( contour, &reader1, 0 );
    cvStartReadSeq( contour, &reader2, 0 );

    /* calc & store the inverse length of every edge */
    for( i = 0; i < count; i++ )
    {
        CvPoint pt1, pt2;
        double dx, dy;

        CV_READ_EDGE( pt1, pt2, reader1 );

        dx = pt2.x - pt1.x;
        dy = pt2.y - pt1.y;
        buffer[i] = (float)(1./sqrt(dx * dx + dy * dy));
    }

    /* 
       do 2 passes. 
       First calculates maximal distance.
       Second calculates histogram itself.
     */
    for( pass = 1; pass <= 2; pass++ )
    {
        double dist_coeff = 0, angle_coeff = 0;

        /* run external loop */
        for( i = 0; i < count; i++ )
        {
            CvPoint pt1, pt2;
            int dx, dy;
            int dist = 0;

            CV_READ_EDGE( pt1, pt2, reader1 );

            dx = pt2.x - pt1.x;
            dy = pt2.y - pt1.y;

            /* skip degenerate (zero-length) reference edges */
            if( (dx | dy) != 0 )
            {
                int j;

                if( pass == 2 )
                {
                    dist_coeff = buffer[i] * dist_scale;
                    angle_coeff = buffer[i] * (_CV_ACOS_TABLE_SIZE / 2);
                }

                /* run internal loop (for current edge) */
                for( j = 0; j < count; j++ )
                {
                    CvPoint pt3, pt4;

                    CV_READ_EDGE( pt3, pt4, reader2 );

                    if( i != j )        /* process edge pair */
                    {
                        /* cross products of (pt3 - pt1) and (pt4 - pt1) with the
                           reference edge direction: signed perpendicular distances
                           of both endpoints, scaled by the reference edge length */
                        int d1 = (pt3.y - pt1.y) * dx - (pt3.x - pt1.x) * dy;
                        int d2 = (pt4.y - pt1.y) * dx - (pt4.x - pt1.x) * dy;
                        int cross_flag;
                        int *hist_row = 0;

                        if( pass == 2 )
                        {
                            /* dot product of the two edge directions */
                            int dp = (pt4.x - pt3.x) * dx + (pt4.y - pt3.y) * dy;

                            /* normalise by both edge lengths and map to an
                               index into icv_acos_table, clamped to its range */
                            dp = cvRound( dp * angle_coeff * buffer[j] ) +
                                (_CV_ACOS_TABLE_SIZE / 2);
                            dp = MAX( dp, 0 );
                            dp = MIN( dp, _CV_ACOS_TABLE_SIZE - 1 );
                            hist_row = pghi + dist_dim *
                                cvRound( icv_acos_table[dp] * angle_scale );

                            /* convert distances to histogram bin units */
                            d1 = cvRound( d1 * dist_coeff );
                            d2 = cvRound( d2 * dist_coeff );
                        }

                        /* opposite signs: the endpoints lie on different sides
                           of the reference edge line */
                        cross_flag = (d1 ^ d2) < 0;

                        d1 = CV_IABS( d1 );
                        d2 = CV_IABS( d2 );

                        if( pass == 2 )
                        {
                            if( d1 >= dist_dim )
                                d1 = dist_dim - 1;
                            if( d2 >= dist_dim )
                                d2 = dist_dim - 1;

                            if( !cross_flag )
                            {
                                if( d1 > d2 )   /* make d1 <= d2 */
                                {
                                    d1 ^= d2;
                                    d2 ^= d1;
                                    d1 ^= d2;
                                }

                                /* same side: fill the bins between both distances */
                                for( ; d1 <= d2; d1++ )
                                    hist_row[d1]++;
                            }
                            else
                            {
                                /* different sides: fill from each distance down to 0 */
                                for( ; d1 >= 0; d1-- )
                                    hist_row[d1]++;
                                for( ; d2 >= 0; d2-- )
                                    hist_row[d2]++;
                            }
                        }
                        else    /* 1st pass */
                        {
                            d1 = CV_IMAX( d1, d2 );
                            dist = CV_IMAX( dist, d1 );
                        }
                    }           /* end of processing of edge pair */

                }               /* end of internal loop */

                if( pass == 1 )
                {
                    /* largest distance for this reference edge, in real units */
                    double scale = dist * buffer[i];

                    dist_scale = MAX( dist_scale, scale );
                }
            }
        }                       /* end of external loop */

        if( pass == 1 )
        {
            /* turn the maximal distance into a distance -> bin scale factor */
            dist_scale = (dist_dim - 0.51) / dist_scale;
        }

    }                           /* end of pass on loops */


    /* convert hist to floats */
    for( i = 0; i < hist_size; i++ )
    {
        ((float *) pghi)[i] = (float) pghi[i];
    }

    if( buffer != local_buffer_ptr )
        cvFree( &buffer );

    return CV_OK;
}
示例#4
0
/* Calculates bounding rectangle of a point set or retrieves already calculated.

   array  - either a point-set sequence, a CV_32SC2 / CV_32FC2 matrix of points,
            or a CV_8UC1 / CV_8SC1 mask whose non-zero pixels are bounded.
   update - for a sequence whose header is at least sizeof(CvContour):
            0 returns the rectangle stored in the header, non-zero recomputes
            the rectangle and stores it there. For every other input the
            rectangle is always computed and never stored.

   Returns the bounding rectangle; an input without points / non-zero pixels
   yields {0, 0, 0, 0}. Errors are reported through CV_ERROR / CV_CALL. */
CV_IMPL  CvRect
cvBoundingRect( CvArr* array, int update )
{
    CvSeqReader reader;
    CvRect  rect = { 0, 0, 0, 0 };
    CvContour contour_header;
    CvSeq* ptseq = 0;
    CvSeqBlock block;

    CV_FUNCNAME( "cvBoundingRect" );

    __BEGIN__;

    CvMat stub, *mat = 0;
    /* xmax = ymax = -1 makes width and height come out as 0 when nothing is found */
    int  xmin = 0, ymin = 0, xmax = -1, ymax = -1, i, j, k;
    int calculate = update;

    if( CV_IS_SEQ( array ))
    {
        ptseq = (CvSeq*)array;
        if( !CV_IS_SEQ_POINT_SET( ptseq ))
            CV_ERROR( CV_StsBadArg, "Unsupported sequence type" );

        if( ptseq->header_size < (int)sizeof(CvContour))
        {
            /* The header is too small to hold a rectangle: nothing can be
               cached, so always compute and never write back. */
            update = 0;
            calculate = 1;
        }
    }
    else
    {
        CV_CALL( mat = cvGetMat( array, &stub ));
        if( CV_MAT_TYPE(mat->type) == CV_32SC2 ||
            CV_MAT_TYPE(mat->type) == CV_32FC2 )
        {
            /* matrix of points: wrap it into a sequence and use the point path */
            CV_CALL( ptseq = cvPointSeqFromMat(
                CV_SEQ_KIND_GENERIC, mat, &contour_header, &block ));
            mat = 0;
        }
        else if( CV_MAT_TYPE(mat->type) != CV_8UC1 &&
                CV_MAT_TYPE(mat->type) != CV_8SC1 )
            CV_ERROR( CV_StsUnsupportedFormat,
                "The image/matrix format is not supported by the function" );
        update = 0;
        calculate = 1;
    }

    if( !calculate )
    {
        /* return the rectangle already stored in the contour header */
        rect = ((CvContour*)ptseq)->rect;
        EXIT;
    }

    if( mat )
    {
        /* 8-bit mask: find the extents of the non-zero pixels row by row */
        CvSize size = cvGetMatSize(mat);
        /* xmin past the last column and ymin == -1 mean "nothing found yet" */
        xmin = size.width;
        ymin = -1;

        for( i = 0; i < size.height; i++ )
        {
            uchar* _ptr = mat->data.ptr + i*mat->step;
            /* first 4-byte aligned address in the row, used for the int reads below */
            uchar* ptr = (uchar*)cvAlignPtr(_ptr, 4);
            /* offset = number of bytes in front of the aligned address */
            int have_nz = 0, k_min, offset = (int)(ptr - _ptr);
            j = 0;
            offset = MIN(offset, size.width);
            /* scan the unaligned head of the row byte by byte */
            for( ; j < offset; j++ )
                if( _ptr[j] )
                {
                    have_nz = 1;
                    break;
                }
            if( j < offset )
            {
                /* a non-zero byte was found in the head */
                if( j < xmin )
                    xmin = j;
                if( j > xmax )
                    xmax = j;
            }
            if( offset < size.width )
            {
                /* switch to coordinates relative to the aligned pointer */
                xmin -= offset;
                xmax -= offset;
                size.width -= offset;
                j = 0;
                /* look left of the current xmin: skip zero words 4 bytes at a time ... */
                for( ; j <= xmin - 4; j += 4 )
                    if( *((int*)(ptr+j)) )
                        break;
                /* ... then byte by byte to locate the exact column */
                for( ; j < xmin; j++ )
                    if( ptr[j] )
                    {
                        xmin = j;
                        if( j > xmax )
                            xmax = j;
                        have_nz = 1;
                        break;
                    }
                /* only columns right of k_min can still enlarge xmax */
                k_min = MAX(j-1, xmax);
                k = size.width - 1;
                /* step back byte by byte until k is the last byte of an aligned word */
                for( ; k > k_min && (k&3) != 3; k-- )
                    if( ptr[k] )
                        break;
                if( k > k_min && (k&3) == 3 )
                {
                    /* skip whole zero words from the right */
                    for( ; k > k_min+3; k -= 4 )
                        if( *((int*)(ptr+k-3)) )
                            break;
                }
                /* locate the exact right-most non-zero byte */
                for( ; k > k_min; k-- )
                    if( ptr[k] )
                    {
                        xmax = k;
                        have_nz = 1;
                        break;
                    }
                if( !have_nz )
                {
                    /* Neither scan hit a non-zero byte; check the remaining
                       span between j and k only to learn whether the row is
                       empty (this affects ymin / ymax, not xmin / xmax). */
                    j &= ~3;
                    for( ; j <= k - 3; j += 4 )
                        if( *((int*)(ptr+j)) )
                            break;
                    for( ; j <= k; j++ )
                        if( ptr[j] )
                        {
                            have_nz = 1;
                            break;
                        }
                }
                /* back to coordinates relative to the start of the row */
                xmin += offset;
                xmax += offset;
                size.width += offset;
            }
            if( have_nz )
            {
                if( ymin < 0 )
                    ymin = i;
                ymax = i;
            }
        }

        /* no non-zero pixel anywhere: report an empty rectangle at the origin */
        if( xmin >= size.width )
            xmin = ymin = 0;
    }
    else if( ptseq->total )
    {   
        int  is_float = CV_SEQ_ELTYPE(ptseq) == CV_32FC2;
        cvStartReadSeq( ptseq, &reader, 0 );

        if( !is_float )
        {
            CvPoint pt;
            /* init values */
            CV_READ_SEQ_ELEM( pt, reader );
            xmin = xmax = pt.x;
            ymin = ymax = pt.y;

            for( i = 1; i < ptseq->total; i++ )
            {            
                CV_READ_SEQ_ELEM( pt, reader );
        
                if( xmin > pt.x )
                    xmin = pt.x;
        
                if( xmax < pt.x )
                    xmax = pt.x;

                if( ymin > pt.y )
                    ymin = pt.y;

                if( ymax < pt.y )
                    ymax = pt.y;
            }
        }
        else
        {
            /* The float coordinates are read as raw 32-bit integers and passed
               through CV_TOGGLE_FLT so that min / max can use integer compares
               (presumably the macro makes the integer order match the float
               order - confirm against its definition). The extrema are
               toggled back and reinterpreted as floats before flooring. */
            CvPoint pt;
            Cv32suf v;
            /* init values */
            CV_READ_SEQ_ELEM( pt, reader );
            xmin = xmax = CV_TOGGLE_FLT(pt.x);
            ymin = ymax = CV_TOGGLE_FLT(pt.y);

            for( i = 1; i < ptseq->total; i++ )
            {            
                CV_READ_SEQ_ELEM( pt, reader );
                pt.x = CV_TOGGLE_FLT(pt.x);
                pt.y = CV_TOGGLE_FLT(pt.y);
        
                if( xmin > pt.x )
                    xmin = pt.x;
        
                if( xmax < pt.x )
                    xmax = pt.x;

                if( ymin > pt.y )
                    ymin = pt.y;

                if( ymax < pt.y )
                    ymax = pt.y;
            }

            v.i = CV_TOGGLE_FLT(xmin); xmin = cvFloor(v.f);
            v.i = CV_TOGGLE_FLT(ymin); ymin = cvFloor(v.f);
            /* because right and bottom sides of
               the bounding rectangle are not inclusive
               (note +1 in width and height calculation below),
               cvFloor is used here instead of cvCeil */
            v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
            v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
        }
    }

    /* xmax / ymax are inclusive, hence the +1 */
    rect.x = xmin;
    rect.y = ymin;
    rect.width = xmax - xmin + 1;
    rect.height = ymax - ymin + 1;

    /* cache the result in the contour header when the caller asked for it */
    if( update )
        ((CvContour*)ptseq)->rect = rect;

    __END__;

    return rect;
}
示例#5
0
/* Calculates bounding rectangle of a point set or retrieves already calculated.

   array  - either a point-set sequence, a CV_32SC2 / CV_32FC2 matrix of points,
            or a CV_8UC1 / CV_8SC1 mask whose non-zero pixels are bounded.
   update - for a sequence whose header is at least sizeof(CvContour):
            0 returns the rectangle stored in the header, non-zero recomputes
            the rectangle and stores it there. For every other input the
            rectangle is always computed and never stored.

   Returns the bounding rectangle; an input without points / non-zero pixels
   yields {0, 0, 0, 0}. Invalid input is reported through CV_Error. */
CV_IMPL  CvRect
cvBoundingRect( CvArr* array, int update )
{
    CvSeqReader reader;
    CvRect  rect = { 0, 0, 0, 0 };
    CvContour contour_header;
    CvSeq* ptseq = 0;
    CvSeqBlock block;

    CvMat stub, *mat = 0;
    /* xmax = ymax = -1 makes width and height come out as 0 when nothing is found */
    int  xmin = 0, ymin = 0, xmax = -1, ymax = -1, i, j, k;
    int calculate = update;

    if( CV_IS_SEQ( array ))
    {
        ptseq = (CvSeq*)array;
        if( !CV_IS_SEQ_POINT_SET( ptseq ))
            CV_Error( CV_StsBadArg, "Unsupported sequence type" );

        if( ptseq->header_size < (int)sizeof(CvContour))
        {
            /* The header is too small to hold a rectangle: nothing can be
               cached, so always compute and never write back. */
            update = 0;
            calculate = 1;
        }
    }
    else
    {
        mat = cvGetMat( array, &stub );
        if( CV_MAT_TYPE(mat->type) == CV_32SC2 ||
            CV_MAT_TYPE(mat->type) == CV_32FC2 )
        {
            /* matrix of points: wrap it into a sequence and use the point path */
            ptseq = cvPointSeqFromMat(CV_SEQ_KIND_GENERIC, mat, &contour_header, &block);
            mat = 0;
        }
        else if( CV_MAT_TYPE(mat->type) != CV_8UC1 &&
                CV_MAT_TYPE(mat->type) != CV_8SC1 )
            CV_Error( CV_StsUnsupportedFormat,
                "The image/matrix format is not supported by the function" );
        update = 0;
        calculate = 1;
    }

    /* return the rectangle already stored in the contour header */
    if( !calculate )
        return ((CvContour*)ptseq)->rect;

    if( mat )
    {
        /* 8-bit mask: find the extents of the non-zero pixels row by row */
        CvSize size = cvGetMatSize(mat);
        /* xmin past the last column and ymin == -1 mean "nothing found yet" */
        xmin = size.width;
        ymin = -1;

        for( i = 0; i < size.height; i++ )
        {
            uchar* _ptr = mat->data.ptr + i*mat->step;
            /* first 4-byte aligned address in the row, used for the int reads below */
            uchar* ptr = (uchar*)cvAlignPtr(_ptr, 4);
            /* offset = number of bytes in front of the aligned address */
            int have_nz = 0, k_min, offset = (int)(ptr - _ptr);
            j = 0;
            offset = MIN(offset, size.width);
            /* scan the unaligned head of the row byte by byte */
            for( ; j < offset; j++ )
                if( _ptr[j] )
                {
                    have_nz = 1;
                    break;
                }
            if( j < offset )
            {
                /* a non-zero byte was found in the head */
                if( j < xmin )
                    xmin = j;
                if( j > xmax )
                    xmax = j;
            }
            if( offset < size.width )
            {
                /* switch to coordinates relative to the aligned pointer */
                xmin -= offset;
                xmax -= offset;
                size.width -= offset;
                j = 0;
                /* look left of the current xmin: skip zero words 4 bytes at a time ... */
                for( ; j <= xmin - 4; j += 4 )
                    if( *((int*)(ptr+j)) )
                        break;
                /* ... then byte by byte to locate the exact column */
                for( ; j < xmin; j++ )
                    if( ptr[j] )
                    {
                        xmin = j;
                        if( j > xmax )
                            xmax = j;
                        have_nz = 1;
                        break;
                    }
                /* only columns right of k_min can still enlarge xmax */
                k_min = MAX(j-1, xmax);
                k = size.width - 1;
                /* step back byte by byte until k is the last byte of an aligned word */
                for( ; k > k_min && (k&3) != 3; k-- )
                    if( ptr[k] )
                        break;
                if( k > k_min && (k&3) == 3 )
                {
                    /* skip whole zero words from the right */
                    for( ; k > k_min+3; k -= 4 )
                        if( *((int*)(ptr+k-3)) )
                            break;
                }
                /* locate the exact right-most non-zero byte */
                for( ; k > k_min; k-- )
                    if( ptr[k] )
                    {
                        xmax = k;
                        have_nz = 1;
                        break;
                    }
                if( !have_nz )
                {
                    /* Neither scan hit a non-zero byte; check the remaining
                       span between j and k only to learn whether the row is
                       empty (this affects ymin / ymax, not xmin / xmax). */
                    j &= ~3;
                    for( ; j <= k - 3; j += 4 )
                        if( *((int*)(ptr+j)) )
                            break;
                    for( ; j <= k; j++ )
                        if( ptr[j] )
                        {
                            have_nz = 1;
                            break;
                        }
                }
                /* back to coordinates relative to the start of the row */
                xmin += offset;
                xmax += offset;
                size.width += offset;
            }
            if( have_nz )
            {
                if( ymin < 0 )
                    ymin = i;
                ymax = i;
            }
        }

        /* no non-zero pixel anywhere: report an empty rectangle at the origin */
        if( xmin >= size.width )
            xmin = ymin = 0;
    }
    else if( ptseq->total )
    {
        int  is_float = CV_SEQ_ELTYPE(ptseq) == CV_32FC2;
        cvStartReadSeq( ptseq, &reader, 0 );
        /* the first point seeds the running minimum / maximum in every path */
        CvPoint pt;
        CV_READ_SEQ_ELEM( pt, reader );
    #if CV_SSE4_2
        if(cv::checkHardwareSupport(CV_CPU_SSE4_2))
        {
            if( !is_float )
            {
                __m128i minval, maxval;
                minval = maxval = _mm_loadl_epi64((const __m128i*)(&pt)); //min[0]=pt.x, min[1]=pt.y

                for( i = 1; i < ptseq->total; i++)
                {
                    /* load x and y of the next point straight from the reader */
                    __m128i ptXY = _mm_loadl_epi64((const __m128i*)(reader.ptr));
                    CV_NEXT_SEQ_ELEM(sizeof(pt), reader);
                    minval = _mm_min_epi32(ptXY, minval);
                    maxval = _mm_max_epi32(ptXY, maxval);
                }
                /* lane 0 holds x, lane 1 holds y */
                xmin = _mm_cvtsi128_si32(minval);
                ymin = _mm_cvtsi128_si32(_mm_srli_si128(minval, 4));
                xmax = _mm_cvtsi128_si32(maxval);
                ymax = _mm_cvtsi128_si32(_mm_srli_si128(maxval, 4));
            }
            else
            {
                /* pt holds the raw bits of two floats; load them as floats */
                __m128 minvalf, maxvalf, z = _mm_setzero_ps(), ptXY = _mm_setzero_ps();
                minvalf = maxvalf = _mm_loadl_pi(z, (const __m64*)(&pt));

                for( i = 1; i < ptseq->total; i++ )
                {
                    ptXY = _mm_loadl_pi(ptXY, (const __m64*)reader.ptr);
                    CV_NEXT_SEQ_ELEM(sizeof(pt), reader);

                    minvalf = _mm_min_ps(minvalf, ptXY);
                    maxvalf = _mm_max_ps(maxvalf, ptXY);
                }

                float xyminf[2], xymaxf[2];
                _mm_storel_pi((__m64*)xyminf, minvalf);
                _mm_storel_pi((__m64*)xymaxf, maxvalf);
                xmin = cvFloor(xyminf[0]);
                ymin = cvFloor(xyminf[1]);
                xmax = cvFloor(xymaxf[0]);
                ymax = cvFloor(xymaxf[1]);
            }
        }
        else
    #endif
        {
            if( !is_float )
            {
                xmin = xmax = pt.x;
                ymin = ymax = pt.y;

                for( i = 1; i < ptseq->total; i++ )
                {
                    CV_READ_SEQ_ELEM( pt, reader );

                    if( xmin > pt.x )
                        xmin = pt.x;

                    if( xmax < pt.x )
                        xmax = pt.x;

                    if( ymin > pt.y )
                        ymin = pt.y;

                    if( ymax < pt.y )
                        ymax = pt.y;
                }
            }
            else
            {
                // The float coordinates are read as raw 32-bit integers and passed
                // through CV_TOGGLE_FLT so that min / max can use integer compares
                // (presumably the macro makes the integer order match the float
                // order - confirm against its definition). The extrema are toggled
                // back and reinterpreted as floats before flooring.
                Cv32suf v;
                // init values
                xmin = xmax = CV_TOGGLE_FLT(pt.x);
                ymin = ymax = CV_TOGGLE_FLT(pt.y);

                for( i = 1; i < ptseq->total; i++ )
                {
                    CV_READ_SEQ_ELEM( pt, reader );
                    pt.x = CV_TOGGLE_FLT(pt.x);
                    pt.y = CV_TOGGLE_FLT(pt.y);

                    if( xmin > pt.x )
                        xmin = pt.x;

                    if( xmax < pt.x )
                        xmax = pt.x;

                    if( ymin > pt.y )
                        ymin = pt.y;

                    if( ymax < pt.y )
                        ymax = pt.y;
                }

                v.i = CV_TOGGLE_FLT(xmin); xmin = cvFloor(v.f);
                v.i = CV_TOGGLE_FLT(ymin); ymin = cvFloor(v.f);
                // because right and bottom sides of the bounding rectangle are not inclusive
                // (note +1 in width and height calculation below), cvFloor is used here instead of cvCeil
                v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
                v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
            }
        }
    }

    /* Build the rectangle for BOTH the mask and the point-set paths. With the
       initial values (xmin = ymin = 0, xmax = ymax = -1) an empty input still
       produces {0, 0, 0, 0}. xmax / ymax are inclusive, hence the +1. */
    rect.x = xmin;
    rect.y = ymin;
    rect.width = xmax - xmin + 1;
    rect.height = ymax - ymin + 1;

    /* cache the result in the contour header when the caller asked for it */
    if( update )
        ((CvContour*)ptseq)->rect = rect;
    return rect;
}
/* Block-matching optical flow between two 8-bit single-channel images.

   srcarrA, srcarrB - source images (8uC1, same size and step).
   blockSize        - size of the compared blocks; both dimensions must be > 0.
   shiftSize        - step of the block grid over image A; both dimensions must be > 0.
   maxRange         - half-size of the search window around a block, in pixels;
                      both dimensions must be >= 0.
   usePrevious      - when non-zero, the values already stored in velarrx /
                      velarry are rounded and used as the initial displacement
                      of each block.
   velarrx, velarry - output 32fC1 images of size
                      ((A.width  - blockSize.width)  / shiftSize.width) x
                      ((A.height - blockSize.height) / shiftSize.height),
                      with equal steps; they receive the x and y displacement
                      of every block.

   For each block of A the function compares it (via cmpBlocks) with the block
   of B at the initial displacement and, if that is not good enough, with
   blocks of B visited in a spiral order around it. Invalid arguments are
   reported through CV_Error. */
CV_IMPL void
cvCalcOpticalFlowBM( const void* srcarrA, const void* srcarrB,
                     CvSize blockSize, CvSize shiftSize,
                     CvSize maxRange, int usePrevious,
                     void* velarrx, void* velarry )
{
    CvMat stubA, *srcA = cvGetMat( srcarrA, &stubA );
    CvMat stubB, *srcB = cvGetMat( srcarrB, &stubB );

    CvMat stubx, *velx = cvGetMat( velarrx, &stubx );
    CvMat stuby, *vely = cvGetMat( velarry, &stuby );

    if( !CV_ARE_TYPES_EQ( srcA, srcB ))
        CV_Error( CV_StsUnmatchedFormats, "Source images have different formats" );

    if( !CV_ARE_TYPES_EQ( velx, vely ))
        CV_Error( CV_StsUnmatchedFormats, "Destination images have different formats" );

    // shiftSize is used as a divisor below, blockSize and maxRange size the
    // work buffers and the memcpy calls: reject values that would lead to a
    // division by zero or to negative sizes.
    if( blockSize.width <= 0 || blockSize.height <= 0 ||
        shiftSize.width <= 0 || shiftSize.height <= 0 ||
        maxRange.width < 0 || maxRange.height < 0 )
        CV_Error( CV_StsBadArg, "blockSize and shiftSize must be positive, "
                                "maxRange must be non-negative" );

    // number of blocks along each axis = expected size of the velocity images
    CvSize velSize =
    {
        (srcA->width - blockSize.width)/shiftSize.width,
        (srcA->height - blockSize.height)/shiftSize.height
    };

    if( !CV_ARE_SIZES_EQ( srcA, srcB ) ||
        !CV_ARE_SIZES_EQ( velx, vely ) ||
        velx->width != velSize.width ||
        vely->height != velSize.height )
        CV_Error( CV_StsUnmatchedSizes, "" );

    if( CV_MAT_TYPE( srcA->type ) != CV_8UC1 ||
        CV_MAT_TYPE( velx->type ) != CV_32FC1 )
        CV_Error( CV_StsUnsupportedFormat, "Source images must have 8uC1 type and "
                                           "destination images must have 32fC1 type" );

    if( srcA->step != srcB->step || velx->step != vely->step )
        CV_Error( CV_BadStep, "two source or two destination images have different steps" );

    // per-pixel thresholds, scaled by the block area below
    const int SMALL_DIFF=2;
    const int BIG_DIFF=128;
    
    // scanning scheme coordinates
    cv::vector<CvPoint> _ss((2 * maxRange.width + 1) * (2 * maxRange.height + 1));
    CvPoint* ss = &_ss[0];
    int ss_count = 0;

    int blWidth = blockSize.width, blHeight = blockSize.height;
    int blSize = blWidth*blHeight;
    // a match at or below acceptLevel ends the search for a block;
    // a best match above escapeLevel is discarded
    int acceptLevel = blSize * SMALL_DIFF;
    int escapeLevel = blSize * BIG_DIFF;
    
    int i, j;

    // 16-byte aligned scratch copy of the current block of A, stored densely
    cv::vector<uchar> _blockA(cvAlign(blSize + 16, 16));
    uchar* blockA = (uchar*)cvAlignPtr(&_blockA[0], 16);
    
    // Calculate scanning scheme
    int min_count = MIN( maxRange.width, maxRange.height );
    
    // use spiral search pattern
    // 
    //     9 10 11 12
    //     8  1  2 13
    //     7  *  3 14
    //     6  5  4 15      
    //... 20 19 18 17
    //
    
    // full square rings, from the innermost outwards
    for( i = 0; i < min_count; i++ )
    {
        // four cycles along sides
        int x = -i-1, y = x;
        
        // upper side
        for( j = -i; j <= i + 1; j++, ss_count++ )
        {
            ss[ss_count].x = ++x;
            ss[ss_count].y = y;
        }
        
        // right side
        for( j = -i; j <= i + 1; j++, ss_count++ )
        {
            ss[ss_count].x = x;
            ss[ss_count].y = ++y;
        }
        
        // bottom side
        for( j = -i; j <= i + 1; j++, ss_count++ )
        {
            ss[ss_count].x = --x;
            ss[ss_count].y = y;
        }
        
        // left side
        for( j = -i; j <= i + 1; j++, ss_count++ )
        {
            ss[ss_count].x = x;
            ss[ss_count].y = --y;
        }
    }
    
    // the rest part: the window is not square, so add the remaining
    // rows (or columns) in pairs on both sides of the square part
    if( maxRange.width < maxRange.height )
    {
        int xleft = -min_count;
        
        // cycle by neighbor rings
        for( i = min_count; i < maxRange.height; i++ )
        {
            // two cycles by x
            int y = -(i + 1);
            int x = xleft;
            
            // upper side
            for( j = -maxRange.width; j <= maxRange.width; j++, ss_count++, x++ )
            {
                ss[ss_count].x = x;
                ss[ss_count].y = y;
            }
            
            x = xleft;
            y = -y;
            // bottom side
            for( j = -maxRange.width; j <= maxRange.width; j++, ss_count++, x++ )
            {
                ss[ss_count].x = x;
                ss[ss_count].y = y;
            }
        }
    }
    else if( maxRange.width > maxRange.height )
    {
        int yupper = -min_count;
        
        // cycle by neighbor rings
        for( i = min_count; i < maxRange.width; i++ )
        {
            // two cycles by y
            int x = -(i + 1);
            int y = yupper;
            
            // left side
            for( j = -maxRange.height; j <= maxRange.height; j++, ss_count++, y++ )
            {
                ss[ss_count].x = x;
                ss[ss_count].y = y;
            }
            
            y = yupper;
            x = -x;
            // right side
            for( j = -maxRange.height; j <= maxRange.height; j++, ss_count++, y++ )
            {
                ss[ss_count].x = x;
                ss[ss_count].y = y;
            }
        }
    }

    // largest top-left corner at which a whole block still fits into B
    int maxX = srcB->cols - blockSize.width, maxY = srcB->rows - blockSize.height;
    const uchar* Adata = srcA->data.ptr;
    const uchar* Bdata = srcB->data.ptr;
    int Astep = srcA->step, Bstep = srcB->step;
    
    // compute the flow
    for( i = 0; i < velx->rows; i++ )
    {
        float* vx = (float*)(velx->data.ptr + velx->step*i);
        float* vy = (float*)(vely->data.ptr + vely->step*i);
        
        for( j = 0; j < velx->cols; j++ )
        {
            // (X1, Y1): top-left corner of the block in A; (X2, Y2): candidate in B
            int X1 = j*shiftSize.width, Y1 = i*shiftSize.height, X2, Y2;
            int offX = 0, offY = 0;
            
            if( usePrevious )
            {
                offX = cvRound(vx[j]);
                offY = cvRound(vy[j]);
            }

            // copy the block of A into the dense scratch buffer
            int k;
            for( k = 0; k < blHeight; k++ )
                memcpy( blockA + k*blWidth, Adata + Astep*(Y1 + k) + X1, blWidth );
            
            // try the initial displacement first (if the block stays inside B)
            X2 = X1 + offX;
            Y2 = Y1 + offY;
            int dist = INT_MAX;
            if( 0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY )
                dist = cmpBlocks( blockA, Bdata + Bstep*Y2 + X2, Bstep, blockSize );

            // displacements tied for the best distance are summed and averaged
            int countMin = 1;
            int sumx = offX, sumy = offY;

            if( dist > acceptLevel )
            {
                // do brute-force search
                for( k = 0; k < ss_count; k++ )
                {
                    int dx = offX + ss[k].x;
                    int dy = offY + ss[k].y;
                    X2 = X1 + dx;
                    Y2 = Y1 + dy;
                    
                    // skip candidates that leave the image
                    if( !(0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY) )
                        continue;
                    
                    int tmpDist = cmpBlocks( blockA, Bdata + Bstep*Y2 + X2, Bstep, blockSize );
                    if( tmpDist < acceptLevel )
                    {
                        // good enough: take it and stop searching
                        sumx = dx; sumy = dy;
                        countMin = 1;
                        break;
                    }
                    
                    if( tmpDist < dist )
                    {
                        // new best candidate
                        dist = tmpDist;
                        sumx = dx; sumy = dy;
                        countMin = 1;
                    }
                    else if( tmpDist == dist )
                    {
                        // tie with the current best: accumulate for averaging
                        sumx += dx; sumy += dy;
                        countMin++;
                    }
                }
                
                // even the best candidate is too different: keep the initial displacement
                if( dist > escapeLevel )
                {
                    sumx = offX;
                    sumy = offY;
                    countMin = 1;
                }
            }
            
            vx[j] = (float)sumx/countMin;
            vy[j] = (float)sumy/countMin;
        } 
    }
}