示例#1
0
	void pyrDown(const T1& in, T2& out)
	{
		const uword KERNEL_SIZE = 5;

		//uword width = std::min((src.n_cols - SZ / 2 - 1) / 2;
		
		circular_buffer<arma::ivec> cols(KERNEL_SIZE);

#ifdef __VXWORKS__
		ivec dummy(out.n_rows); dummy.zeros();
#endif
		for (arma::uword i = 0 ; i < KERNEL_SIZE ; i++)
#ifdef __VXWORKS__
			cols.push_back(dummy);
#else
			cols.push_back(zeros<ivec>(out.n_rows));
#endif

		int sx0 = -(int)KERNEL_SIZE / 2, sx = sx0;

		arma::umat tab(KERNEL_SIZE + 2, 2);
		uword* lptr = tab.colptr(0),
			 * rptr = tab.colptr(1);
		for (uword y = 0 ; y <= KERNEL_SIZE + 1 ; y++) {
			lptr[y] = borderInterpolate((int)y + sx0, (int)in.n_rows);
			rptr[y] = borderInterpolate((int)(y + (out.n_rows - 1) * 2) + sx0, (int)in.n_rows);
		}

		// gaussian convolution with 
		for (arma::uword x = 0 ; x < out.n_cols ; x++) {
			typename T2::elem_type* dst = out.colptr(x);

			// vertical convolution and decimation
			for ( ; sx <= (int)x * 2 + 2 ; sx++) {
				ivec& col = cols.next();
				int* colptr = col.memptr();

				// interpolate border
				const typename T2::elem_type* src = in.colptr(borderInterpolate(sx, (int)in.n_cols));

				colptr[0] = src[lptr[2]] * 6 + (src[lptr[1]] + src[lptr[3]]) * 4 + (src[lptr[0]] + src[lptr[4]]);

				for (arma::uword y = 1 ; y < out.n_rows - 1; y++)
				//concurrency::parallel_for(uword(1), out.n_rows - 1, [&](uword y) {
					colptr[y] = src[y * 2] * 6 + 
							 (src[y * 2 - 1] + src[y * 2 + 1]) * 4 + 
							 (src[y * 2 - 2] + src[y * 2 + 2]);
				//});

				colptr[out.n_rows - 1] = src[rptr[2]] * 6 + 
									  (src[rptr[1]] + src[rptr[3]]) * 4 + 
									  (src[rptr[0]] + src[rptr[4]]);
			}

			const int* col0 = cols[0].memptr();
			const int* col1 = cols[1].memptr();
			const int* col2 = cols[2].memptr();
			const int* col3 = cols[3].memptr();
			const int* col4 = cols[4].memptr();

			// horizontal convolution and decimation
#if ENABLE_SSE2
			//__m128i d = _mm_set1_epi16(128);
			//uword y = 0;
			//for ( ; y <= out.n_rows - 16 ; y += 16) {
			//	__m128i c0, c1, c2, c3, c4, t0, t1;
			//	c0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col0 + y)),
			//						 _mm_load_si128((const __m128i*)(col0 + y + 4)));
			//	c1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col1 + y)),
			//						 _mm_load_si128((const __m128i*)(col1 + y + 4)));
			//	c2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col2 + y)),
			//						 _mm_load_si128((const __m128i*)(col2 + y + 4)));
			//	c3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col3 + y)),
			//						 _mm_load_si128((const __m128i*)(col3 + y + 4)));
			//	c4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col4 + y)),
			//						 _mm_load_si128((const __m128i*)(col4 + y + 4)));

			//	c0 = _mm_add_epi16(r0, r4);
			//	c1 = _mm_add_epi16(_mm_add_epi16(c1, c3), c2);
			//}
#else
			for (arma::uword y = 0 ; y < out.n_rows ; y++)
			//concurrency::parallel_for(uword(0), out.n_rows, [&](uword y) {
				dst[y] = (typename T2::elem_type)castOp(col2[y] * 6 + (col1[y] + col3[y]) * 4 + col0[y] + col4[y]);
			//});
#endif
		}
	}
示例#2
0
// Downsamples _src into _dst: every output sample is the 5-tap [1 4 6 4 1]
// weighted sum of source samples around position 2*i, applied first along each
// row and then across rows.
//
// CastOp supplies the accumulator type (type1), the pixel type (rtype) and the
// conversion applied to each accumulated sum before it is stored. VecOp is
// invoked once per output row and returns the index at which the scalar loop
// has to take over.
//
// _dst must already be allocated; its size has to satisfy
// |2*dst - src| <= 2 in both dimensions (asserted below). borderType is passed
// through to borderInterpolate for samples outside the source.
template<class CastOp, class VecOp> void
pyrDown_( const Mat& _src, Mat& _dst, int borderType )
{
    const int PD_SZ = 5;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    CV_Assert( !_src.empty() );
    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    // Ring buffer of PD_SZ horizontally filtered rows, each bufstep elements long.
    int bufstep = (int)alignSize(dsize.width*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    // Border-resolved source offsets used for the left (tabL) and right (tabR)
    // ends of a row, one entry per channel and tap position.
    int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
    // tabM maps an output element index to the source element index of its centre tap.
    AutoBuffer<int> _tabM(dsize.width*cn);
    int* tabM = _tabM;
    WT* rows[PD_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
               std::abs(dsize.width*2 - ssize.width) <= 2 &&
               std::abs(dsize.height*2 - ssize.height) <= 2 );
    // sy is the next source row to be filtered into the ring buffer.
    // width0 is the number of output pixels that are handled before the
    // right-border path takes over.
    int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);

    for( x = 0; x <= PD_SZ+1; x++ )
    {
        int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, borderType)*cn;
        int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, borderType)*cn;
        for( k = 0; k < cn; k++ )
        {
            tabL[x*cn + k] = sx0 + k;
            tabR[x*cn + k] = sx1 + k;
        }
    }

    // From here on all widths are counted in elements (pixels * channels).
    ssize.width *= cn;
    dsize.width *= cn;
    width0 *= cn;

    for( x = 0; x < dsize.width; x++ )
        tabM[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < dsize.height; y++ )
    {
        T* dst = (T*)(_dst.data + _dst.step*y);
        WT *row0, *row1, *row2, *row3, *row4;

        // fill the ring buffer (horizontal convolution and decimation)
        // Only the source rows up to 2*y + 2 that are not buffered yet are processed.
        for( ; sy <= y*2 + 2; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
            int _sy = borderInterpolate(sy, ssize.height, borderType);
            const T* src = (const T*)(_src.data + _src.step*_sy);
            // First pass of the loop below covers the leftmost pixel through tabL.
            int limit = cn;
            const int* tab = tabL;

            // Three phases share this loop: left border (table lookup), the
            // middle part (direct indexing), then the right border (table
            // lookup again, after limit and tab have been switched at the bottom).
            for( x = 0;;)
            {
                for( ; x < limit; x++ )
                {
                    row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 +
                        src[tab[x]] + src[tab[x+cn*4]];
                }

                // The whole row is done once the border pass has reached its end.
                if( x == dsize.width )
                    break;

                // Middle part: dedicated loops for 1, 3 and 4 channels, a
                // tabM-driven loop for any other channel count.
                if( cn == 1 )
                {
                    for( ; x < width0; x++ )
                        row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 +
                            src[x*2 - 2] + src[x*2 + 2];
                }
                else if( cn == 3 )
                {
                    for( ; x < width0; x += 3 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6];
                        WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7];
                        WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8];
                        row[x] = t0; row[x+1] = t1; row[x+2] = t2;
                    }
                }
                else if( cn == 4 )
                {
                    for( ; x < width0; x += 4 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8];
                        WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9];
                        row[x] = t0; row[x+1] = t1;
                        t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10];
                        t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11];
                        row[x+2] = t0; row[x+3] = t1;
                    }
                }
                else
                {
                    for( ; x < width0; x++ )
                    {
                        int sx = tabM[x];
                        row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
                            src[sx - cn*2] + src[sx + cn*2];
                    }
                }

                // Switch to the right-border table. Rebasing it by the current
                // x lets the lookup loop above keep indexing with x.
                limit = dsize.width;
                tab = tabR - x;
            }
        }

        // do vertical convolution and decimation and write the result to the destination image
        // rows[k] is the buffered result for source row 2*y - 2 + k.
        for( k = 0; k < PD_SZ; k++ )
            rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];

        // vecOp returns how many elements it produced; the scalar loop finishes the row.
        x = vecOp(rows, dst, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
            dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]);
    }
}
示例#3
0
// Upsamples _src into _dst by a factor of two in both directions.
//
// Every source row is first expanded horizontally into an accumulator row
// (even outputs use the weights [1 6 1], odd outputs [4 4]) that is kept in a
// three-row ring buffer. Two destination rows are then produced per source row
// by applying the same weights across the buffered rows. The weights sum to 8
// per pass; CastOp converts each accumulated sum to the pixel type.
//
// CastOp supplies the accumulator type (type1), the pixel type (rtype) and the
// final conversion. VecOp is invoked once per source row and returns the index
// at which the scalar loop has to take over. The third parameter is ignored.
//
// _dst must already be allocated. Its width/height must equal twice the source
// size, or differ from it by one when odd (asserted below). When the
// destination is one larger than twice the source, the additional column
// repeats the last computed column and the additional row repeats the row two
// above it. An empty source leaves _dst untouched.
template<class CastOp, class VecOp> void
pyrUp_( const Mat& _src, Mat& _dst, int)
{
    const int PU_SZ = 3;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    // Ring buffer of PU_SZ horizontally expanded rows. One pixel of slack per
    // row because the expansion always writes 2*src pixels, which is one more
    // than an odd destination of width 2*src - 1 holds.
    int bufstep = (int)alignSize((dsize.width+1)*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PU_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    // dtab maps a source element index to the destination element index of its even output.
    AutoBuffer<int> _dtab(ssize.width*cn);
    int* dtab = _dtab;
    WT* rows[PU_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( std::abs(dsize.width - ssize.width*2) == dsize.width % 2 &&
               std::abs(dsize.height - ssize.height*2) == dsize.height % 2);

    // Nothing can be interpolated from an empty source; the loops below would
    // otherwise read source pixels that do not exist.
    if( ssize.width == 0 || ssize.height == 0 )
        return;

    int k, x, sy0 = -PU_SZ/2, sy = sy0;

    // From here on all widths are counted in elements (pixels * channels).
    ssize.width *= cn;
    dsize.width *= cn;

    // The destination may be one pixel larger than twice the source in either
    // direction; that extra column / row is not covered by the regular
    // expansion and has to be filled explicitly.
    const bool extraCol = dsize.width > ssize.width*2;
    const bool extraRow = dsize.height > ssize.height*2;

    for( x = 0; x < ssize.width; x++ )
        dtab[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < ssize.height; y++ )
    {
        T* dst0 = (T*)(_dst.data + _dst.step*y*2);
        // The odd output row aliases the even one when it would fall outside
        // the destination; the even value is stored last and therefore wins.
        T* dst1 = y*2+1 < dsize.height ? (T*)(_dst.data + _dst.step*(y*2+1)) : dst0;
        WT *row0, *row1, *row2;

        // fill the ring buffer (horizontal convolution and expansion)
        for( ; sy <= y + 1; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PU_SZ)*bufstep;
            // Reflect against twice the SOURCE height. Using the destination
            // height here selects source row ssize.height - one past the end -
            // whenever the destination has 2*ssize.height + 1 rows.
            int _sy = borderInterpolate(sy*2, ssize.height*2, BORDER_REFLECT_101)/2;
            const T* src = (const T*)(_src.data + _src.step*_sy);

            // Single-pixel-wide source: every output column repeats that pixel.
            if( ssize.width == cn )
            {
                for( x = 0; x < cn; x++ )
                {
                    row[x] = row[x + cn] = src[x]*8;
                    if( extraCol )
                        row[x + cn*2] = row[x + cn];
                }
                continue;
            }

            // Leftmost and rightmost source pixel, with the missing neighbour
            // folded into the weights.
            for( x = 0; x < cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x]*6 + src[x + cn]*2;
                WT t1 = (src[x] + src[x + cn])*4;
                row[dx] = t0; row[dx + cn] = t1;
                dx = dtab[ssize.width - cn + x];
                int sx = ssize.width - cn + x;
                t0 = src[sx - cn] + src[sx]*7;
                t1 = src[sx]*8;
                row[dx] = t0; row[dx + cn] = t1;

                // Initialise the additional last column so the vertical pass
                // does not read uninitialised buffer contents.
                if( extraCol )
                    row[dsize.width - cn + x] = t1;
            }

            // Interior source pixels.
            for( x = cn; x < ssize.width - cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x-cn] + src[x]*6 + src[x+cn];
                WT t1 = (src[x] + src[x+cn])*4;
                row[dx] = t0;
                row[dx+cn] = t1;
            }
        }

        // do vertical convolution and expansion and write the result to the destination image
        // rows[k] is the buffered result for source row y - 1 + k.
        for( k = 0; k < PU_SZ; k++ )
            rows[k] = buf + ((y - PU_SZ/2 + k - sy0) % PU_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2];

        // NOTE(review): vecOp only receives dst0 and the row step, so it
        // presumably derives the odd row itself - confirm that it cannot write
        // past the destination when the odd row does not exist (dst1 == dst0).
        x = vecOp(rows, dst0, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
        {
            T t1 = castOp((row1[x] + row2[x])*4);
            T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
            dst1[x] = t1; dst0[x] = t0;
        }
    }

    // The main loop writes destination rows 0 .. 2*ssize.height - 1 only. Fill
    // the additional last row by mirroring about the last written row.
    if( extraRow )
    {
        const T* mirrored = (const T*)(_dst.data + _dst.step*(ssize.height*2 - 2));
        T* last = (T*)(_dst.data + _dst.step*(ssize.height*2));
        for( x = 0; x < dsize.width; x++ )
            last[x] = mirrored[x];
    }
}
示例#4
0
void CvHOGEvaluator::integralHistogram( const Mat &img, std::vector<Mat> &histogram, Mat &norm, int nbins ) const
{
  CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );
  int x, y, binIdx;

  Size gradSize( img.size() );
  Size histSize( histogram[0].size() );
  Mat grad( gradSize, CV_32F );
  Mat qangle( gradSize, CV_8U );

  AutoBuffer<int> mapbuf( gradSize.width + gradSize.height + 4 );
  int* xmap = (int*) mapbuf + 1;
  int* ymap = xmap + gradSize.width + 2;

  const int borderType = (int) BORDER_REPLICATE;

  for ( x = -1; x < gradSize.width + 1; x++ )
    xmap[x] = borderInterpolate( x, gradSize.width, borderType );
  for ( y = -1; y < gradSize.height + 1; y++ )
    ymap[y] = borderInterpolate( y, gradSize.height, borderType );

  int width = gradSize.width;
  AutoBuffer<float> _dbuf( width * 4 );
  float* dbuf = _dbuf;
  Mat Dx( 1, width, CV_32F, dbuf );
  Mat Dy( 1, width, CV_32F, dbuf + width );
  Mat Mag( 1, width, CV_32F, dbuf + width * 2 );
  Mat Angle( 1, width, CV_32F, dbuf + width * 3 );

  float angleScale = (float) ( nbins / CV_PI );

  for ( y = 0; y < gradSize.height; y++ )
  {
    const uchar* currPtr = img.data + img.step * ymap[y];
    const uchar* prevPtr = img.data + img.step * ymap[y - 1];
    const uchar* nextPtr = img.data + img.step * ymap[y + 1];
    float* gradPtr = (float*) grad.ptr( y );
    uchar* qanglePtr = (uchar*) qangle.ptr( y );

    for ( x = 0; x < width; x++ )
    {
      dbuf[x] = (float) ( currPtr[xmap[x + 1]] - currPtr[xmap[x - 1]] );
      dbuf[width + x] = (float) ( nextPtr[xmap[x]] - prevPtr[xmap[x]] );
    }
    cartToPolar( Dx, Dy, Mag, Angle, false );
    for ( x = 0; x < width; x++ )
    {
      float mag = dbuf[x + width * 2];
      float angle = dbuf[x + width * 3];
      angle = angle * angleScale - 0.5f;
      int bidx = cvFloor( angle );
      angle -= bidx;
      if( bidx < 0 )
        bidx += nbins;
      else if( bidx >= nbins )
        bidx -= nbins;

      qanglePtr[x] = (uchar) bidx;
      gradPtr[x] = mag;
    }
  }
  integral( grad, norm, grad.depth() );

  float* histBuf;
  const float* magBuf;
  const uchar* binsBuf;

  int binsStep = (int) ( qangle.step / sizeof(uchar) );
  int histStep = (int) ( histogram[0].step / sizeof(float) );
  int magStep = (int) ( grad.step / sizeof(float) );
  for ( binIdx = 0; binIdx < nbins; binIdx++ )
  {
    histBuf = (float*) histogram[binIdx].data;
    magBuf = (const float*) grad.data;
    binsBuf = (const uchar*) qangle.data;

    memset( histBuf, 0, histSize.width * sizeof ( histBuf[0] ) );
    histBuf += histStep + 1;
    for ( y = 0; y < qangle.rows; y++ )
    {
      histBuf[-1] = 0.f;
      float strSum = 0.f;
      for ( x = 0; x < qangle.cols; x++ )
      {
        if( binsBuf[x] == binIdx )
          strSum += magBuf[x];
        histBuf[x] = histBuf[-histStep + x] + strSum;
      }
      histBuf += histStep;
      binsBuf += binsStep;
      magBuf += magStep;
    }
  }
}