template <typename T1, typename T2>
void pyrDown(const T1& in, T2& out)
{
    const uword KERNEL_SIZE = 5;
    //uword width = std::min((src.n_cols - SZ / 2 - 1) / 2;

    // ring buffer holding the last KERNEL_SIZE vertically convolved source columns
    circular_buffer<arma::ivec> cols(KERNEL_SIZE);
#ifdef __VXWORKS__
    ivec dummy(out.n_rows);
    dummy.zeros();
#endif
    for (arma::uword i = 0 ; i < KERNEL_SIZE ; i++)
#ifdef __VXWORKS__
        cols.push_back(dummy);
#else
        cols.push_back(zeros<ivec>(out.n_rows));
#endif

    int sx0 = -(int)KERNEL_SIZE / 2, sx = sx0;

    // border-interpolated row indices for the top (lptr) and bottom (rptr) image edges
    arma::umat tab(KERNEL_SIZE + 2, 2);
    uword* lptr = tab.colptr(0), * rptr = tab.colptr(1);
    for (uword y = 0 ; y <= KERNEL_SIZE + 1 ; y++)
    {
        lptr[y] = borderInterpolate((int)y + sx0, (int)in.n_rows);
        rptr[y] = borderInterpolate((int)(y + (out.n_rows - 1) * 2) + sx0, (int)in.n_rows);
    }

    // Gaussian convolution with the separable 5-tap kernel [1 4 6 4 1], followed by 2x decimation
    for (arma::uword x = 0 ; x < out.n_cols ; x++)
    {
        typename T2::elem_type* dst = out.colptr(x);

        // vertical convolution and decimation: bring the ring buffer up to source column x*2+2
        for ( ; sx <= (int)x * 2 + 2 ; sx++)
        {
            ivec& col = cols.next();
            int* colptr = col.memptr();
            // interpolate border
            const typename T1::elem_type* src = in.colptr(borderInterpolate(sx, (int)in.n_cols));

            colptr[0] = src[lptr[2]] * 6 + (src[lptr[1]] + src[lptr[3]]) * 4
                      + (src[lptr[0]] + src[lptr[4]]);
            for (arma::uword y = 1 ; y < out.n_rows - 1 ; y++)
            //concurrency::parallel_for(uword(1), out.n_rows - 1, [&](uword y)
            {
                colptr[y] = src[y * 2] * 6 + (src[y * 2 - 1] + src[y * 2 + 1]) * 4
                          + (src[y * 2 - 2] + src[y * 2 + 2]);
            }
            //);
            colptr[out.n_rows - 1] = src[rptr[2]] * 6 + (src[rptr[1]] + src[rptr[3]]) * 4
                                   + (src[rptr[0]] + src[rptr[4]]);
        }

        const int* col0 = cols[0].memptr();
        const int* col1 = cols[1].memptr();
        const int* col2 = cols[2].memptr();
        const int* col3 = cols[3].memptr();
        const int* col4 = cols[4].memptr();

        // horizontal convolution and decimation
#if ENABLE_SSE2
        // NOTE: the SSE2 path is an unfinished sketch and is left commented out;
        // build with ENABLE_SSE2 undefined (or 0) so the scalar branch below is used.
        //__m128i d = _mm_set1_epi16(128);
        //uword y = 0;
        //for ( ; y <= out.n_rows - 16 ; y += 16)
        //{
        //    __m128i c0, c1, c2, c3, c4, t0, t1;
        //    c0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col0 + y)),
        //                         _mm_load_si128((const __m128i*)(col0 + y + 4)));
        //    c1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col1 + y)),
        //                         _mm_load_si128((const __m128i*)(col1 + y + 4)));
        //    c2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col2 + y)),
        //                         _mm_load_si128((const __m128i*)(col2 + y + 4)));
        //    c3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col3 + y)),
        //                         _mm_load_si128((const __m128i*)(col3 + y + 4)));
        //    c4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(col4 + y)),
        //                         _mm_load_si128((const __m128i*)(col4 + y + 4)));
        //    c0 = _mm_add_epi16(c0, c4);
        //    c1 = _mm_add_epi16(_mm_add_epi16(c1, c3), c2);
        //}
#else
        for (arma::uword y = 0 ; y < out.n_rows ; y++)
        //concurrency::parallel_for(uword(0), out.n_rows, [&](uword y)
        {
            dst[y] = (typename T2::elem_type)castOp(col2[y] * 6 + (col1[y] + col3[y]) * 4
                                                    + col0[y] + col4[y]);
        }
        //);
#endif
    }
}
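// The Armadillo port above calls a two-argument borderInterpolate() helper that is not
// shown here. The sketch below is an assumption about what it looks like: it reproduces
// OpenCV's BORDER_REFLECT_101 mapping (gfedcb|abcdefgh|gfedcba), the border mode
// cv::pyrDown uses by default. In the real source it would have to be declared before
// pyrDown(); the actual helper in the port may differ.
static inline int borderInterpolate(int p, int len)
{
    if ((unsigned)p < (unsigned)len)
        return p;                      // index already inside the image
    if (len == 1)
        return 0;                      // degenerate single-row/column image
    do
    {
        if (p < 0)
            p = -p;                    // reflect around index 0 without repeating the edge pixel
        else
            p = 2 * (len - 1) - p;     // reflect around index len-1 without repeating the edge pixel
    }
    while ((unsigned)p >= (unsigned)len);
    return p;
}

// Hypothetical call site: the caller pre-sizes the output to about half the input, e.g.
//   arma::imat out((in.n_rows + 1) / 2, (in.n_cols + 1) / 2);
//   pyrDown(in, out);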
template<class CastOp, class VecOp> void
pyrDown_( const Mat& _src, Mat& _dst, int borderType )
{
    const int PD_SZ = 5;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    CV_Assert( !_src.empty() );
    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize(dsize.width*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
    AutoBuffer<int> _tabM(dsize.width*cn);
    int* tabM = _tabM;
    WT* rows[PD_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
               std::abs(dsize.width*2 - ssize.width) <= 2 &&
               std::abs(dsize.height*2 - ssize.height) <= 2 );
    int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);

    for( x = 0; x <= PD_SZ+1; x++ )
    {
        int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, borderType)*cn;
        int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, borderType)*cn;
        for( k = 0; k < cn; k++ )
        {
            tabL[x*cn + k] = sx0 + k;
            tabR[x*cn + k] = sx1 + k;
        }
    }

    ssize.width *= cn;
    dsize.width *= cn;
    width0 *= cn;

    for( x = 0; x < dsize.width; x++ )
        tabM[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < dsize.height; y++ )
    {
        T* dst = (T*)(_dst.data + _dst.step*y);
        WT *row0, *row1, *row2, *row3, *row4;

        // fill the ring buffer (horizontal convolution and decimation)
        for( ; sy <= y*2 + 2; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
            int _sy = borderInterpolate(sy, ssize.height, borderType);
            const T* src = (const T*)(_src.data + _src.step*_sy);
            int limit = cn;
            const int* tab = tabL;

            for( x = 0;;)
            {
                for( ; x < limit; x++ )
                {
                    row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 +
                        src[tab[x]] + src[tab[x+cn*4]];
                }

                if( x == dsize.width )
                    break;

                if( cn == 1 )
                {
                    for( ; x < width0; x++ )
                        row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 +
                            src[x*2 - 2] + src[x*2 + 2];
                }
                else if( cn == 3 )
                {
                    for( ; x < width0; x += 3 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6];
                        WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7];
                        WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8];
                        row[x] = t0; row[x+1] = t1; row[x+2] = t2;
                    }
                }
                else if( cn == 4 )
                {
                    for( ; x < width0; x += 4 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8];
                        WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9];
                        row[x] = t0; row[x+1] = t1;
                        t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10];
                        t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11];
                        row[x+2] = t0; row[x+3] = t1;
                    }
                }
                else
                {
                    for( ; x < width0; x++ )
                    {
                        int sx = tabM[x];
                        row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
                            src[sx - cn*2] + src[sx + cn*2];
                    }
                }

                limit = dsize.width;
                tab = tabR - x;
            }
        }

        // do vertical convolution and decimation and write the result to the destination image
        for( k = 0; k < PD_SZ; k++ )
            rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];

        x = vecOp(rows, dst, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
            dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]);
    }
}
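// pyrDown_ is parameterised on a CastOp (converts the widened working type back to the
// destination type) and a VecOp (optional SIMD kernel for the vertical pass). The pair
// below is an illustrative sketch, not OpenCV's internal functors: the separable 5-tap
// kernel [1 4 6 4 1] is applied twice, so the weights sum to 256 and the cast divides by
// 256 (shift by 8) with rounding.
struct FixedPtCast8u
{
    typedef int   type1;   // working type of the intermediate rows
    typedef uchar rtype;   // destination element type
    uchar operator()(int v) const { return cv::saturate_cast<uchar>((v + 128) >> 8); }
};

struct NoVec8u
{
    // No SIMD path: report 0 processed columns so the scalar tail loop does all the work.
    int operator()(int**, uchar*, int, int) const { return 0; }
};

// Hypothetical call site; _dst must be pre-allocated at roughly half the source size:
//   cv::Mat dst((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
//   pyrDown_<FixedPtCast8u, NoVec8u>(src, dst, cv::BORDER_REFLECT_101);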
template<class CastOp, class VecOp> void
pyrUp_( const Mat& _src, Mat& _dst, int)
{
    const int PU_SZ = 3;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize((dsize.width+1)*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PU_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    AutoBuffer<int> _dtab(ssize.width*cn);
    int* dtab = _dtab;
    WT* rows[PU_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( std::abs(dsize.width - ssize.width*2) == dsize.width % 2 &&
               std::abs(dsize.height - ssize.height*2) == dsize.height % 2);
    int k, x, sy0 = -PU_SZ/2, sy = sy0;

    ssize.width *= cn;
    dsize.width *= cn;

    for( x = 0; x < ssize.width; x++ )
        dtab[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < ssize.height; y++ )
    {
        T* dst0 = (T*)(_dst.data + _dst.step*y*2);
        T* dst1 = (T*)(_dst.data + _dst.step*(y*2+1));
        WT *row0, *row1, *row2;

        if( y*2+1 >= dsize.height )
            dst1 = dst0;

        // fill the ring buffer (horizontal convolution and upsampling)
        for( ; sy <= y + 1; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PU_SZ)*bufstep;
            int _sy = borderInterpolate(sy*2, dsize.height, BORDER_REFLECT_101)/2;
            const T* src = (const T*)(_src.data + _src.step*_sy);

            if( ssize.width == cn )
            {
                for( x = 0; x < cn; x++ )
                    row[x] = row[x + cn] = src[x]*8;
                continue;
            }

            for( x = 0; x < cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x]*6 + src[x + cn]*2;
                WT t1 = (src[x] + src[x + cn])*4;
                row[dx] = t0; row[dx + cn] = t1;
                dx = dtab[ssize.width - cn + x];
                int sx = ssize.width - cn + x;
                t0 = src[sx - cn] + src[sx]*7;
                t1 = src[sx]*8;
                row[dx] = t0; row[dx + cn] = t1;
            }

            for( x = cn; x < ssize.width - cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x-cn] + src[x]*6 + src[x+cn];
                WT t1 = (src[x] + src[x+cn])*4;
                row[dx] = t0;
                row[dx+cn] = t1;
            }
        }

        // do vertical convolution and upsampling and write the result to the destination image
        for( k = 0; k < PU_SZ; k++ )
            rows[k] = buf + ((y - PU_SZ/2 + k - sy0) % PU_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2];

        x = vecOp(rows, dst0, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
        {
            T t1 = castOp((row1[x] + row2[x])*4);
            T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
            dst1[x] = t1; dst0[x] = t0;
        }
    }
}
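// pyrUp_ follows the same CastOp/VecOp pattern. Here both the horizontal and the vertical
// passes scale every output by 8, so the total scale is 64 and the cast shifts by 6. As
// above, the functor name is illustrative, not OpenCV's internal one; NoVec8u from the
// pyrDown_ sketch can be reused since the VecOp call signature is the same.
struct FixedPtCastUp8u
{
    typedef int   type1;
    typedef uchar rtype;
    uchar operator()(int v) const { return cv::saturate_cast<uchar>((v + 32) >> 6); }
};

// Hypothetical call site; _dst is pre-allocated at twice the source size (the third
// argument is unused by pyrUp_):
//   cv::Mat dst(src.rows * 2, src.cols * 2, src.type());
//   pyrUp_<FixedPtCastUp8u, NoVec8u>(src, dst, cv::BORDER_REFLECT_101);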
void CvHOGEvaluator::integralHistogram( const Mat &img, std::vector<Mat> &histogram,
                                        Mat &norm, int nbins ) const
{
    CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );
    int x, y, binIdx;
    Size gradSize( img.size() );
    Size histSize( histogram[0].size() );
    Mat grad( gradSize, CV_32F );
    Mat qangle( gradSize, CV_8U );

    AutoBuffer<int> mapbuf( gradSize.width + gradSize.height + 4 );
    int* xmap = (int*) mapbuf + 1;
    int* ymap = xmap + gradSize.width + 2;

    const int borderType = (int) BORDER_REPLICATE;

    for ( x = -1; x < gradSize.width + 1; x++ )
        xmap[x] = borderInterpolate( x, gradSize.width, borderType );
    for ( y = -1; y < gradSize.height + 1; y++ )
        ymap[y] = borderInterpolate( y, gradSize.height, borderType );

    int width = gradSize.width;
    AutoBuffer<float> _dbuf( width * 4 );
    float* dbuf = _dbuf;
    Mat Dx( 1, width, CV_32F, dbuf );
    Mat Dy( 1, width, CV_32F, dbuf + width );
    Mat Mag( 1, width, CV_32F, dbuf + width * 2 );
    Mat Angle( 1, width, CV_32F, dbuf + width * 3 );

    float angleScale = (float) ( nbins / CV_PI );

    for ( y = 0; y < gradSize.height; y++ )
    {
        const uchar* currPtr = img.data + img.step * ymap[y];
        const uchar* prevPtr = img.data + img.step * ymap[y - 1];
        const uchar* nextPtr = img.data + img.step * ymap[y + 1];
        float* gradPtr = (float*) grad.ptr( y );
        uchar* qanglePtr = (uchar*) qangle.ptr( y );

        for ( x = 0; x < width; x++ )
        {
            dbuf[x] = (float) ( currPtr[xmap[x + 1]] - currPtr[xmap[x - 1]] );
            dbuf[width + x] = (float) ( nextPtr[xmap[x]] - prevPtr[xmap[x]] );
        }
        cartToPolar( Dx, Dy, Mag, Angle, false );
        for ( x = 0; x < width; x++ )
        {
            float mag = dbuf[x + width * 2];
            float angle = dbuf[x + width * 3];
            angle = angle * angleScale - 0.5f;
            int bidx = cvFloor( angle );
            angle -= bidx;
            if( bidx < 0 )
                bidx += nbins;
            else if( bidx >= nbins )
                bidx -= nbins;

            qanglePtr[x] = (uchar) bidx;
            gradPtr[x] = mag;
        }
    }
    integral( grad, norm, grad.depth() );

    float* histBuf;
    const float* magBuf;
    const uchar* binsBuf;

    int binsStep = (int) ( qangle.step / sizeof(uchar) );
    int histStep = (int) ( histogram[0].step / sizeof(float) );
    int magStep = (int) ( grad.step / sizeof(float) );
    for ( binIdx = 0; binIdx < nbins; binIdx++ )
    {
        histBuf = (float*) histogram[binIdx].data;
        magBuf = (const float*) grad.data;
        binsBuf = (const uchar*) qangle.data;

        memset( histBuf, 0, histSize.width * sizeof ( histBuf[0] ) );
        histBuf += histStep + 1;
        for ( y = 0; y < qangle.rows; y++ )
        {
            histBuf[-1] = 0.f;
            float strSum = 0.f;
            for ( x = 0; x < qangle.cols; x++ )
            {
                if( binsBuf[x] == binIdx )
                    strSum += magBuf[x];
                histBuf[x] = histBuf[-histStep + x] + strSum;
            }
            histBuf += histStep;
            binsBuf += binsStep;
            magBuf += magStep;
        }
    }
}
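// From the indexing above (histBuf += histStep + 1, histBuf[-histStep + x]) each per-bin
// histogram is expected to be an integral image of size (rows + 1) x (cols + 1), CV_32F,
// while norm is allocated by integral() inside the method. Below is a hedged allocation
// sketch; nbins = 9 and the helper name are illustrative assumptions, and the final call
// assumes an evaluator object from which integralHistogram() is accessible:
void buildHogIntegrals(const cv::Mat& gray)
{
    const int nbins = 9;                                  // common HOG bin count (assumption)
    std::vector<cv::Mat> hist(nbins);
    for (int b = 0; b < nbins; ++b)
        hist[b] = cv::Mat::zeros(gray.rows + 1, gray.cols + 1, CV_32F);
    cv::Mat norm;                                         // filled by integral() in the method
    // evaluator.integralHistogram(gray, hist, norm, nbins);
}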