// compute gradient magnitude and orientation at each location (uses sse) void gradMag( float *I, float *M, float *O, int h, int w, int d ) { int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4/4; y++ ) _M2[y]=ADD(MUL(_Gx[y],_Gx[y]),MUL(_Gy[y],_Gy[y])); // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; _m = CMPGT( _M2[y1], _M2[y] ); _M2[y] = OR( AND(_m,_M2[y1]), ANDNOT(_m,_M2[y]) ); _Gx[y] = OR( AND(_m,_Gx[y1]), ANDNOT(_m,_Gx[y]) ); _Gy[y] = OR( AND(_m,_Gy[y1]), ANDNOT(_m,_Gy[y]) ); } } // compute gradient mangitude (M) and normalize Gx for( y=0; y<h4/4; y++ ) { _m = MIN( RCPSQRT(_M2[y]), SET(1e10f) ); _M2[y] = RCP(_m); _Gx[y] = MUL( MUL(_Gx[y],_m), SET(acMult) ); _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); }; memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); }
// compute gradient magnitude and orientation at each location (uses sse) void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) { int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult = 10000.0f; // allocate memory for storing one column of output (padded so h4%4==0) h4 = (h % 4 == 0) ? h : h - (h % 4) + 4; s = d * h4 * sizeof(float); M2 = (float*) alMalloc(s, 16); _M2 = (__m128*) M2; Gx = (float*) alMalloc(s, 16); _Gx = (__m128*) Gx; Gy = (float*) alMalloc(s, 16); _Gy = (__m128*) Gy; // compute gradient magnitude and orientation for each column for ( x = 0; x < w; x++ ) { // compute gradients (Gx, Gy) with maximum squared magnitude (M2) for (c = 0; c < d; c++) { grad1( I + x * h + c * w * h, Gx + c * h4, Gy + c * h4, h, w, x ); for ( y = 0; y < h4 / 4; y++ ) { y1 = h4 / 4 * c + y; _M2[y1] = ADD(MUL(_Gx[y1], _Gx[y1]), MUL(_Gy[y1], _Gy[y1])); if ( c == 0 ) { continue; } _m = CMPGT( _M2[y1], _M2[y] ); _M2[y] = OR( AND(_m, _M2[y1]), ANDNOT(_m, _M2[y]) ); _Gx[y] = OR( AND(_m, _Gx[y1]), ANDNOT(_m, _Gx[y]) ); _Gy[y] = OR( AND(_m, _Gy[y1]), ANDNOT(_m, _Gy[y]) ); } } // compute gradient mangitude (M) and normalize Gx for ( y = 0; y < h4 / 4; y++ ) { _m = MINsse( RCPSQRT(_M2[y]), SET(1e10f) ); _M2[y] = RCP(_m); if (O) { _Gx[y] = MUL( MUL(_Gx[y], _m), SET(acMult) ); } if (O) { _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); } }; memcpy( M + x * h, M2, h * sizeof(float) ); // compute and store gradient orientation (O) via table lookup if ( O != 0 ) for ( y = 0; y < h; y++ ) { O[x * h + y] = acost[(int)Gx[y]]; } if ( O != 0 && full ) { y1 = ((~size_t(O + x * h) + 1) & 15) / 4; y = 0; for ( ; y < y1; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; } for ( ; y < h - 4; y += 4 ) STRu( O[y + x * h], ADD( LDu(O[y + x * h]), AND(CMPLT(LDu(Gy[y]), SET(0.f)), SET(PI)) ) ); for ( ; y < h; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; } } } alFree(Gx); alFree(Gy); alFree(M2); }
void pcl::people::HOG::gradMag( float *I, int h, int w, int d, float *M, float *O ) const { #if defined(__SSE2__) int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4/4; y++ ) _M2[y]=ADD(MUL(_Gx[y],_Gx[y]),MUL(_Gy[y],_Gy[y])); // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; _m = CMPGT( _M2[y1], _M2[y] ); _M2[y] = OR( AND(_m,_M2[y1]), ANDNOT(_m,_M2[y]) ); _Gx[y] = OR( AND(_m,_Gx[y1]), ANDNOT(_m,_Gx[y]) ); _Gy[y] = OR( AND(_m,_Gy[y1]), ANDNOT(_m,_Gy[y]) ); } } // compute gradient magnitude (M) and normalize Gx for( y=0; y<h4/4; y++ ) { _m = MIN( RCPSQRT(_M2[y]), SET(1e10f) ); _M2[y] = RCP(_m); _Gx[y] = MUL( MUL(_Gx[y],_m), SET(acMult) ); _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); }; memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); #else int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); Gx=(float*) alMalloc(s,16); Gy=(float*) alMalloc(s,16); float m; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4; y++ ) { M2[y] = Gx[y] * Gx[y] + Gy[y] * Gy[y]; } // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; for (int ii = 0; ii < 4; ++ii) { if (M2[y1 * 4 + ii] > M2[y * 4 + ii]) { M2[y * 4 + ii] = M2[y1 * 4 + ii]; Gx[y * 4 + ii] = Gx[y1 * 4 + ii]; Gy[y * 4 + ii] = Gy[y1 * 4 + ii]; } } } } // compute gradient magnitude (M) and normalize Gx for( y=0; y<h4; y++ ) { m = 1.0f/sqrtf(M2[y]); m = m < 1e10f ? m : 1e10f; M2[y] = 1.0f / m; Gx[y] = ((Gx[y] * m) * acMult); if (Gy[y] < 0) Gx[y] = -Gx[y]; } memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); #endif }