// compute gradient magnitude and orientation at each location (uses sse) void gradMag( float *I, float *M, float *O, int h, int w, int d ) { int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4/4; y++ ) _M2[y]=ADD(MUL(_Gx[y],_Gx[y]),MUL(_Gy[y],_Gy[y])); // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; _m = CMPGT( _M2[y1], _M2[y] ); _M2[y] = OR( AND(_m,_M2[y1]), ANDNOT(_m,_M2[y]) ); _Gx[y] = OR( AND(_m,_Gx[y1]), ANDNOT(_m,_Gx[y]) ); _Gy[y] = OR( AND(_m,_Gy[y1]), ANDNOT(_m,_Gy[y]) ); } } // compute gradient mangitude (M) and normalize Gx for( y=0; y<h4/4; y++ ) { _m = MIN( RCPSQRT(_M2[y]), SET(1e10f) ); _M2[y] = RCP(_m); _Gx[y] = MUL( MUL(_Gx[y],_m), SET(acMult) ); _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); }; memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); }
/* Compute gradient magnitude (M) and orientation (O) at each location.
 *
 *   I - input image; column x of channel c starts at I + x*h + c*w*h
 *   M - output magnitude, h*w doubles, written column by column
 *   O - output orientation, h*w floats, written column by column; may be
 *       null, in which case only M is produced
 *   h - column length, w - number of columns, d - number of channels
 *
 * Gradients use centered differences in the interior and one-sided
 * differences on the borders. For every location the channel with the
 * largest squared gradient magnitude supplies the result.
 *
 * Border neighbours are clamped independently on each side, so images with
 * a single column (w==1) or a single row (h==1) no longer read outside I;
 * the corresponding derivative is then 0. */
void gradMag(double *I, double *M, float *O, int h, int w, int d ) {
  int x, y, c, a=w*h;
  double m, m1, dx, dx1, dy, dy1, rx, ry;
  double *Ix, *Ix0, *Ix1, *Iy0, *Iy1, *M0;
  float o, *O0;
  float *acost = acosTable(), acMult=(25000-1)/2.2f;
  for( x=0; x<w; x++ ) {
    M0=M+x*h;
    O0=(O!=0) ? O+x*h : 0;
    Ix=I+x*h;
    /* horizontal neighbours: clamp to the current column on either border */
    Ix0=(x==0)   ? Ix : Ix-h;
    Ix1=(x==w-1) ? Ix : Ix+h;
    rx=(x==0 || x==w-1) ? 1 : .5;
    for( y=0; y<h; y++ ) {
      /* vertical neighbours: clamp to the current row on either border */
      Iy0=(y==0)   ? Ix : Ix-1;
      Iy1=(y==h-1) ? Ix : Ix+1;
      ry=(y==0 || y==h-1) ? 1 : .5;
      dy=(*Iy1-*Iy0)*ry; dx=(*Ix1-*Ix0)*rx; m=dx*dx+dy*dy;
      /* keep the gradient of the channel with the largest response */
      for(c=1; c<d; c++) {
        dy1=(*(Iy1+c*a)-*(Iy0+c*a))*ry;
        dx1=(*(Ix1+c*a)-*(Ix0+c*a))*rx;
        m1=dx1*dx1+dy1*dy1;
        if(m1>m) { m=m1; dx=dx1; dy=dy1; }
      }
      o=0;
      if( m!=0 ) {
        m=sqrt(m);
        if( O0!=0 ) {
          /* o=acos(dx/m), looked up in a table: dx/m lies in [-1,1], so the
           * +1.1 shift keeps the table index positive */
          o = acost[(int)((dx/m+1.1f)*acMult)];
          if( o>PI-1e-5 ) o=0; else if( dy<0 ) o=(float)PI-o;
        }
      }
      *(M0++) = m;
      if( O0!=0 ) *(O0++) = o;
      Ix0++; Ix1++; Ix++;
    }
  }
}
// compute gradient magnitude and orientation at each location (uses sse) void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) { int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult = 10000.0f; // allocate memory for storing one column of output (padded so h4%4==0) h4 = (h % 4 == 0) ? h : h - (h % 4) + 4; s = d * h4 * sizeof(float); M2 = (float*) alMalloc(s, 16); _M2 = (__m128*) M2; Gx = (float*) alMalloc(s, 16); _Gx = (__m128*) Gx; Gy = (float*) alMalloc(s, 16); _Gy = (__m128*) Gy; // compute gradient magnitude and orientation for each column for ( x = 0; x < w; x++ ) { // compute gradients (Gx, Gy) with maximum squared magnitude (M2) for (c = 0; c < d; c++) { grad1( I + x * h + c * w * h, Gx + c * h4, Gy + c * h4, h, w, x ); for ( y = 0; y < h4 / 4; y++ ) { y1 = h4 / 4 * c + y; _M2[y1] = ADD(MUL(_Gx[y1], _Gx[y1]), MUL(_Gy[y1], _Gy[y1])); if ( c == 0 ) { continue; } _m = CMPGT( _M2[y1], _M2[y] ); _M2[y] = OR( AND(_m, _M2[y1]), ANDNOT(_m, _M2[y]) ); _Gx[y] = OR( AND(_m, _Gx[y1]), ANDNOT(_m, _Gx[y]) ); _Gy[y] = OR( AND(_m, _Gy[y1]), ANDNOT(_m, _Gy[y]) ); } } // compute gradient mangitude (M) and normalize Gx for ( y = 0; y < h4 / 4; y++ ) { _m = MINsse( RCPSQRT(_M2[y]), SET(1e10f) ); _M2[y] = RCP(_m); if (O) { _Gx[y] = MUL( MUL(_Gx[y], _m), SET(acMult) ); } if (O) { _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); } }; memcpy( M + x * h, M2, h * sizeof(float) ); // compute and store gradient orientation (O) via table lookup if ( O != 0 ) for ( y = 0; y < h; y++ ) { O[x * h + y] = acost[(int)Gx[y]]; } if ( O != 0 && full ) { y1 = ((~size_t(O + x * h) + 1) & 15) / 4; y = 0; for ( ; y < y1; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; } for ( ; y < h - 4; y += 4 ) STRu( O[y + x * h], ADD( LDu(O[y + x * h]), AND(CMPLT(LDu(Gy[y]), SET(0.f)), SET(PI)) ) ); for ( ; y < h; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; } } } alFree(Gx); alFree(Gy); alFree(M2); }
void pcl::people::HOG::gradMag( float *I, int h, int w, int d, float *M, float *O ) const { #if defined(__SSE2__) int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4/4; y++ ) _M2[y]=pcl::sse_add(pcl::sse_mul(_Gx[y],_Gx[y]),pcl::sse_mul(_Gy[y],_Gy[y])); // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; _m = pcl::sse_cmpgt( _M2[y1], _M2[y] ); _M2[y] = pcl::sse_or( pcl::sse_and(_m,_M2[y1]), pcl::sse_andnot(_m,_M2[y]) ); _Gx[y] = pcl::sse_or( pcl::sse_and(_m,_Gx[y1]), pcl::sse_andnot(_m,_Gx[y]) ); _Gy[y] = pcl::sse_or( pcl::sse_and(_m,_Gy[y1]), pcl::sse_andnot(_m,_Gy[y]) ); } } // compute gradient magnitude (M) and normalize Gx for( y=0; y<h4/4; y++ ) { _m = pcl::sse_min( pcl::sse_rcpsqrt(_M2[y]), pcl::sse_set(1e10f) ); _M2[y] = pcl::sse_rcp(_m); _Gx[y] = pcl::sse_mul( pcl::sse_mul(_Gx[y],_m), pcl::sse_set(acMult) ); _Gx[y] = pcl::sse_xor( _Gx[y], pcl::sse_and(_Gy[y], pcl::sse_set(-0.f)) ); }; memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); #else int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; float *acost = acosTable(), acMult=25000/2.02f; // allocate memory for storing one column of output (padded so h4%4==0) h4=(h%4==0) ? 
h : h-(h%4)+4; s=d*h4*sizeof(float); M2=(float*) alMalloc(s,16); Gx=(float*) alMalloc(s,16); Gy=(float*) alMalloc(s,16); float m; // compute gradient magnitude and orientation for each column for( x=0; x<w; x++ ) { // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x ); for( y=0; y<d*h4; y++ ) { M2[y] = Gx[y] * Gx[y] + Gy[y] * Gy[y]; } // store gradients with maximum response in the first channel for(c=1; c<d; c++) { for( y=0; y<h4/4; y++ ) { y1=h4/4*c+y; for (int ii = 0; ii < 4; ++ii) { if (M2[y1 * 4 + ii] > M2[y * 4 + ii]) { M2[y * 4 + ii] = M2[y1 * 4 + ii]; Gx[y * 4 + ii] = Gx[y1 * 4 + ii]; Gy[y * 4 + ii] = Gy[y1 * 4 + ii]; } } } } // compute gradient magnitude (M) and normalize Gx for( y=0; y<h4; y++ ) { m = 1.0f/sqrtf(M2[y]); m = m < 1e10f ? m : 1e10f; M2[y] = 1.0f / m; Gx[y] = ((Gx[y] * m) * acMult); if (Gy[y] < 0) Gx[y] = -Gx[y]; } memcpy( M+x*h, M2, h*sizeof(float) ); // compute and store gradient orientation (O) via table lookup if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]]; } alFree(Gx); alFree(Gy); alFree(M2); #endif }