コード例 #1
0
ファイル: pi.c プロジェクト: ursache/HPC-hacks
double calcPi_simd_rcp(void)
{
        double x;
        int i;
        double width = 1./(double) num_rects;
        //
        __m256d __width = _mm256_set1_pd(width);
        //
        __m256d __half  = _mm256_set1_pd(0.5);
        __m256d __four  = _mm256_set1_pd(4.0);
        __m256d __one   = _mm256_set1_pd(1.0);
        //
        __m256d __sum   = _mm256_set1_pd(0.0);
        __m256d __i = _mm256_set_pd(0.5, 1.5, 2.5, 3.5);

        for (i = 0; i < num_rects; i += 4)
        {
                __m256d __x = __i *__width;
                __m256d __y = RCP(__one + __x*__x);
                __sum      += __four * __y; // (__one + __x*__x);
                __i         = __i + __four;
        }
        double sum;
        //
        sum  = ((double*) &__sum)[0];
        sum += ((double*) &__sum)[1];
        sum += ((double*) &__sum)[2];
        sum += ((double*) &__sum)[3];
        //
        return width*sum;
}
コード例 #2
0
// normalize gradient magnitude at each location (uses sse)
void gradMagNorm( float *M, float *S, int h, int w, float norm ) {
  __m128 *_M, *_S, _norm; int i=0, n=h*w, n4=n/4;
  _S = (__m128*) S; _M = (__m128*) M; _norm = SET(norm);
  bool sse = !(size_t(M)&15) && !(size_t(S)&15);
  if(sse) for(; i<n4; i++) { *_M=MUL(*_M,RCP(ADD(*_S++,_norm))); _M++; }
  if(sse) i*=4; for(; i<n; i++) M[i] /= (S[i] + norm);
}
コード例 #3
0
// compute gradient magnitude and orientation at each location (uses sse)
void gradMag( float *I, float *M, float *O, int h, int w, int d ) {
  int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m;
  float *acost = acosTable(), acMult=25000/2.02f;
  // allocate memory for storing one column of output (padded so h4%4==0)
  h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float);
  M2=(float*) alMalloc(s,16); _M2=(__m128*) M2;
  Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx;
  Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy;
  // compute gradient magnitude and orientation for each column
  for( x=0; x<w; x++ ) {
    // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel
    for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x );
    for( y=0; y<d*h4/4; y++ ) _M2[y]=ADD(MUL(_Gx[y],_Gx[y]),MUL(_Gy[y],_Gy[y]));
    // store gradients with maximum response in the first channel
    for(c=1; c<d; c++) {
      for( y=0; y<h4/4; y++ ) {
        y1=h4/4*c+y; _m = CMPGT( _M2[y1], _M2[y] );
        _M2[y] = OR( AND(_m,_M2[y1]), ANDNOT(_m,_M2[y]) );
        _Gx[y] = OR( AND(_m,_Gx[y1]), ANDNOT(_m,_Gx[y]) );
        _Gy[y] = OR( AND(_m,_Gy[y1]), ANDNOT(_m,_Gy[y]) );
      }
    }
    // compute gradient mangitude (M) and normalize Gx
    for( y=0; y<h4/4; y++ ) {
      _m = MIN( RCPSQRT(_M2[y]), SET(1e10f) );
      _M2[y] = RCP(_m);
      _Gx[y] = MUL( MUL(_Gx[y],_m), SET(acMult) );
      _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) );
    };
    memcpy( M+x*h, M2, h*sizeof(float) );
    // compute and store gradient orientation (O) via table lookup
    if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]];
  }
  alFree(Gx); alFree(Gy); alFree(M2);
}
コード例 #4
0
// gradMagNorm( M, S, norm ) - operates on M - see gradientMag.m
// gradientMex('gradientMagNorm',M,S,normConst);
// normalize gradient magnitude at each location (uses sse)
void GradientMagnitudeChannel::gradMagNorm(float *M, float *S, int h, int w) {
  float norm = normalizationConstant;
  __m128 *_M, *_S, _norm; int i=0, n=h*w, n4=n/4;
  _S = (__m128*) S; _M = (__m128*) M; _norm = SET(norm);
  bool sse = !(size_t(M)&15) && !(size_t(S)&15);
  if(sse) { for(; i<n4; i++) *_M++=MUL(*_M,RCP(ADD(*_S++,_norm))); i*=4; }
  for(; i<n; i++) M[i] /= (S[i] + norm);
}
コード例 #5
0
ファイル: cwnet.c プロジェクト: xtforever/xtcw
static void RegisterApplication ( Widget top )
{
    /* -- Register widget classes and constructors */
    RCP( top, termEd );
    /* -- Register application specific actions */
    /* -- Register application specific callbacks */
    RCB( top, quit_cb );
    RCB( top, test_cb );
    RCB( top, exec_lua_cb );
}
コード例 #6
0
ファイル: video.c プロジェクト: xtforever/xtcw
static void RegisterApplication ( Widget top )
{
    /* -- Register widget classes and constructors */
    RCP( top, wcap );

    /* -- Register application specific actions */
    /* -- Register application specific callbacks */
    RCB( top, quit_gui );
    RCB( top, startstop_cb );

}
コード例 #7
0
ファイル: gradient.cpp プロジェクト: joelgallant/skcf
// compute gradient magnitude and orientation at each location (uses sse)
void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) {
    int x, y, y1, c, h4, s;
    float *Gx, *Gy, *M2;
    __m128 *_Gx, *_Gy, *_M2, _m;
    float *acost = acosTable(), acMult = 10000.0f;
    // allocate memory for storing one column of output (padded so h4%4==0)
    h4 = (h % 4 == 0) ? h : h - (h % 4) + 4;
    s = d * h4 * sizeof(float);
    M2 = (float*) alMalloc(s, 16);
    _M2 = (__m128*) M2;
    Gx = (float*) alMalloc(s, 16);
    _Gx = (__m128*) Gx;
    Gy = (float*) alMalloc(s, 16);
    _Gy = (__m128*) Gy;
    // compute gradient magnitude and orientation for each column
    for ( x = 0; x < w; x++ ) {
        // compute gradients (Gx, Gy) with maximum squared magnitude (M2)
        for (c = 0; c < d; c++) {
            grad1( I + x * h + c * w * h, Gx + c * h4, Gy + c * h4, h, w, x );
            for ( y = 0; y < h4 / 4; y++ ) {
                y1 = h4 / 4 * c + y;
                _M2[y1] = ADD(MUL(_Gx[y1], _Gx[y1]), MUL(_Gy[y1], _Gy[y1]));
                if ( c == 0 ) { continue; }
                _m = CMPGT( _M2[y1], _M2[y] );
                _M2[y] = OR( AND(_m, _M2[y1]), ANDNOT(_m, _M2[y]) );
                _Gx[y] = OR( AND(_m, _Gx[y1]), ANDNOT(_m, _Gx[y]) );
                _Gy[y] = OR( AND(_m, _Gy[y1]), ANDNOT(_m, _Gy[y]) );
            }
        }
        // compute gradient mangitude (M) and normalize Gx
        for ( y = 0; y < h4 / 4; y++ ) {
            _m = MINsse( RCPSQRT(_M2[y]), SET(1e10f) );
            _M2[y] = RCP(_m);
            if (O) { _Gx[y] = MUL( MUL(_Gx[y], _m), SET(acMult) ); }
            if (O) { _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); }
        };
        memcpy( M + x * h, M2, h * sizeof(float) );
        // compute and store gradient orientation (O) via table lookup
        if ( O != 0 ) for ( y = 0; y < h; y++ ) { O[x * h + y] = acost[(int)Gx[y]]; }
        if ( O != 0 && full ) {
            y1 = ((~size_t(O + x * h) + 1) & 15) / 4;
            y = 0;
            for ( ; y < y1; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; }
            for ( ; y < h - 4; y += 4 ) STRu( O[y + x * h],
                                                  ADD( LDu(O[y + x * h]), AND(CMPLT(LDu(Gy[y]), SET(0.f)), SET(PI)) ) );
            for ( ; y < h; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; }
        }
    }
    alFree(Gx);
    alFree(Gy);
    alFree(M2);
}
コード例 #8
0
ファイル: rgbConvertMex.hpp プロジェクト: baiyancheng20/ACF
// Convert from rgb to luv using sse
template<class iT> void rgb2luv_sse( iT *I, float *J, int n, float nrm ) {
  const int k=256; float R[k], G[k], B[k];
  if( (size_t(R)&15||size_t(G)&15||size_t(B)&15||size_t(I)&15||size_t(J)&15)
    || n%4>0 ) { rgb2luv(I,J,n,nrm); return; }
  int i=0, i1, n1; float minu, minv, un, vn, mr[3], mg[3], mb[3];
  float *lTable = rgb2luv_setup(nrm,mr,mg,mb,minu,minv,un,vn);
  while( i<n ) {
    n1 = i+k; if(n1>n) n1=n; float *J1=J+i; float *R1, *G1, *B1;
    // convert to floats (and load input into cache)
    if( typeid(iT) != typeid(float) ) {
      R1=R; G1=G; B1=B; iT *Ri=I+i, *Gi=Ri+n, *Bi=Gi+n;
      for( i1=0; i1<(n1-i); i1++ ) {
        R1[i1] = (float) *Ri++; G1[i1] = (float) *Gi++; B1[i1] = (float) *Bi++;
      }
    } else { R1=((float*)I)+i; G1=R1+n; B1=G1+n; }
    // compute RGB -> XYZ
    for( int j=0; j<3; j++ ) {
      __m128 _mr, _mg, _mb, *_J=(__m128*) (J1+j*n);
      __m128 *_R=(__m128*) R1, *_G=(__m128*) G1, *_B=(__m128*) B1;
      _mr=SET(mr[j]); _mg=SET(mg[j]); _mb=SET(mb[j]);
      for( i1=i; i1<n1; i1+=4 ) *(_J++) = ADD( ADD(MUL(*(_R++),_mr),
        MUL(*(_G++),_mg)),MUL(*(_B++),_mb));
    }
    { // compute XZY -> LUV (without doing L lookup/normalization)
      __m128 _c15, _c3, _cEps, _c52, _c117, _c1024, _cun, _cvn;
      _c15=SET(15.0f); _c3=SET(3.0f); _cEps=SET(1e-35f);
      _c52=SET(52.0f); _c117=SET(117.0f), _c1024=SET(1024.0f);
      _cun=SET(13*un); _cvn=SET(13*vn);
      __m128 *_X, *_Y, *_Z, _x, _y, _z;
      _X=(__m128*) J1; _Y=(__m128*) (J1+n); _Z=(__m128*) (J1+2*n);
      for( i1=i; i1<n1; i1+=4 ) {
        _x = *_X; _y=*_Y; _z=*_Z;
        _z = RCP(ADD(_x,ADD(_cEps,ADD(MUL(_c15,_y),MUL(_c3,_z)))));
        *(_X++) = MUL(_c1024,_y);
        *(_Y++) = SUB(MUL(MUL(_c52,_x),_z),_cun);
        *(_Z++) = SUB(MUL(MUL(_c117,_y),_z),_cvn);
      }
    }
    { // perform lookup for L and finalize computation of U and V
      for( i1=i; i1<n1; i1++ ) J[i1] = lTable[(int)J[i1]];
      __m128 *_L, *_U, *_V, _l, _cminu, _cminv;
      _L=(__m128*) J1; _U=(__m128*) (J1+n); _V=(__m128*) (J1+2*n);
      _cminu=SET(minu); _cminv=SET(minv);
      for( i1=i; i1<n1; i1+=4 ) {
        _l = *(_L++);
        *_U = SUB(MUL(_l,*_U),_cminu); _U++;
        *_V = SUB(MUL(_l,*_V),_cminv); _V++;
      }
    }
    i = n1;
  }
}
コード例 #9
0
ファイル: hog.cpp プロジェクト: WuNL/pcl
void 
pcl::people::HOG::gradMag( float *I, int h, int w, int d, float *M, float *O ) const
{
#if defined(__SSE2__)
  int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m;
  float *acost = acosTable(), acMult=25000/2.02f;

  // allocate memory for storing one column of output (padded so h4%4==0)
  h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float);

  M2=(float*) alMalloc(s,16);
  _M2=(__m128*) M2;
  Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx;
  Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy;

  // compute gradient magnitude and orientation for each column
  for( x=0; x<w; x++ ) {
  // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel
  for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x );
  for( y=0; y<d*h4/4; y++ ) _M2[y]=ADD(MUL(_Gx[y],_Gx[y]),MUL(_Gy[y],_Gy[y]));
  // store gradients with maximum response in the first channel
  for(c=1; c<d; c++) {
    for( y=0; y<h4/4; y++ ) {
    y1=h4/4*c+y; _m = CMPGT( _M2[y1], _M2[y] );
    _M2[y] = OR( AND(_m,_M2[y1]), ANDNOT(_m,_M2[y]) );
    _Gx[y] = OR( AND(_m,_Gx[y1]), ANDNOT(_m,_Gx[y]) );
    _Gy[y] = OR( AND(_m,_Gy[y1]), ANDNOT(_m,_Gy[y]) );
    }
  }
  // compute gradient magnitude (M) and normalize Gx
  for( y=0; y<h4/4; y++ ) {
    _m = MIN( RCPSQRT(_M2[y]), SET(1e10f) );
    _M2[y] = RCP(_m);
    _Gx[y] = MUL( MUL(_Gx[y],_m), SET(acMult) );
    _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) );
  };

  memcpy( M+x*h, M2, h*sizeof(float) );
  // compute and store gradient orientation (O) via table lookup
  if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]];
  }
  alFree(Gx); alFree(Gy); alFree(M2); 
#else
  int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; 
  float *acost = acosTable(), acMult=25000/2.02f;

  // allocate memory for storing one column of output (padded so h4%4==0)
  h4=(h%4==0) ? h : h-(h%4)+4; s=d*h4*sizeof(float);

  M2=(float*) alMalloc(s,16);
  Gx=(float*) alMalloc(s,16); 
  Gy=(float*) alMalloc(s,16); 
  float m;

  // compute gradient magnitude and orientation for each column
  for( x=0; x<w; x++ ) {
  // compute gradients (Gx, Gy) and squared magnitude (M2) for each channel
  for( c=0; c<d; c++ ) grad1( I+x*h+c*w*h, Gx+c*h4, Gy+c*h4, h, w, x );
  for( y=0; y<d*h4; y++ ) 
  {
    M2[y] = Gx[y] * Gx[y] + Gy[y] * Gy[y];
  }  
  
  // store gradients with maximum response in the first channel
  for(c=1; c<d; c++) 
  {
  for( y=0; y<h4/4; y++ ) 
    {
      y1=h4/4*c+y; 

    for (int ii = 0; ii < 4; ++ii)
    {
      if (M2[y1 * 4 + ii] > M2[y * 4 + ii])
      {
        M2[y * 4 + ii] = M2[y1 * 4 + ii];
        Gx[y * 4 + ii] = Gx[y1 * 4 + ii];
        Gy[y * 4 + ii] = Gy[y1 * 4 + ii];
      }
    }
    }
  }
  // compute gradient magnitude (M) and normalize Gx
  for( y=0; y<h4; y++ ) 
  {
  m = 1.0f/sqrtf(M2[y]);
  m = m < 1e10f ? m : 1e10f;
    M2[y] = 1.0f / m;
    Gx[y] = ((Gx[y] * m) * acMult);
    if (Gy[y] < 0)
    Gx[y] = -Gx[y];
  }
  
  memcpy( M+x*h, M2, h*sizeof(float) );
  // compute and store gradient orientation (O) via table lookup
  if(O!=0) for( y=0; y<h; y++ ) O[x*h+y] = acost[(int)Gx[y]];
  }
  alFree(Gx); alFree(Gy); alFree(M2);
#endif  
}