Exemplo n.º 1
0
// Run nIter iterations of Horn & Schunck optical flow (alters Vx, Vy in place).
//
// Every array is addressed as [x*h + y], i.e. w columns of h floats each.
//   Vx, Vy     - flow estimates; read and overwritten on each iteration
//   Ex, Ey, Et - per-pixel terms combined as (Ex*mx + Ey*my + Et)
//                (presumably image derivatives - confirm with caller)
//   Z          - per-pixel factor multiplying that sum
//                (presumably a precomputed normalizer - confirm with caller)
//   h, w       - number of rows / columns
//   nIter      - number of iterations to run
// Only interior entries (1<=x<w-1, 1<=y<h-1) are written; borders are untouched.
void opticalFlowHsMex( float *Vx, float *Vy, const float *Ex, const float *Ey,
  const float *Et, const float *Z, const int h, const int w, const int nIter )
{
  // without iterations or interior pixels nothing is ever written to Vx/Vy
  if( nIter<1 || h<3 || w<3 ) return;
  int x, y, x1, i, t; float my, mx, m;
  // n counts elements, s counts bytes: new[] needs the former, memcpy the latter
  const size_t n=size_t(w)*size_t(h), s=n*sizeof(float);
  float *Vx0=new float[n], *Vy0=new float[n];
  for( t=0; t<nIter; t++ ) {
    // snapshot the previous estimate so every update reads iteration t-1 values
    memcpy(Vx0,Vx,s); memcpy(Vy0,Vy,s);
    for( x=1; x<w-1; x++ ) {
      // do as much work as possible in SSE (assume non-aligned memory);
      // the bound y<h-4 keeps the y+1 neighbour load and the 4-wide store
      // (assuming LDu/STRu move 4 floats) inside the interior of the column
      for( y=1; y<h-4; y+=4 ) {
        x1=x*h; i=x1+y; __m128 _mx, _my, _m;
        // mean of the four neighbours (left/right column, row above/below)
        _my=MUL(ADD(LDu(Vy0[x1-h+y]),LDu(Vy0[x1+h+y]),
          LDu(Vy0[x1+y-1]),LDu(Vy0[x1+y+1])),.25f);
        _mx=MUL(ADD(LDu(Vx0[x1-h+y]),LDu(Vx0[x1+h+y]),
          LDu(Vx0[x1+y-1]),LDu(Vx0[x1+y+1])),.25f);
        _m=MUL(ADD(MUL(LDu(Ey[i]),_my),MUL(LDu(Ex[i]),_mx),
          LDu(Et[i])),LDu(Z[i]));
        STRu(Vx[i],SUB(_mx,MUL(LDu(Ex[i]),_m)));
        STRu(Vy[i],SUB(_my,MUL(LDu(Ey[i]),_m)));
      }
      // do remainder of work in regular loop (same update, one row at a time)
      for( ; y<h-1; y++ ) {
        x1=x*h; i=x1+y;
        mx=.25f*(Vx0[x1-h+y]+Vx0[x1+h+y]+Vx0[x1+y-1]+Vx0[x1+y+1]);
        my=.25f*(Vy0[x1-h+y]+Vy0[x1+h+y]+Vy0[x1+y-1]+Vy0[x1+y+1]);
        m = (Ex[i]*mx + Ey[i]*my + Et[i])*Z[i];
        Vx[i]=mx-Ex[i]*m; Vy[i]=my-Ey[i]*m;
      }
    }
  }
  delete [] Vx0; delete [] Vy0;
}
Exemplo n.º 2
0
// Compute gradient magnitude (M) and, optionally, orientation (O) at each
// location of I (uses sse).
//   I    - input, addressed as I[x*h + y + c*w*h] (column x, row y, channel c)
//   M    - output magnitude, w columns of h floats
//   O    - output orientation, same layout as M; pass null to skip orientation
//   h, w - number of rows / columns;  d - number of channels
//   full - if true, PI is added to O wherever the selected Gy is negative
// With d>1, each pixel keeps the gradient of the channel whose squared
// magnitude is largest.
void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) {
    int x, y, y1, c, h4, s;
    float *Gx, *Gy, *M2;
    __m128 *_Gx, *_Gy, *_M2, _m;
    float *acost = acosTable(), acMult = 10000.0f;
    // allocate memory for storing one column of output (padded so h4%4==0)
    h4 = (h % 4 == 0) ? h : h - (h % 4) + 4;
    s = d * h4 * sizeof(float);
    M2 = (float*) alMalloc(s, 16);
    _M2 = (__m128*) M2;
    Gx = (float*) alMalloc(s, 16);
    _Gx = (__m128*) Gx;
    Gy = (float*) alMalloc(s, 16);
    _Gy = (__m128*) Gy;
    // compute gradient magnitude and orientation for each column
    for ( x = 0; x < w; x++ ) {
        // compute gradients (Gx, Gy) with maximum squared magnitude (M2)
        for (c = 0; c < d; c++) {
            grad1( I + x * h + c * w * h, Gx + c * h4, Gy + c * h4, h, w, x );
            for ( y = 0; y < h4 / 4; y++ ) {
                // y1 indexes channel c's slice; y indexes channel 0's slice,
                // which doubles as the running per-pixel maximum
                y1 = h4 / 4 * c + y;
                _M2[y1] = ADD(MUL(_Gx[y1], _Gx[y1]), MUL(_Gy[y1], _Gy[y1]));
                if ( c == 0 ) { continue; }
                // per-lane select: where channel c is larger, take its values
                _m = CMPGT( _M2[y1], _M2[y] );
                _M2[y] = OR( AND(_m, _M2[y1]), ANDNOT(_m, _M2[y]) );
                _Gx[y] = OR( AND(_m, _Gx[y1]), ANDNOT(_m, _Gx[y]) );
                _Gy[y] = OR( AND(_m, _Gy[y1]), ANDNOT(_m, _Gy[y]) );
            }
        }
        // compute gradient magnitude (M) and normalize Gx
        for ( y = 0; y < h4 / 4; y++ ) {
            // presumably an approximate 1/sqrt(M2), capped at 1e10 so that a
            // zero gradient does not produce inf - confirm against the macros
            _m = MINsse( RCPSQRT(_M2[y]), SET(1e10f) );
            _M2[y] = RCP(_m);
            // scale Gx by _m and by acMult for use as an index into acost
            if (O) { _Gx[y] = MUL( MUL(_Gx[y], _m), SET(acMult) ); }
            // -0.f has only the sign bit set, so (assuming AND/XOR are the
            // bitwise ops) this flips the sign of Gx wherever Gy is negative
            if (O) { _Gx[y] = XOR( _Gx[y], AND(_Gy[y], SET(-0.f)) ); }
        }
        memcpy( M + x * h, M2, h * sizeof(float) );
        // compute and store gradient orientation (O) via table lookup
        // (index may be negative; assumes acosTable() returns a pointer that
        // allows this - TODO confirm)
        if ( O != 0 ) for ( y = 0; y < h; y++ ) { O[x * h + y] = acost[(int)Gx[y]]; }
        if ( O != 0 && full ) {
            // number of floats from this column's start to the next 16-byte boundary
            y1 = ((~size_t(O + x * h) + 1) & 15) / 4;
            // never step past the end of the column (only possible when h < 4);
            // without this the peel loop below could write beyond O
            if ( y1 > h ) { y1 = h; }
            y = 0;
            for ( ; y < y1; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; }
            for ( ; y < h - 4; y += 4 ) STRu( O[y + x * h],
                                                  ADD( LDu(O[y + x * h]), AND(CMPLT(LDu(Gy[y]), SET(0.f)), SET(PI)) ) );
            for ( ; y < h; y++ ) { O[y + x * h] += (Gy[y] < 0) * PI; }
        }
    }
    alFree(Gx);
    alFree(Gy);
    alFree(M2);
}