inline
 __host__ __device__ dffast4
 kernel_ffast4_feature<V>::new_state(const point2d<int>& n)
 {
   // Build the 8-component descriptor attached to point n:
   //   components 0..3 are read from s1_ around (row, col),
   //   components 4..7 are read from s2_ around (row / 2, col / 2).
   // In both cases the four sampling offsets are the entries of c4,
   // each multiplied by 2.
   const int row = n.row();
   const int col = n.col();

   dffast4 state;

   // First four components: samples of s1_.
   for (int k = 0; k < 4; ++k)
   {
     state[k] = s1_(row + 2 * c4[k][0], col + 2 * c4[k][1]);
   }

   // Last four components: samples of s2_ at the halved coordinates.
   for (int k = 0; k < 4; ++k)
   {
     state[k + 4] = s2_(row / 2 + 2 * c4[k][0], col / 2 + 2 * c4[k][1]);
   }

   return state;
 }
  inline
  __host__ __device__ float
  kernel_ffast4_feature<V>::distance_linear(const dffast4& a,
                                            const point2d<int>& n)
  {
    // Sum of absolute differences between the descriptor `a` and the
    // values currently found around point n, sampled the same way as in
    // new_state(): four taps of s1_ around (row, col) compared with
    // a[0..3], then four taps of s2_ around (row / 2, col / 2) compared
    // with a[4..7]. The sum is returned divided by 255 * 16.
    //
    // NOTE(review): eight differences are accumulated while the divisor
    // uses 16 -- confirm this scale is intended before relying on the
    // range of the result.
    const int row = n.row();
    const int col = n.col();

    unsigned short sum = 0;

    // Taps read from s1_.
    for (int k = 0; k < 4; ++k)
    {
      gl8u sample = s1_(row + 2 * c4[k][0], col + 2 * c4[k][1]);
      sum += ::abs(sample - a[k]);
    }

    // Taps read from s2_ at the halved coordinates.
    for (int k = 0; k < 4; ++k)
    {
      gl8u sample = s2_(row / 2 + 2 * c4[k][0], col / 2 + 2 * c4[k][1]);
      sum += ::abs(sample - a[4 + k]);
    }

    return sum / (255.f * 16.f);

    // Disabled alternatives kept for reference only:
    //  - sampling every second entry of circle_r3 (8 taps per image,
    //    float values scaled by 255.f, accumulated with fabs);
    //  - return cuimg::distance_mean_linear(a, new_state(n));
  }
// Runs the update passes on this->est_.
//
// Each pass of the outer loop:
//   1. accumulates a frequency-domain prediction in estF_ by, for every
//      index m, copying selected strata of the estimate into s1_/s2_,
//      weighting them with multiplyStratum(), transforming them and
//      multiplying by psfsF_(m) and psfsF_(m+1);
//   2. transforms the prediction back and stores its `rect` region in est_;
//   3. forms the ratio img_ / est_ (dividing by epsilon_ instead wherever
//      est_ is not greater than epsilon_) and transforms it;
//   4. for every m, multiplies the transformed ratio by conj(psfsF_(m)),
//      transforms back, multiplies by the estimate saved at the start of the
//      pass (prev_) and sums the results into est_;
//   5. multiplies est_ by scale_ and sets entries not greater than epsilon_
//      to 0.
//
// Before the first pass the current estimate is saved in this->old_.
//
// NOTE(review): the outer loop runs strata_.length(0) times, i.e. the pass
// count is tied to the size of strata_ -- confirm this is intended.
// NOTE(review): step 1 uses psfsF_(m) and psfsF_(m+1) while step 4 only uses
// psfsF_(m) for m in [0, size) -- confirm against the derivation.
void EstimateEMOS2<T,N>::iterate(
){
    int l, m;
    // Region of the working buffers that corresponds to the estimate.
    RectDomain<N> rect(this->est_.lbound(), this->est_.ubound());
    this->old_ = this->est_;
    int length = strata_.length(0);
    // Number of m indices processed per pass; the loops below access
    // strata_ at indices up to size + 1 == length - 1.
    int size = length - 2;
    for ( l = 0; l < length; l++ ) 
    {
        // Reset the frequency-domain accumulator and remember the estimate
        // this pass starts from.
        estF_ = 0;
        prev_ = this->est_;
        // get image prediction which is a convolution of est with 
        // interpolation of psfs (multiplication and in Fourier domain)
        for ( m = 0; m < size; m++ ) 
        {
            // s1_ receives the estimate restricted to stratum m+1; the
            // first and last m additionally include the outermost stratum
            // (index 0, respectively index m+2).
            s1_ = 0;
            if ( m == 0 )
            {
                s1_(strata_(m)) = this->est_(strata_(m));
            }
            else if ( m == size - 1 )
            {
                s1_(strata_(m+2)) = this->est_(strata_(m+2));
            }
            s1_(strata_(m+1)) = this->est_(strata_(m+1));

            // Two copies of the same data, weighted by multiplyStratum()
            // with a_ and opposite boolean flags.
            // NOTE(review): presumably complementary interpolation weights
            // -- verify in multiplyStratum.
            s2_ = s1_;
            multiplyStratum(strata_(m+1), s1_, a_, true);
            multiplyStratum(strata_(m+1), s2_, a_, false);

            // First weighted copy: mirror into s_, transform to sF_,
            // accumulate with psfsF_(m).
            mirror(s1_, s_);
            fftw_.plan(s_, sF_);
            fftw_.execute();
            estF_ += sF_ * psfsF_(m);

            // Second weighted copy: same steps, accumulated with psfsF_(m+1).
            mirror(s2_, s_);
            fftw_.plan(s_, sF_);
            fftw_.execute();
            estF_ += sF_ * psfsF_(m+1);
        }
        // convert back to space domain
        fftw_.plan(estF_, s2_);
        fftw_.execute();
        // Divide by the element count of the buffer.
        // NOTE(review): presumably compensates for an unnormalized inverse
        // transform -- confirm in the fftw_ wrapper.
        s2_ /= s2_.size();
        this->est_ = s2_(rect);
        // get the ratio of image and prediction
        s_ = where( this->est_ > epsilon_, this->img_/this->est_, this->img_/epsilon_);
        mirror(s_, s2_);
        fftw_.plan(s2_, estF_);
        fftw_.execute();
	
        // convolve the ratio with psf and multiply with old estimate
        this->est_ = 0;
        for ( m = 0; m < size; m++ ) 
        {
            sF_ = estF_ * conj(psfsF_(m));
            // multiply with old estimate
            fftw_.plan(sF_, s2_);
            fftw_.execute();
            s2_ /= s2_.size();
            s_ = s2_(rect);
            s_ *= prev_;
            this->est_ += s_;
        }
        // Apply the global scale factor, then zero every entry that is not
        // greater than epsilon_.
        this->est_ *= scale_;
        this->est_ = where( this->est_ > epsilon_, this->est_, 0);
    }
}