Exemple #1
0
// Run the fit: remember the observations and the problem dimensions, then
// hand the parameter vector to FindMaximum2::Fit, which updates Parameters
// in place. Lim is passed as that routine's iteration limit.
void NonLinearLeastSquares::Fit(const ColumnVector& Data,
   ColumnVector& Parameters)
{
   Tracer tr("NonLinearLeastSquares::Fit");

   // keep a pointer to the caller's data; it is not copied here
   DataPointer = &Data;
   n_obs = Data.Nrows();
   n_param = Parameters.Nrows();

   FindMaximum2::Fit(Parameters, Lim);

   // only reached when the call above returned normally
   cout << "\nConverged" << endl;
}
// Opening part of BinghamThread::fit_bingham.
// NOTE(review): this excerpt ends before the function's closing brace; only
// the early-out and the local-maxima search are visible. tess, neighborhood,
// num_max, mod and qiRadius are not used in the visible part.
//
// sh_data : coefficient vector; element 1 == 0 is treated as "nothing to fit"
// base    : matrix multiplied with sh_data to obtain one radius per direction
// adj     : for each direction index (0-based) the set of neighbouring indices
// Returns a vector of 27 floats, all zero in the early-out case.
std::vector<float> BinghamThread::fit_bingham( const ColumnVector& sh_data,
                                                  const Matrix& tess,
                                                  const std::vector<QSet<int> >& adj,
                                                  const Matrix& base,
                                                  const int neighborhood,
                                                  const int num_max )
{
    unsigned int mod = 9;
    // result buffer: 27 floats, zero-initialised
    std::vector<float> result( 27, 0 );


    // if no CSD no fit necessary (first coefficient is exactly zero).
    if ( sh_data( 1 ) == 0 )
    {
        return result;
    }

    // get maxima: evaluate the signal, one value per row of base
    ColumnVector radius = base * sh_data;

    // copy the 1-based newmat vector into a 0-based std::vector
    std::vector<float> qfRadius( radius.Nrows() );
    for ( unsigned int i = 0; i < qfRadius.size(); ++i )
    {
        qfRadius[i] = radius( i + 1 );
    }

    // identity index table 0..size-1
    std::vector<int> qiRadius( radius.Nrows() );
    for ( unsigned int i = 0; i < qiRadius.size(); ++i )
    {
        qiRadius[i] = i;
    }

    // an index is a maximum when its value is positive and no neighbour listed
    // in adj[i] has a strictly larger value (ties are accepted)
    std::vector<int> maxima;
    for ( unsigned int i = 0; i < qfRadius.size(); ++i )
    {
        QSet<int> n = adj[i];
        float r = qfRadius[i];
        if ( r > 0 )
        {
            bool isMax = true;
            foreach (const int &value, n)
            {
                if ( r < qfRadius[value] )
                {
                    isMax = false;
                }
            }
            if ( isMax )
            {
                maxima.push_back( i );
            }
        }
    }
/**
 * Matrix-vector product N = M * O, accumulated directly in N.
 * M must be square and N, O must both have as many rows as M; otherwise
 * Err::errAbort is called with the offending dimensions.
 * @param N - output vector, overwritten element by element.
 * @param O - input vector.
 * @param M - square matrix.
 */
void SpectClust::multByMatrix(ColumnVector &N, ColumnVector &O, const Matrix &M) {
  const int numRows = M.Nrows();
  const int numCols = M.Ncols();

  const bool vectorsAgree = (N.Nrows() == O.Nrows());
  const bool matrixSquare = (M.Nrows() == M.Ncols());
  const bool matrixFitsVector = (M.Nrows() == N.Nrows());
  if(!vectorsAgree || !matrixSquare || !matrixFitsVector) {
    Err::errAbort("wrong dimensions: " + ToStr(O.Nrows()) + " " + ToStr(N.Nrows()) + " " + ToStr(M.Nrows()));
  }

  for(int r = 0; r < numRows; ++r) {
    // start each dot product from zero, then add one term per column
    N[r] = 0;
    for(int c = 0; c < numCols; ++c) {
      N[r] += O[c] * M[r][c];
    }
  }
}
Exemple #4
0
// Fourier transform of the pair (U, V) into the pair (X, Y).
// NOTE(review): presumably U/V and X/Y hold real and imaginary parts - confirm.
// NOTE(review): this excerpt stops inside the do-loop; its terminating
// condition and anything after it are not visible here.
// Throws ProgramException when U and V differ in length or are empty.
void FFT(const ColumnVector& U, const ColumnVector& V,
   ColumnVector& X, ColumnVector& Y)
{
   // from Carl de Boor (1980), Siam J Sci Stat Comput, 1 173-8
   // but first try Sande and Gentleman
   Tracer trace("FFT");
   REPORT
   const int n = U.Nrows();                     // length of arrays
   if (n != V.Nrows() || n == 0)
      Throw(ProgramException("Vector lengths unequal or zero", U, V));
   // a single element is returned as a plain copy of the input
   if (n == 1) { REPORT X = U; Y = V; return; }

   // see if we can use the newfft routine
   // (works in place on copies held in X and Y; if it does not report
   // success we fall through to the code below)
   if (!FFT_Controller::OnlyOldFFT && FFT_Controller::CanFactor(n))
   {
      REPORT
      X = U; Y = V;
      if ( FFT_Controller::ar_1d_ft(n,X.Store(),Y.Store()) ) return;
   }

   // working copies of the input; (A,B) and (X,Y) are used alternately as
   // source and destination of fftstep
   ColumnVector B = V;
   ColumnVector A = U;
   X.ReSize(n); Y.ReSize(n);
   const int nextmx = 8;                        // number of entries in prime[]
#ifndef ATandT
   int prime[8] = { 2,3,5,7,11,13,17,19 };
#else
   int prime[8];
   prime[0]=2; prime[1]=3; prime[2]=5; prime[3]=7;
   prime[4]=11; prime[5]=13; prime[6]=17; prime[7]=19;
#endif
   // after  : product of the factors already processed
   // before : part of n that still has to be factorised
   // inzee  : true when (A,B) is the source of the next step
   int after = 1; int before = n; int next = 0; bool inzee = true;
   int now = 0; int b1;             // initialised to keep gnu happy

   do
   {
      // find the next factor "now" of "before": try the table entries in
      // order, then keep adding 2 once the table is exhausted
      for (;;)
      {
     if (next < nextmx) { REPORT now = prime[next]; }
     b1 = before / now;  if (b1 * now == before) { REPORT break; }
     next++; now += 2;
      }
      before = b1;

      if (inzee) { REPORT fftstep(A, B, X, Y, after, now, before); }
      else { REPORT fftstep(X, Y, A, B, after, now, before); }

      // swap source/destination roles and account for the factor just used
      inzee = !inzee; after *= now;
   }
Exemple #5
0
// Fourier transform of a real series U of even length n.
// U is split into two half-length vectors, transformed with one call to FFT,
// and the result is unpacked into X and Y, each resized to n/2 + 1 elements.
// Throws ProgramException when n is odd.
void RealFFT(const ColumnVector& U, ColumnVector& X, ColumnVector& Y)
{
   // Fourier transform of a real series
   Tracer trace("RealFFT");
   REPORT
   const int n = U.Nrows();                     // length of arrays
   const int n2 = n / 2;
   if (n != 2 * n2)
      Throw(ProgramException("Vector length not multiple of 2", U));
   ColumnVector A(n2), B(n2);
   // de-interleave: elements 0,2,4,... of U go to A, elements 1,3,5,... to B
   Real* a = A.Store(); Real* b = B.Store(); Real* u = U.Store(); int i = n2;
   while (i--) { *a++ = *u++; *b++ = *u++; }
   // transform the pair (A,B), result written back into A and B
   FFT(A,B,A,B);
   int n21 = n2 + 1;
   X.ReSize(n21); Y.ReSize(n21);
   i = n2 - 1;
   a = A.Store(); b = B.Store();              // first els of A and B
   Real* an = a + i; Real* bn = b + i;        // last els of A and B
   Real* x = X.Store(); Real* y = Y.Store();  // first els of X and Y
   Real* xn = x + n2; Real* yn = y + n2;      // last els of X and Y

   // both end points are built from element 0 of A and B and get a zero Y
   *x++ = *a + *b; *y++ = 0.0;                // first complex element
   *xn-- = *a++ - *b++; *yn-- = 0.0;          // last complex element

   // walk inwards from both ends: each pass combines one element from the
   // front (a, b) with one from the back (an, bn) and writes one output at
   // the front (x, y) and one at the back (xn, yn)
   int j = -1; i = n2/2;
   while (i--)
   {
      // c, s are set by cossin for index j (-1, -2, ...) and length n
      // NOTE(review): presumably cos and sin of 2*pi*j/n - confirm in cossin
      Real c,s; cossin(j--,n,c,s);
      Real am = *a - *an; Real ap = *a++ + *an--;
      Real bm = *b - *bn; Real bp = *b++ + *bn--;
      Real samcbp = s * am + c * bp; Real sbpcam = s * bp - c * am;
      *x++  =  0.5 * ( ap + samcbp); *y++  =  0.5 * ( bm + sbpcam);
      *xn-- =  0.5 * ( ap - samcbp); *yn-- =  0.5 * (-bm + sbpcam);
   }
}
// Install pcoef as the coefficient vector of this field.
// Throws BasisfieldException when pcoef does not have CoefSz() rows.
// Afterwards futd holds four false entries.
// NOTE(review): futd presumably flags which cached fields are up to date - confirm.
void basisfield::SetCoef(const ColumnVector& pcoef) 
{
  const int expected_rows = int(CoefSz());
  if (pcoef.Nrows() != expected_rows) {
    throw BasisfieldException("basisfield::SetCoef::Mismatch between input vector and # of coefficients");
  }

  if (coef) {
    // storage already exists: overwrite its contents
    *coef = pcoef;
  }
  else {
    // first assignment: allocate a private copy of the input
    coef = boost::shared_ptr<NEWMAT::ColumnVector>(new NEWMAT::ColumnVector(pcoef));
  }

  futd.assign(4,false);
}
Exemple #7
0
void FFTI(const ColumnVector& U, const ColumnVector& V,
   ColumnVector& X, ColumnVector& Y)
{
   // Inverse transform
   Tracer trace("FFTI");
   REPORT
   FFT(U,-V,X,Y);
   const Real n = X.Nrows(); X /= n; Y /= (-n);
}
/**
 * Utility printing function.
 * Writes the elements of v to the stream, separated by delim. No delimiter
 * or newline is written after the last element. Nothing is written for an
 * empty vector.
 * @param v - vector to print.
 * @param out - destination stream; NULL selects cout.
 * @param delim - text placed between consecutive elements.
 */
void printColumnVector(ColumnVector &v, std::ostream *out, const std::string& delim) {
  if(out == NULL) 
    out = &cout;
  const int nRow = v.Nrows();
  // An empty vector has no element(0); without this guard the final write
  // below would read out of range.
  if(nRow <= 0)
    return;
  for(int i = 0; i < nRow - 1; i++)
    (*out) << v.element(i) << delim;
  // last element is written without a trailing delimiter
  (*out) << v.element(nRow - 1);
}
Exemple #9
0
double unsupervised::gaussian(int k, const ColumnVector& ob){
	ColumnVector tmp(Dim);
	tmp = ob-mu[k];
	double x = (tmp.t() * sigma[k].i() * tmp).AsScalar();
	if( x<0.0 || std::isnan(x) ) x = 0.0; //for avoiding the failure from calculation error of newmat library.
    
	tmp.CleanUp();
	return (exp(x*(-0.5)) / (pow(2.0*M_PI,ob.Nrows()*0.5)*pow(sigma[k].Determinant(),0.5)));
}
Exemple #10
0
// Multivariate normal density of observation ob for an explicitly supplied
// mean mix_mu and covariance mix_sigma.
//
// ob        : observation vector
// mix_mu    : mean vector
// mix_sigma : covariance matrix (inverted and its determinant taken here)
// Returns exp(-x/2) / ((2*pi)^(d/2) * sqrt(det(mix_sigma))), where x is the
// quadratic form (ob-mix_mu)' * mix_sigma^-1 * (ob-mix_mu) and d = ob.Nrows().
double unsupervised::gaussian(const ColumnVector& ob, const ColumnVector& mix_mu, const Matrix& mix_sigma){
	ColumnVector tmp(Dim);
	tmp = ob-mix_mu;
	double x = (tmp.t() * mix_sigma.i() * tmp).AsScalar();
	// Guard against calculation errors of the newmat library: a negative or
	// NaN quadratic form is replaced by zero. The NaN case matches the
	// gaussian(int, const ColumnVector&) overload; without it a NaN would
	// propagate into the returned density.
	if( x<0.0 || std::isnan(x) ) x = 0.0;
    
	tmp.CleanUp();
	return (exp(x*(-0.5)) / (pow(2.0*M_PI,ob.Nrows()*0.5)*pow(mix_sigma.Determinant(),0.5)));
}
void basisfield::Set(const ColumnVector& pfield) 
{
  if (pfield.Nrows() != int(FieldSz())) {throw BasisfieldException("basisfield::Set::Mismatch between input vector and size of field");}

  volume<float>  vol_pfield(FieldSz_x(),FieldSz_y(),FieldSz_z());
  vol_pfield.setdims(Vxs_x(),Vxs_y(),Vxs_z());
  vol_pfield.insert_vec(pfield);
  
  Set(vol_pfield);
}
Exemple #12
0
//Delete a BV.  Very messy
// Removes the basis vector at 1-based position loc from alpha, C, Q and BV
// and decrements current_size. In each container the last entry is moved
// into slot loc and the last slot is then dropped, so the relative order of
// the remaining entries is not preserved.
void SOGP::delete_bv(int loc){
  //First swap loc to the last spot
  // (the old row loc is kept in alphastar, the last row is copied over it)
  RowVector alphastar = alpha.Row(loc);
  alpha.Row(loc)=alpha.Row(alpha.Nrows());
  //Now C
  // cstar : diagonal entry of the removed element
  // Cstar : column loc with entry loc replaced by the last entry, last entry dropped
  // Crep  : last column with entry loc replaced by its last entry; it becomes
  //         both row loc and column loc of C
  double cstar = C(loc,loc);
  ColumnVector Cstar = C.Column(loc);
  Cstar(loc)=Cstar(Cstar.Nrows());
  Cstar=Cstar.Rows(1,Cstar.Nrows()-1);
  ColumnVector Crep=C.Column(C.Ncols());
  Crep(loc)=Crep(Crep.Nrows());
  C.Row(loc)=Crep.t();;
  C.Column(loc)=Crep;
  //and Q
  // same reshuffle as for C
  double qstar = Q(loc,loc);
  ColumnVector Qstar = Q.Column(loc);
  Qstar(loc)=Qstar(Qstar.Nrows());
  Qstar=Qstar.Rows(1,Qstar.Nrows()-1);
  ColumnVector Qrep=Q.Column(Q.Ncols());
  Qrep(loc)=Qrep(Qrep.Nrows());
  Q.Row(loc)=Qrep.t();
  Q.Column(loc)=Qrep;

  //Ok, now do the actual removal  Appendix G section g
  // drop the last row of alpha, then correct every column by the removed
  // element's contribution
  alpha= alpha.Rows(1,alpha.Nrows()-1);
  ColumnVector qc = (Qstar+Cstar)/(qstar+cstar);
  for(int i=1;i<=alpha.Ncols();i++)
    alpha.Column(i)-=alphastar(i)*qc;
  // shrink C and Q by one row/column and apply the rank-one corrections
  // NOTE(review): divides by qstar and by qstar+cstar without checking for zero
  C = C.SymSubMatrix(1,C.Ncols()-1) + (Qstar*Qstar.t())/qstar - ((Qstar+Cstar)*(Qstar+Cstar).t())/(qstar+cstar);
  Q = Q.SymSubMatrix(1,Q.Ncols()-1) - (Qstar*Qstar.t())/qstar;
  
  //And the BV
  // last basis vector takes slot loc, then the last column is removed
  BV.Column(loc)=BV.Column(BV.Ncols());
  BV=BV.Columns(1,BV.Ncols()-1);
  
  current_size--;
}
Exemple #13
0
ReturnMatrix trans(const ColumnVector & a)
//!  @brief Translation: 4x4 identity with the three elements of a in column 4.
//!  If a does not have three rows a message is written to cerr and the
//!  identity is returned.
{
   Matrix translation(4,4);
   translation << fourbyfourident; // start from the identity matrix

   if (a.Nrows() != 3)
     {
       cerr << "trans: wrong size in input vector." << endl;
     }
   else
     {
       // copy a(1..3) into rows 1..3 of the last column
       for (int axis = 1; axis <= 3; axis++)
          translation(axis,4) = a(axis);
     }

   translation.Release();
   return translation;
}
static void SlowFT(const ColumnVector& a, const ColumnVector&b,
   ColumnVector& x, ColumnVector& y)
{
   int n = a.Nrows();
   x.ReSize(n); y.ReSize(n);
   Real f = 6.2831853071795864769/n;
   for (int j=1; j<=n; j++)
   {
      Real sumx = 0.0; Real sumy = 0.0;
      for (int k=1; k<=n; k++)
      {
	 Real theta = - (j-1) * (k-1) * f;
	 Real c = cos(theta); Real s = sin(theta);
	 sumx += c * a(k) - s * b(k); sumy += s * a(k) + c * b(k);
      }
      x(j) = sumx; y(j) = sumy;
   }
}
static void SlowDTT_II(const ColumnVector& a, ColumnVector& c, ColumnVector& s)
{
   int n = a.Nrows(); c.ReSize(n); s.ReSize(n);
   Real f = 6.2831853071795864769 / (4*n);
   int k;

   for (k=1; k<=n; k++)
   {
      Real sum = 0.0;
      const int k1 = k-1;              // otherwise Visual C++ 5 fails
      for (int j=1; j<=n; j++) sum += cos(k1 * (2*j-1) * f) * a(j);
      c(k) = sum;
   }

   for (k=1; k<=n; k++)
   {
      Real sum = 0.0;
      for (int j=1; j<=n; j++) sum += sin(k * (2*j-1) * f) * a(j);
      s(k) = sum;
   }
}
static void SlowDTT(const ColumnVector& a, ColumnVector& c, ColumnVector& s)
{
   int n1 = a.Nrows(); int n = n1 - 1;
   c.ReSize(n1); s.ReSize(n1);
   Real f = 6.2831853071795864769 / (2*n);
   int k;

   int sign = 1;
   for (k=1; k<=n1; k++)
   {
      Real sum = 0.0;
      for (int j=2; j<=n; j++) sum += cos((j-1) * (k-1) * f) * a(j);
      c(k) = sum + (a(1) + sign * a(n1)) / 2.0;
      sign = -sign;
   }

   for (k=2; k<=n; k++)
   {
      Real sum = 0.0;
      for (int j=2; j<=n; j++) sum += sin((j-1) * (k-1) * f) * a(j);
      s(k) = sum;
   }
   s(1) = s(n1) = 0;
}
Exemple #17
0
// Iterative search driven by a small state machine.
// Theta : starting point on entry; overwritten with the final point when the
//         Convergence state is reached.
// n_it  : upper bound on "counter", which is incremented after each
//         NextPoint call and after each non-initial Value call.
// Throws ProgramException when the starting value is rejected (oorg set by
// the first Value call) and ConvergenceException when counter exceeds n_it.
// NOTE(review): Value, NextPoint and LastDerivative are defined elsewhere;
// presumably they evaluate the objective (oorg = "out of range"), propose a
// step with its directional derivative, and return the derivative along a
// direction at the last evaluated point - confirm against their definitions.
void FindMaximum2::Fit(ColumnVector& Theta, int n_it)
{
   Tracer tr("FindMaximum2::Fit");
   enum State {Start, Restart, Continue, Interpolate, Extrapolate,
      Fail, Convergence};
   State TheState = Start;
   // l1,l2,l3 : values returned by Value at Theta1,Theta2,Theta3
   // d1,d2,d3 : derivative values associated with those points
   Real z,w,x,x2,g,l1,l2,l3,d1,d2=0,d3;
   ColumnVector Theta1, Theta2, Theta3;
   int np = Theta.Nrows();
   ColumnVector H1(np), H3, HP(np), K, K1(np);
   bool oorg, conv;
   int counter = 0;
   Theta1 = Theta; HP = 0.0; g = 0.0;

   // This is really a set of gotos and labels, but they do not work
   // correctly in AT&T C++ and Sun 4.01 C++.

   // A case that ends without "break" deliberately falls through into the
   // next case; "break" returns to the top of the loop with the new state.
   for(;;)
   {
      switch (TheState)
      {
      case Start:
	 tr.ReName("FindMaximum2::Fit/Start");
	 Value(Theta1, true, l1, oorg);
	 if (oorg) Throw(ProgramException("invalid starting value\n"));
	 // falls through to Restart

      case Restart:
	 tr.ReName("FindMaximum2::Fit/ReStart");
	 conv = NextPoint(H1, d1);
	 if (conv) { TheState = Convergence; break; }
	 if (counter++ > n_it) { TheState = Fail; break; }

	 z = 1.0 / sqrt(d1);
	 H3 = H1 * z; K = (H3 - HP) * g; HP = H3;
	 g = 0.0;                     // de-activate to use curved projection
	 // with g forced to zero above, the first branch is always taken
	 if (g==0.0) K1 = 0.0; else K1 = K * 0.2 + K1 * 0.6;
	 // (K - K1) * alpha + K1 * (1 - alpha)
	 //     = K * alpha + K1 * (1 - 2 * alpha)
	 K = K1 * d1; g = z;
	 // falls through to Continue

      case Continue:
	 tr.ReName("FindMaximum2::Fit/Continue");
	 // trial point: full step H1 plus the second-order term K
	 Theta2 = Theta1 + H1 + K;
	 Value(Theta2, false, l2, oorg);
	 if (counter++ > n_it) { TheState = Fail; break; }
	 if (oorg)
	 {
	    // trial point rejected: halve the step and try again
	    H1 *= 0.5; K *= 0.25; d1 *= 0.5; g *= 2.0;
	    TheState =  Continue; break;
	 }
	 d2 = LastDerivative(H1 + K * 2.0);
	 // falls through to Interpolate

      case Interpolate:
	 tr.ReName("FindMaximum2::Fit/Interpolate");
	 // z and w are formed from the two values (l1,l2) and two
	 // derivatives (d1,d2); x below is the resulting step fraction
	 z = d1 + d2 - 3.0 * (l2 - l1);
	 w = z * z - d1 * d2;
	 if (w < 0.0) { TheState = Extrapolate; break; }
	 w = z + sqrt(w);
	 if (1.5 * w + d1 < 0.0)
	    { TheState = Extrapolate; break; }
	 if (d2 > 0.0 && l2 > l1 && w > 0.0)
	    { TheState = Extrapolate; break; }
	 x = d1 / (w + d1); x2 = x * x; g /= x;
	 Theta3 = Theta1 + H1 * x + K * x2;
	 Value(Theta3, true, l3, oorg);
	 if (counter++ > n_it) { TheState = Fail; break; }
	 if (oorg)
	 {
	    // interpolated point rejected: shrink towards Theta1 when it lay
	    // inside the step, otherwise restart from Theta2 with a shorter step
	    if (x <= 1.0)
	       { x *= 0.5; x2 = x*x; g *= 2.0; d1 *= x; H1 *= x; K *= x2; }
	    else
	    {
	       x = 0.5 * (x-1.0); x2 = x*x; Theta1 = Theta2;
	       H1 = (H1 + K * 2.0) * x;
	       K *= x2; g = 0.0; d1 = x * d2; l1 = l2;
	    }
	    TheState = Continue; break;
	 }

	 // Theta3 is at least as good as both end points: accept it and
	 // ask for a new direction
	 if (l3 >= l1 && l3 >= l2)
	    { Theta1 = Theta3; l1 = l3; TheState =  Restart; break; }

	 // otherwise narrow the bracket around the better end and interpolate again
	 d3 = LastDerivative(H1 + K * 2.0);
	 if (l1 > l2)
	    { H1 *= x; K *= x2; Theta2 = Theta3; d1 *= x; d2 = d3*x; }
	 else
	 {
	    Theta1 = Theta2; Theta2 = Theta3;
	    x -= 1.0; x2 = x*x; g = 0.0; H1 = (H1 + K * 2.0) * x;
	    K *= x2; l1 = l2; l2 = l3; d1 = x*d2; d2 = x*d3;
	    if (d1 <= 0.0) { TheState = Start; break; }
	 }
	 TheState =  Interpolate; break;

      case Extrapolate:
	 tr.ReName("FindMaximum2::Fit/Extrapolate");
	 // move the base point to Theta2 and double the step
	 Theta1 = Theta2; g = 0.0; K *= 4.0; H1 = (H1 * 2.0 + K);
	 d1 = 2.0 * d2; l1 = l2;
	 TheState = Continue; break;

      case Fail:
	 // NOTE(review): no break follows; this relies on Throw not returning
	 Throw(ConvergenceException(Theta));

      case Convergence:
	 Theta = Theta1; return;
      }
   }
}
Exemple #18
0
void Robot::dqp_torque(const ColumnVector & q, const ColumnVector & qp,
                     const ColumnVector & dqp,
                     ColumnVector & ltorque, ColumnVector & dtorque)
{
   int i;
   ColumnVector z0(3);
   Matrix Rt, temp;
   Matrix Q(3,3);
   ColumnVector *w, *wp, *vp, *a, *f, *n, *F, *N, *p;
   ColumnVector *dw, *dwp, *dvp, *da, *df, *dn, *dF, *dN, *dp;
   if(q.Ncols() != 1 || q.Nrows() != dof) error("q has wrong dimension");
   if(qp.Ncols() != 1 || qp.Nrows() != dof) error("qp has wrong dimension");
   ltorque = ColumnVector(dof);
   dtorque = ColumnVector(dof);
   set_q(q);
   w = new ColumnVector[dof+1];
   wp = new ColumnVector[dof+1];
   vp = new ColumnVector[dof+1];
   a = new ColumnVector[dof+1];
   f = new ColumnVector[dof+1];
   n = new ColumnVector[dof+1];
   F = new ColumnVector[dof+1];
   N = new ColumnVector[dof+1];
   p = new ColumnVector[dof+1];
   dw = new ColumnVector[dof+1];
   dwp = new ColumnVector[dof+1];
   dvp = new ColumnVector[dof+1];
   da = new ColumnVector[dof+1];
   df = new ColumnVector[dof+1];
   dn = new ColumnVector[dof+1];
   dF = new ColumnVector[dof+1];
   dN = new ColumnVector[dof+1];
   dp = new ColumnVector[dof+1];
   w[0] = ColumnVector(3);
   wp[0] = ColumnVector(3);
   vp[0] = gravity;
   dw[0] = ColumnVector(3);
   dwp[0] = ColumnVector(3);
   dvp[0] = ColumnVector(3);
   z0 = 0.0;
   Q = 0.0;
   Q(1,2) = -1.0;
   Q(2,1) = 1.0;
   z0(3) = 1.0;
   w[0] = 0.0;
   wp[0] = 0.0;
   dw[0] = 0.0;
   dwp[0] = 0.0;
   dvp[0] = 0.0;
   for(i = 1; i <= dof; i++) {
      Rt = links[i].R.t();
      p[i] = ColumnVector(3);
      p[i](1) = links[i].get_a();
      p[i](2) = links[i].get_d() * Rt(2,3);
      p[i](3) = links[i].get_d() * Rt(3,3);
      if(links[i].get_joint_type() != 0) {
         dp[i] = ColumnVector(3);
         dp[i](1) = 0.0;
         dp[i](2) = Rt(2,3);
         dp[i](3) = Rt(3,3);
      }
      if(links[i].get_joint_type() == 0) {
         w[i] = Rt*(w[i-1] + z0*qp(i));
         dw[i] = Rt*(dw[i-1] + z0*dqp(i));
         wp[i] = Rt*(wp[i-1] + vec_x_prod(w[i-1],z0*qp(i)));
         dwp[i] = Rt*(dwp[i-1]
               + vec_x_prod(dw[i-1],z0*qp(i))
               + vec_x_prod(w[i-1],z0*dqp(i))
               );
         vp[i] = vec_x_prod(wp[i],p[i])
               + vec_x_prod(w[i],vec_x_prod(w[i],p[i]))
               + Rt*(vp[i-1]);
         dvp[i] = vec_x_prod(dwp[i],p[i])
               + vec_x_prod(dw[i],vec_x_prod(w[i],p[i]))
               + vec_x_prod(w[i],vec_x_prod(dw[i],p[i]))
               + Rt*dvp[i-1];
      } else {
         w[i] = Rt*w[i-1];
         dw[i] = Rt*dw[i-1];
         wp[i] = Rt*wp[i-1];
         dwp[i] = Rt*dwp[i-1];
         vp[i] = Rt*(vp[i-1]
               + vec_x_prod(w[i],z0*qp(i))) * 2.0
               + vec_x_prod(wp[i],p[i])
               + vec_x_prod(w[i],vec_x_prod(w[i],p[i]));
         dvp[i] = Rt*(dvp[i-1]
               + (vec_x_prod(dw[i],z0*qp(i)) * 2.0
               + vec_x_prod(w[i],z0*dqp(i))))
               + vec_x_prod(dwp[i],p[i])
               + vec_x_prod(dw[i],vec_x_prod(w[i],p[i]))
               + vec_x_prod(w[i],vec_x_prod(dw[i],p[i]));
      }
      a[i] = vec_x_prod(wp[i],links[i].r)
            + vec_x_prod(w[i],vec_x_prod(w[i],links[i].r))
            + vp[i];
      da[i] = vec_x_prod(dwp[i],links[i].r)
            + vec_x_prod(dw[i],vec_x_prod(w[i],links[i].r))
            + vec_x_prod(w[i],vec_x_prod(dw[i],links[i].r))
            + dvp[i];
   }

   for(i = dof; i >= 1; i--) {
      F[i] = a[i] * links[i].m;
      N[i] = links[i].I*wp[i] + vec_x_prod(w[i],links[i].I*w[i]);
      dF[i] = da[i] * links[i].m;
      dN[i] = links[i].I*dwp[i] + vec_x_prod(dw[i],links[i].I*w[i])
            + vec_x_prod(w[i],links[i].I*dw[i]);
      if(i == dof) {
         f[i] = F[i];
         n[i] = vec_x_prod(p[i],f[i])
               + vec_x_prod(links[i].r,F[i]) + N[i];
         df[i] = dF[i];
         dn[i] = vec_x_prod(p[i],df[i])
               + vec_x_prod(links[i].r,dF[i]) + dN[i];
      } else {
         f[i] = links[i+1].R*f[i+1] + F[i];
         n[i] = links[i+1].R*n[i+1] + vec_x_prod(p[i],f[i])
               + vec_x_prod(links[i].r,F[i]) + N[i];
         df[i] = links[i+1].R*df[i+1] + dF[i];
         dn[i] = links[i+1].R*dn[i+1] + vec_x_prod(p[i],df[i])
               + vec_x_prod(links[i].r,dF[i]) + dN[i];
      }
      if(links[i].get_joint_type() == 0) {
         temp = ((z0.t()*links[i].R)*n[i]);
         ltorque(i) = temp(1,1);
         temp = ((z0.t()*links[i].R)*dn[i]);
         dtorque(i) = temp(1,1);
      } else {
         temp = ((z0.t()*links[i].R)*f[i]);
         ltorque(i) = temp(1,1);
         temp = ((z0.t()*links[i].R)*df[i]);
         dtorque(i) = temp(1,1);
      }
   }

   delete []dp;
   delete []dN;
   delete []dF;
   delete []dn;
   delete []df;
   delete []da;
   delete []dvp;
   delete []dwp;
   delete []dw;
   delete []p;
   delete []N;
   delete []F;
   delete []n;
   delete []f;
   delete []a;
   delete []vp;
   delete []wp;
   delete []w;
}
Exemple #19
0
// One pass of a mixed-radix transform for a single factor "now".
// (A, B)  : source pair, read through raw storage pointers
// (X, Y)  : destination pair, written through raw storage pointers
// after   : product of the factors already processed
// now     : factor handled by this pass
// before  : number of blocks still to be processed per output position
// NOTE(review): assumes X and Y are already sized like A and B and do not
// alias them; nothing in this function checks either.
static void fftstep(ColumnVector& A, ColumnVector& B, ColumnVector& X,
                    ColumnVector& Y, int after, int now, int before)
{
    REPORT
    Tracer trace("FFT(step)");
    // const Real twopi = 6.2831853071795864769;
    const int gamma = after * before;    // stride between the "now" inputs combined into one output
    const int delta = now * after;       // stride between successive outputs of the same (j, ia)
    // const Real angle = twopi / delta;  Real temp;
    // Real r_omega = cos(angle);  Real i_omega = -sin(angle);
    Real r_arg = 1.0;
    Real i_arg = 0.0;
    Real* x = X.Store();
    Real* y = Y.Store();   // pointers to array storage
    const int m = A.Nrows() - gamma;     // offset of the last gamma-sized slice

    for (int j = 0; j < now; j++)
    {
        Real* a = A.Store();
        Real* b = B.Store(); // pointers to array storage
        Real* x1 = x;
        Real* y1 = y;
        // each j writes its own block of "after" consecutive outputs
        x += after;
        y += after;
        for (int ia = 0; ia < after; ia++)
        {
            // generate sins & cosines explicitly rather than iteratively
            // for more accuracy; but slower
            // (r_arg, i_arg) are set by cossin for index -(j*after+ia), period delta
            cossin(-(j*after+ia), delta, r_arg, i_arg);

            Real* a1 = a++;
            Real* b1 = b++;
            Real* x2 = x1++;
            Real* y2 = y1++;
            if (now==2)
            {
                // radix-2 case: one multiply-add of the last slice with the
                // slice gamma elements before it
                REPORT int ib = before;
                if (ib) for (;;)
                    {
                        REPORT
                        Real* a2 = m + a1;
                        Real* b2 = m + b1;
                        a1 += after;
                        b1 += after;
                        Real r_value = *a2;
                        Real i_value = *b2;
                        *x2 = r_value * r_arg - i_value * i_arg + *(a2-gamma);
                        *y2 = r_value * i_arg + i_value * r_arg + *(b2-gamma);
                        if (!(--ib)) break;
                        x2 += delta;
                        y2 += delta;
                    }
            }
            else
            {
                // general case: start from the last slice and step back
                // gamma elements now-1 times, each time multiplying the
                // running value by (r_arg, i_arg) and adding the next input
                REPORT int ib = before;
                if (ib) for (;;)
                    {
                        REPORT
                        Real* a2 = m + a1;
                        Real* b2 = m + b1;
                        a1 += after;
                        b1 += after;
                        Real r_value = *a2;
                        Real i_value = *b2;
                        int in = now-1;
                        while (in--)
                        {
                            // it should be possible to make this faster
                            // hand code for now = 2,3,4,5,8
                            // use symmetry to halve number of operations
                            a2 -= gamma;
                            b2 -= gamma;
                            // keep the old real part: it is needed for the
                            // imaginary update after r_value is overwritten
                            Real temp = r_value;
                            r_value = r_value * r_arg - i_value * i_arg + *a2;
                            i_value = temp    * i_arg + i_value * r_arg + *b2;
                        }
                        *x2 = r_value;
                        *y2 = i_value;
                        if (!(--ib)) break;
                        x2 += delta;
                        y2 += delta;
                    }
            }

            // temp = r_arg;
            // r_arg = r_arg * r_omega - i_arg * i_omega;
            // i_arg = temp  * i_omega + i_arg * r_omega;

        }
    }
}