/**
 * Integrate the spinning post-Newtonian equations of motion with the adaptive
 * Runge-Kutta integrator and convert the resulting trajectory into waveform
 * data.
 *
 * The 11-component state vector is (vphi, omega, LNh[3], S1[3], S2[3]).  The
 * integrator output (yout) stores time in row 0 and the state components in
 * rows 1..11, each row being len samples long.
 *
 * Which outputs are written depends on which pointers are non-NULL:
 *  - signalvec1 != NULL: the polarization built from cos(2*alpha) is written
 *    to signalvec1 and, if signalvec2 != NULL, the one built from sin(2*alpha)
 *    to signalvec2; params->fFinal is set, and params->tC is set only when
 *    signalvec2 is NULL.
 *  - otherwise, a != NULL: a, ff, phi and shift are all written (none of the
 *    latter three is checked for NULL here) and params->fFinal is set.
 *  - otherwise only *countback is written.
 *
 * @param status      LAL status structure.
 * @param signalvec1  Optional output vector for the first polarization.
 * @param signalvec2  Optional output vector for the second polarization.
 * @param a           Optional output: interleaved amplitudes (2 per sample).
 * @param ff          Optional output: frequency per sample.
 * @param phi         Optional output: phase per sample (2 * vphi).
 * @param shift       Optional output: polarization shift per sample (2 * alpha).
 * @param countback   Output: number of samples produced by the integration.
 *                    Dereferenced unconditionally on the success path.
 * @param params      Template parameters (must not be NULL); fFinal and
 *                    possibly tC are updated.
 * @param paramsInit  Initialisation parameters (must not be NULL).
 *
 * Aborts with LALINSPIRALH_EMEM on allocation failure, with an XLAL abort on
 * other integrator failures, and with LALINSPIRALH_ESIZE when the number of
 * samples is not strictly smaller than signalvec1->length (or ff->length).
 */
void
LALSTPNAdaptiveWaveformEngine( LALStatus *status,
                               REAL4Vector *signalvec1,REAL4Vector *signalvec2,
                               REAL4Vector *a,REAL4Vector *ff,REAL8Vector *phi,REAL4Vector *shift,
                               UINT4 *countback,
                               InspiralTemplate *params,InspiralInit *paramsInit
                             )
{
  /* PN parameters */
  LALSTPNparams mparams;

  /* needed for integration */
  LALAdaptiveRungeKutta4Integrator *integrator;
  unsigned int len;
  int intreturn;
  REAL8 yinit[11];
  REAL8Array *yout = NULL;  /* stays NULL unless the integrator hands back an array */

  /* other computed values */
  REAL8 unitHz, dt, m, lengths, norm;

  INITSTATUS(status);
  ATTATCHSTATUSPTR(status);

  /* Make sure parameter and waveform structures exist. */
  ASSERT(params,     status, LALINSPIRALH_ENULL, LALINSPIRALH_MSGENULL);
  ASSERT(paramsInit, status, LALINSPIRALH_ENULL, LALINSPIRALH_MSGENULL);

  /* units */
  unitHz = params->totalMass * LAL_MTSUN_SI * (REAL8)LAL_PI;
  dt = 1.0/params->tSampling;   /* tSampling is in Hz, so dt is in seconds */
  m = params->totalMass * LAL_MTSUN_SI;

  /* length estimation (Newtonian); since integration is adaptive, we could use a better estimate */
  lengths = (5.0/256.0) * pow(LAL_PI,-8.0/3.0) * pow(params->chirpMass * LAL_MTSUN_SI * params->fLower,-5.0/3.0) / params->fLower;

  /* setup coefficients for PN equations */
  XLALSTPNAdaptiveSetParams(&mparams,params,paramsInit);

  /* initialize the coordinates */
  yinit[0] = 0.0;                       /* vphi */
  yinit[1] = params->fLower * unitHz;   /* omega (really pi M f) */

  yinit[2] = sin(params->inclination);  /* LNh(x,y,z) */
  yinit[3] = 0.0;
  yinit[4] = cos(params->inclination);

  norm = pow(params->mass1/params->totalMass,2.0);
  yinit[5] = norm * params->spin1[0];   /* S1(x,y,z) */
  yinit[6] = norm * params->spin1[1];
  yinit[7] = norm * params->spin1[2];

  norm = pow(params->mass2/params->totalMass,2.0);
  yinit[8] = norm * params->spin2[0];   /* S2(x,y,z) */
  yinit[9] = norm * params->spin2[1];
  yinit[10]= norm * params->spin2[2];

  /* start from a clean XLAL error state so the checks below see only our own failures */
  xlalErrno = 0;

  /* allocate the integrator */
  integrator = XLALAdaptiveRungeKutta4Init(11,XLALSTPNAdaptiveDerivatives,XLALSTPNAdaptiveTest,1.0e-6,1.0e-6);
  if (!integrator) {
    fprintf(stderr,"LALSTPNWaveform2: Cannot allocate integrator.\n");
    if (XLALClearErrno() == XLAL_ENOMEM) {
      ABORT(status, LALINSPIRALH_EMEM, LALINSPIRALH_MSGEMEM);
    } else {
      ABORTXLAL(status);
    }
  }

  /* stop the integration only when the test is true */
  integrator->stopontestonly = 1;

  /* run the integration; note: time is measured in units of total mass */
  len = XLALAdaptiveRungeKutta4(integrator,(void *)&mparams,yinit,0.0,lengths/m,dt/m,&yout);

  /* save the return code before the integrator is released */
  intreturn = integrator->returncode;
  XLALAdaptiveRungeKutta4Free(integrator);

  if (!len) {
    if (XLALClearErrno() == XLAL_ENOMEM) {
      ABORT(status, LALINSPIRALH_EMEM, LALINSPIRALH_MSGEMEM);
    } else {
      fprintf(stderr,"LALSTPNWaveform2: integration failed with errorcode %d.\n",intreturn);
      ABORTXLAL(status);
    }
  }

  /* report on abnormal termination (TO DO: throw some kind of LAL error?) */
  if (intreturn != 0 && intreturn != LALSTPN_TEST_ENERGY && intreturn != LALSTPN_TEST_OMEGADOT) {
    fprintf(stderr,"LALSTPNWaveform2 WARNING: integration terminated with code %d.\n",intreturn);
    fprintf(stderr,"                          Waveform parameters were m1 = %e, m2 = %e, s1 = (%e,%e,%e), s2 = (%e,%e,%e), inc = %e.\n",
                   params->mass1, params->mass2,
                   params->spin1[0], params->spin1[1], params->spin1[2],
                   params->spin2[0], params->spin2[1], params->spin2[2],
                   params->inclination);
  }

  /* check that we're not above Nyquist */
  /* NOTE(review): this is only a check of the *final* frequency if the
     integrator writes the final state back into yinit -- confirm against
     XLALAdaptiveRungeKutta4. */
  if (yinit[1]/unitHz > 0.5 * params->tSampling) {
    fprintf(stderr,"LALSTPNWaveform2 WARNING: final frequency above Nyquist.\n");
  }

  /* if we have enough space, compute the waveform components; otherwise abort */
  if ((signalvec1 && len >= signalvec1->length) || (ff && len >= ff->length)) {
    if (signalvec1) {
      fprintf(stderr,"LALSTPNWaveform2: no space to write in signalvec1: %d vs. %d\n",len,signalvec1->length);
    } else if (ff) {
      fprintf(stderr,"LALSTPNWaveform2: no space to write in ff: %d vs. %d\n",len,ff->length);
    } else {
      fprintf(stderr,"LALSTPNWaveform2: no space to write anywhere!\n");
    }

    /* ABORT returns immediately, so release the integrator output here to avoid leaking it */
    if (yout) XLALDestroyREAL8Array(yout);
    yout = NULL;

    ABORT(status, LALINSPIRALH_ESIZE, LALINSPIRALH_MSGESIZE);
  } else {
    /* set up some aliases for the returned arrays; note vector 0 is time */
    REAL8 *vphi = &yout->data[1*len]; REAL8 *omega = &yout->data[2*len];
    REAL8 *LNhx = &yout->data[3*len]; REAL8 *LNhy  = &yout->data[4*len]; REAL8 *LNhz  = &yout->data[5*len];

    /* rows 6..8 (S1) and 9..11 (S2) are not needed for the waveforms */

    *countback = len;

    if (signalvec1) { /* return polarizations */
      REAL8 v=0, amp=0, alpha=0, alpha0 = atan2(LNhy[0],LNhx[0]);

      for(unsigned int i=0;i<len;i++) {
        v = pow(omega[i],(1./3.));
        amp = params->signalAmplitude * (v*v);

        /* alpha is undefined when LNh points along z; reuse the last well-defined value */
        if(LNhx[i]*LNhx[i] + LNhy[i]*LNhy[i] > 0.0) {
          alpha = atan2(LNhy[i],LNhx[i]); alpha0 = alpha;
        } else {
          alpha = alpha0;
        }

        signalvec1->data[i]   = (REAL4)(-0.5 * amp * cos(2*vphi[i]) * cos(2*alpha) * (1.0 + LNhz[i]*LNhz[i])
                                             + amp * sin(2*vphi[i]) * sin(2*alpha) * LNhz[i]);

        if (signalvec2) {
          signalvec2->data[i] = (REAL4)(-0.5 * amp * cos(2*vphi[i]) * sin(2*alpha) * (1.0 + LNhz[i]*LNhz[i])
                                             - amp * sin(2*vphi[i]) * cos(2*alpha) * LNhz[i]);
        }
      }

      /* v still holds the value from the last sample */
      params->fFinal = pow(v,3.0)/(LAL_PI*m);
      if (!signalvec2) params->tC = yout->data[len-1];  /* TO DO: why only in this case? */
    } else if (a) { /* return coherentGW components */
      REAL8 apcommon, f2a, alpha, alpha0 = atan2(LNhy[0],LNhx[0]);

      /* (minus) amplitude for distance in m; should be (1e6 * LAL_PC_SI * params->distance) for distance in Mpc */
      apcommon = -4.0 * params->mu * LAL_MRSUN_SI/(params->distance);

      for(unsigned int i=0;i<len;i++) {
        f2a = pow(omega[i],(2./3.));

        /* alpha is undefined when LNh points along z; reuse the last well-defined value */
        if(LNhx[i]*LNhx[i] + LNhy[i]*LNhy[i] > 0.0) {
          alpha = atan2(LNhy[i],LNhx[i]); alpha0 = alpha;
        } else {
          alpha = alpha0;
        }

        ff   ->data[i]     = (REAL4)(omega[i]/unitHz);
        a    ->data[2*i]   = (REAL4)(apcommon * f2a * 0.5 * (1 + LNhz[i]*LNhz[i]));
        a    ->data[2*i+1] = (REAL4)(apcommon * f2a * LNhz[i]);
        phi  ->data[i]     = (REAL8)(2.0 * vphi[i]);
        shift->data[i]     = (REAL4)(2.0 * alpha);
      }

      params->fFinal = ff->data[len-1];
    }
  }

  if (yout) XLALDestroyREAL8Array(yout);

  DETATCHSTATUSPTR(status);
  RETURN(status);
}
/*------------------------------------------------------------------------------------------
 *
 *    Definitions of functions (only one in this file, so no prototypes needed).
 *
 *------------------------------------------------------------------------------------------
 */
/**
 * Resample unevenly sampled data onto a uniform time grid with cubic splines.
 *
 * yin is read as (dim + 1) consecutive rows of num_input_times samples each:
 * row 0 holds the input times and rows 1..dim the quantities to interpolate.
 * A new (dim + 1) x N array is created, where
 * N = (int)(last input time / deltat) + 1 (tinit does not enter this count);
 * row 0 is filled with tinit + deltat * j and rows 1..dim with the spline
 * values at those times.
 *
 * On success *yout receives the new array and N is returned.  If any
 * allocation fails, everything allocated here is released, *yout is left
 * untouched and XLAL_ERROR(XLAL_ENOMEM) is raised.
 *
 * The return values of gsl_spline_init and optimized_gsl_spline_eval_e are
 * not checked.
 */
static int SEOBNRv3OptimizedInterpolatorGeneral(
                REAL8 * yin, /**<< Data to be interpolated; time first */
                REAL8 tinit, /**<< time at which to begin interpolating */
                REAL8 deltat, /**<< Spacing between interpolated times */
                UINT4 num_input_times, /**<< The number of input times */
                REAL8Array ** yout, /**<< Interpolation output */
                size_t dim /**<< Number of quantities interpolated (e.g. if yin = {t,x,y,z} then dim 3) */
                )
{
    int failure_code = 0;
    int n_out = 0;
    REAL8Array *resampled = NULL;

    /* GSL calls below are made directly rather than through a per-call
     * CALLGSL wrapper, for speed. */
    gsl_spline *spline = gsl_spline_alloc(gsl_interp_cspline, num_input_times);
    gsl_interp_accel *lookup = gsl_interp_accel_alloc();

    n_out = (int)(yin[num_input_times-1] / deltat) + 1;

    resampled = XLALCreateREAL8ArrayL(2, dim+1, n_out);

    if (spline && lookup && resampled) {
        /* row 0 of the output: the uniform time grid */
        REAL8 *grid = resampled->data;
        int k = 0;
        while (k < n_out) {
            grid[k] = tinit + deltat * k;
            k++;
        }

        /* rows 1..dim: one spline fit and one sweep over the grid per quantity */
        for (unsigned int row = 1; row <= dim; row++) {
            REAL8 *column = resampled->data + n_out * row;

            /* state handed to the optimized evaluator; reset for every row */
            unsigned int index_old = 0;
            double x_lo_old = 0, y_lo_old = 0;
            double b_i_old = 0, c_i_old = 0, d_i_old = 0;

            gsl_spline_init(spline, yin, &(yin[num_input_times * row]), num_input_times);

            for (int s = 0; s < n_out; s++) {
                optimized_gsl_spline_eval_e(spline, grid[s], lookup, &(column[s]),
                                            &index_old, &x_lo_old, &y_lo_old,
                                            &b_i_old, &c_i_old, &d_i_old);
            }
        }
    } else {
        /* at least one allocation failed: drop the output and report ENOMEM below */
        failure_code = XLAL_ENOMEM;
        if (resampled) {
            XLALDestroyREAL8Array(resampled);
        }
        n_out = 0;
    }

    /* release the GSL helpers on both the success and the failure path */
    if (spline) {
        XLAL_CALLGSL(gsl_spline_free(spline));
    }
    if (lookup) {
        XLAL_CALLGSL(gsl_interp_accel_free(lookup));
    }

    if (failure_code) {
        XLAL_ERROR(failure_code);
    }

    *yout = resampled;
    return n_out;
}
/**
 * Resample four unevenly sampled time series onto a uniform time grid.
 *
 * This function is largely based on XLALAdaptiveRungeKutta4(), found in
 * lal/src/utilities/LALAdaptiveRungeKutta4.c.  It takes an array of time
 * series that are *not* evenly spaced in time and uses cubic-spline
 * interpolation to resample them at a uniform spacing.
 *
 * GSL's built-in evaluation routines recompute the interpolation
 * coefficients every time the interpolator is called.  That can be extremely
 * inefficient: for SEOBNRv4 the first data points are separated by a very
 * large dt, so the same coefficients might be recomputed 10,000 times or
 * more.  The evaluation here therefore goes through
 * optimized_gsl_spline_eval_e, and additionally assumes that the data are
 * sampled at points monotone in time.
 *
 * Layout: yin->data is read as 5 consecutive rows of num_input_times samples
 * (row 0 = times, rows 1..4 = quantities).  Amplitude and phase are not
 * interpolated, so the output has only dim + 1 = 5 rows.
 *
 * @param yin              Input array; time row first.
 * @param tinit            Desired initial time of the output grid.
 * @param deltat           Desired time spacing of the output grid.
 * @param num_input_times  Number of time samples in each row of yin.
 * @param yout             Receives the newly created output array on success.
 *
 * @return The number of output samples,
 *         (int)(last input time / deltat) + 1.  If an allocation fails,
 *         everything allocated here is released, *yout is left untouched and
 *         XLAL_ERROR(XLAL_ENOMEM) is raised.
 */
UNUSED static int
SEOBNRv2OptimizedInterpolatorNoAmpPhase (REAL8Array * yin, REAL8 tinit,
					 REAL8 deltat, UINT4 num_input_times,
					 REAL8Array ** yout)
{
  /* number of interpolated quantities (amplitude and phase are excluded) */
  size_t dim = 4;

  /* needed for the final interpolation */
  gsl_spline *spline = gsl_spline_alloc (gsl_interp_cspline, num_input_times);
  gsl_interp_accel *lookup = gsl_interp_accel_alloc ();

  int n_out = (int) (yin->data[num_input_times - 1] / deltat) + 1;
  REAL8Array *resampled = XLALCreateREAL8ArrayL (2, dim + 1, n_out);

  /* Guard: if anything failed to allocate, release what we did get and bail. */
  if (!spline || !lookup || !resampled) {
    if (resampled) {
      XLALDestroyREAL8Array (resampled);
    }
    if (spline) {
      XLAL_CALLGSL (gsl_spline_free (spline));
    }
    if (lookup) {
      XLAL_CALLGSL (gsl_interp_accel_free (lookup));
    }
    XLAL_ERROR (XLAL_ENOMEM);
  }

  /* row 0 of the output: the uniform time grid */
  REAL8 *grid = resampled->data;
  for (int s = 0; s < n_out; s++) {
    grid[s] = tinit + deltat * s;
  }

  /* rows 1..dim: fit a spline to each input row and sweep it over the grid */
  unsigned int row = 1;
  while (row <= dim) {
    REAL8 *column = resampled->data + n_out * row;

    /* state handed to the optimized evaluator; reset for every row */
    unsigned int index_old = 0;
    double x_lo_old = 0, y_lo_old = 0;
    double b_i_old = 0, c_i_old = 0, d_i_old = 0;

    gsl_spline_init (spline, &yin->data[0], &yin->data[num_input_times * row],
		     num_input_times);

    for (int s = 0; s < n_out; s++) {
      optimized_gsl_spline_eval_e (spline, grid[s], lookup, &(column[s]),
				   &index_old, &x_lo_old, &y_lo_old,
				   &b_i_old, &c_i_old, &d_i_old);
    }

    row++;
  }

  /* the GSL helpers are no longer needed once every row is filled */
  XLAL_CALLGSL (gsl_spline_free (spline));
  XLAL_CALLGSL (gsl_interp_accel_free (lookup));

  *yout = resampled;
  return n_out;
}