Example #1
0
// Run one 3d FFT on the GPU through cuFFT.
//
//   in, out  host buffers; only touched when plan->nprocs > 1, where the
//            optional pre-remap writes its result into out
//   flag     -1 requests CUFFT_FORWARD, any other value CUFFT_INVERSE
//   plan     3d FFT plan; plan->iterate is incremented on every call
//
// The transform is issued on plan->cudata / plan->cudata2 and nothing is
// copied back to the host here.  When plan->init is not set, or when
// nprocs <= 1, no host data is uploaded, so the device buffers are
// presumably filled by the caller beforehand -- TODO confirm.
//
// Any CUDA or cuFFT failure prints a message and terminates the process.
// Compiles to an empty function unless FFT_CUFFT is defined.
void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  plan->iterate++;

  my_times starttime;
  FFT_DATA *data = NULL;
  int nprocs = plan->nprocs;

  // pre-remap to prepare for 1st FFTs if needed
  // NOTE(review): unlike the CPU variants, the remap result always goes to
  // out here, independent of plan->pre_target -- confirm this is intended

  if (nprocs > 1) {
    // timestamp is taken but never read in this function; call kept in case
    // my_gettime has effects that callers rely on
    if (plan->init) my_gettime(CLOCK_REALTIME,&starttime);

    if (plan->pre_plan) {
      if (plan->init)
        remap_3d((double *) in, (double *) out, (double *) plan->scratch,
                 plan->pre_plan);
      data = out;
    }
    else
      data = in;
  }

  // upload the host data and convert it into the cuFFT input buffer

  if (plan->init && nprocs > 1) {
    cudaError_t cpyerr = cudaSuccess;

    // the two sizeof tests are mutually exclusive, so at most one copy runs
    if (sizeof(FFT_CFLOAT) == sizeof(double))
      cpyerr = cudaMemcpy((void*) (plan->cudata2), (void*) data,
                          plan->cudatasize/2, cudaMemcpyHostToDevice);
    if (sizeof(FFT_CFLOAT) == sizeof(float))
      cpyerr = cudaMemcpy((void*) (plan->cudata2), (void*) data,
                          plan->cudatasize, cudaMemcpyHostToDevice);

    // a failed upload would otherwise be transformed as if it were valid
    if (cpyerr != cudaSuccess) {
      printf("ErrorCUDA: %s\n",cudaGetErrorString(cpyerr));
      exit(EXIT_FAILURE);
    }

    initfftdata((double*)plan->cudata2,(FFT_CFLOAT*)plan->cudata,
                plan->nfast,plan->nmid,plan->nslow);
  }

  // the 3d transform itself

  cufftResult retvalc;
  if (flag == -1)
    retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata2, CUFFT_FORWARD);
  else
    retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata2, CUFFT_INVERSE);

  if (retvalc != CUFFT_SUCCESS) {
    printf("ErrorCUFFT: %i\n",retvalc);
    exit(EXIT_FAILURE);
  }

  FFTsyncthreads();
#endif
}
Example #2
0
File: fft_3d.c Project: jzrake/cow
/* Run total/length contiguous 1d complex FFTs of size `length`, in place on
   `data`.  flag == +1 selects FFTW_FORWARD, anything else FFTW_BACKWARD.
   The FFTW plan is created with FFTW_ESTIMATE and destroyed again on every
   call.
   NOTE(review): the plan returned by fftw_plan_many_dft is not checked for
   NULL before it is executed. */
static void fft_3d_batch_1d(FFT_DATA *data, int total, int length, int flag)
{
  int n = length;
  int sign = (flag == +1) ? FFTW_FORWARD : FFTW_BACKWARD;
  fftw_plan batch = fftw_plan_many_dft(1, &n, total/length,
                                       data, NULL, 1, length,
                                       data, NULL, 1, length,
                                       sign, FFTW_ESTIMATE);
  fftw_execute(batch);
  fftw_destroy_plan(batch);
}

/* Perform a 3d FFT as three passes of batched 1d FFTs, each followed by a
   remap_3d call.

   in    input data; transformed in place by the first pass when
         plan->pre_plan is not set
   out   receives the final data whenever the last remap targets it; scaled
         in place when scaling applies
   flag  +1 = forward transform, any other value = backward transform
   plan  supplies the remap plans, the pass sizes (total1..3, length1..3),
         the scratch/copy buffers and the normalisation settings

   Scaling by plan->norm is applied to the first plan->normnum elements of
   out only when flag == -1 and plan->scaled is set. */
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
  int i,num;
  double norm;
  FFT_DATA *data,*copy;

  /* pre-remap to prepare for 1st FFTs if needed
     copy = loc for remap result */

  if (plan->pre_plan) {
    copy = (plan->pre_target == 0) ? out : plan->copy;
    remap_3d((double *) in, (double *) copy, (double *) plan->scratch,
             plan->pre_plan);
    data = copy;
  }
  else
    data = in;

  /* 1d FFTs along fast axis */

  fft_3d_batch_1d(data, plan->total1, plan->length1, flag);

  /* 1st mid-remap to prepare for 2nd FFTs
     copy = loc for remap result */

  copy = (plan->mid1_target == 0) ? out : plan->copy;
  remap_3d((double *) data, (double *) copy, (double *) plan->scratch,
           plan->mid1_plan);
  data = copy;

  /* 1d FFTs along mid axis */

  fft_3d_batch_1d(data, plan->total2, plan->length2, flag);

  /* 2nd mid-remap to prepare for 3rd FFTs
     copy = loc for remap result */

  copy = (plan->mid2_target == 0) ? out : plan->copy;
  remap_3d((double *) data, (double *) copy, (double *) plan->scratch,
           plan->mid2_plan);
  data = copy;

  /* 1d FFTs along slow axis */

  fft_3d_batch_1d(data, plan->total3, plan->length3, flag);

  /* post-remap to put data in output format if needed
     destination is always out */

  if (plan->post_plan)
    remap_3d((double *) data, (double *) out, (double *) plan->scratch,
             plan->post_plan);

  /* scaling if required */

  if (flag == -1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    for (i = 0; i < num; i++) {
      out[i][0] *= norm;
      out[i][1] *= norm;
    }
  }
}
/* 3d FFT built from three in-place 1d FFT passes using the plans stored in
   `plan`, with a remap_3d call after the first and second pass and optional
   remaps before the first and after the last pass.

   in    input data; the first pass runs directly on it when there is no
         pre-remap
   out   destination of the post-remap; scaled in place when required
   flag  -1 uses the *_forward plans, any other value the *_backward plans
   plan  FFTW plans, remap plans, scratch/copy buffers, normalisation

   Scaling by plan->norm covers the first plan->normnum elements of out and
   happens only for flag == 1 with plan->scaled set. */
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)

{
  const int forward = (flag == -1);
  FFT_DATA *work = in;
  FFT_DATA *dest;
  double scale;
  int k,count;

  /* optional pre-remap; its target becomes the working buffer */

  if (plan->pre_plan) {
    dest = (plan->pre_target == 0) ? out : plan->copy;
    remap_3d((double *) in, (double *) dest, (double *) plan->scratch,
             plan->pre_plan);
    work = dest;
  }

  /* pass 1: fast axis */

  fftw_execute_dft(forward ? plan->plan_fast_forward
                           : plan->plan_fast_backward,
                   work,work);

  /* remap into the layout needed by the mid-axis pass */

  dest = (plan->mid1_target == 0) ? out : plan->copy;
  remap_3d((double *) work, (double *) dest, (double *) plan->scratch,
           plan->mid1_plan);
  work = dest;

  /* pass 2: mid axis */

  fftw_execute_dft(forward ? plan->plan_mid_forward
                           : plan->plan_mid_backward,
                   work,work);

  /* remap into the layout needed by the slow-axis pass */

  dest = (plan->mid2_target == 0) ? out : plan->copy;
  remap_3d((double *) work, (double *) dest, (double *) plan->scratch,
           plan->mid2_plan);
  work = dest;

  /* pass 3: slow axis */

  fftw_execute_dft(forward ? plan->plan_slow_forward
                           : plan->plan_slow_backward,
                   work,work);

  /* optional post-remap, always into out */

  if (plan->post_plan)
    remap_3d((double *) work, (double *) out, (double *) plan->scratch,
             plan->post_plan);

  /* normalisation, only for flag == 1 on a scaled plan */

  if (flag == 1 && plan->scaled) {
    scale = plan->norm;
    count = plan->normnum;
    for (k = 0; k < count; k++) {
      out[k][0] *= scale;
      out[k][1] *= scale;
    }
  }

}
// Perform one 3d FFT as three passes of 1d FFTs (fast, mid and slow axis),
// calling remap_3d after the first and second pass and, if the plan asks for
// it, before the first and after the last pass.
//
//   in    input data; when plan->pre_plan is not set the first pass works
//         directly on this buffer
//   out   destination of the post-remap and target of the final scaling
//   flag  -1 selects the forward plan/handle/config in the backends that
//         keep separate ones (MKL, DEC, FFTW2, FFTW3, KISS); the other
//         backends hand flag to FFT_1D unchanged
//   plan  remap plans, per-pass sizes (total1..3, length1..3), backend
//         specific plans/coefficients, scratch and copy buffers, norm
//
// Exactly one backend section of each #if/#elif chain is compiled; the
// final #else branch uses kiss_fft.
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
  int i,total,length,offset,num;
  FFT_SCALAR norm, *out_ptr;
  FFT_DATA *data,*copy;

  // system specific constants

#if defined(FFT_SCSL)
  int isys = 0;
  FFT_PREC scalef = 1.0;
#elif defined(FFT_DEC)
  char c = 'C';
  char f = 'F';
  char b = 'B';
  int one = 1;
#elif defined(FFT_T3E)
  int isys = 0;
  double scalef = 1.0;
#elif defined(FFT_ACML)
  int info;
#elif defined(FFT_FFTW3)
  FFTW_API(plan) theplan;
#else
  // nothing to do for other FFTs.
#endif

  // pre-remap to prepare for 1st FFTs if needed
  // copy = loc for remap result
  // (out when plan->pre_target == 0, otherwise the plan's own copy buffer)

  if (plan->pre_plan) {
    if (plan->pre_target == 0) copy = out;
    else copy = plan->copy;
    remap_3d((FFT_SCALAR *) in, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch,
             plan->pre_plan);
    data = copy;
  }
  else
    data = in;

  // 1d FFTs along fast axis
  // total/length = number of 1d transforms, each `length` elements long and
  // stored back to back in data

  total = plan->total1;
  length = plan->length1;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff1);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
#elif defined(FFT_ACML)
  // ACML path: a single call that is given the transform count instead of
  // looping over offsets
  num=total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff1,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff1);
#elif defined(FFT_MKL)
  // MKL path: one call on the whole buffer through the per-axis handle
  if (flag == -1)
    DftiComputeForward(plan->handle_fast,data);
  else
    DftiComputeBackward(plan->handle_fast,data);
#elif defined(FFT_DEC)
  // DEC path: direction is passed as the character 'F' or 'B'
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
  else
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
#elif defined(FFT_FFTW2)
  // FFTW2 path: one call for all total/length transforms, no output buffer
  if (flag == -1)
    fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  // FFTW3 path: execute the stored plan with data as both input and output
  if (flag == -1)
    theplan=plan->plan_fast_forward;
  else
    theplan=plan->plan_fast_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  // default path: one kiss_fft call per 1d transform, in place
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]);
  else
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]);
#endif

  // 1st mid-remap to prepare for 2nd FFTs
  // copy = loc for remap result

  if (plan->mid1_target == 0) copy = out;
  else copy = plan->copy;
  remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch,
           plan->mid1_plan);
  data = copy;

  // 1d FFTs along mid axis
  // same backend dispatch as the fast-axis pass, using the *2 / *_mid members

  total = plan->total2;
  length = plan->length2;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff2);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
#elif defined(FFT_ACML)
  num=total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff2,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff2);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_mid,data);
  else
    DftiComputeBackward(plan->handle_mid,data);
#elif defined(FFT_DEC)
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
  else
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_mid_forward;
  else
    theplan=plan->plan_mid_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]);
  else
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]);
#endif

  // 2nd mid-remap to prepare for 3rd FFTs
  // copy = loc for remap result

  if (plan->mid2_target == 0) copy = out;
  else copy = plan->copy;
  remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch,
           plan->mid2_plan);
  data = copy;

  // 1d FFTs along slow axis
  // same backend dispatch again, using the *3 / *_slow members

  total = plan->total3;
  length = plan->length3;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff3);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_ACML)
  num=total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff3,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff3);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_slow,data);
  else
    DftiComputeBackward(plan->handle_slow,data);
#elif defined(FFT_DEC)
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
  else
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_slow_forward;
  else
    theplan=plan->plan_slow_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]);
  else
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]);
#endif

  // post-remap to put data in output format if needed
  // destination is always out

  if (plan->post_plan)
    remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) out, (FFT_SCALAR *) plan->scratch,
             plan->post_plan);

  // scaling if required
  // general case: multiply the first plan->normnum elements of out by
  // plan->norm, only for flag == 1 on a scaled plan
#if !defined(FFT_T3E) && !defined(FFT_ACML)
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    out_ptr = (FFT_SCALAR *)out;
    for (i = 0; i < num; i++) {
#if defined(FFT_FFTW3)
      // out is walked as a flat FFT_SCALAR array, two scalars per element
      *(out_ptr++) *= norm;
      *(out_ptr++) *= norm;
#elif defined(FFT_MKL)
      out[i] *= norm;
#else
      out[i].re *= norm;
      out[i].im *= norm;
#endif
    }
  }
#endif

#ifdef FFT_T3E
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    // NOTE(review): (norm,norm) parses as a comma expression whose value is
    // norm, so this multiplies out[i] by norm -- confirm that is the intent
    for (i = 0; i < num; i++) out[i] *= (norm,norm);
  }
#endif

#ifdef FFT_ACML
  // NOTE(review): unlike the other backends, this path scales on every
  // call, regardless of flag and plan->scaled -- confirm this is intended
  norm = plan->norm;
  num = plan->normnum;
  for (i = 0; i < num; i++) {
    out[i].re *= norm;
    out[i].im *= norm;
  }
#endif

}
Example #5
0
/* 3d FFT carried out as three passes of 1d FFTs (fast, mid, slow axis) with
   a remap_3d call after the first and second pass, plus optional remaps
   before the first and after the last pass.

   in    input data; the first pass works directly on it when
         plan->pre_plan is not set
   out   destination of the post-remap; scaled in place when required
   flag  -1 selects the forward direction in the DEC and FFTW sections; the
         SGI, INTEL and T3E sections hand flag to FFT_1D unchanged
   plan  remap plans, per-pass sizes, backend plans/coefficients, scratch
         and copy buffers, normalisation settings

   Each backend section is guarded by its own #ifdef.  Scaling by plan->norm
   covers the first plan->normnum elements of out and happens only for
   flag == 1 with plan->scaled set. */
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)

{
    int k,total,length,offset,count;
    double scale;
    FFT_DATA *work,*dest;

    /* system specific constants */

#ifdef FFT_DEC
    char c = 'C';
    char f = 'F';
    char b = 'B';
    int one = 1;
#endif
#ifdef FFT_T3E
    int isys = 0;
    double scalef = 1.0;
#endif

    /* optional pre-remap; its target becomes the working buffer */

    work = in;
    if (plan->pre_plan) {
        dest = (plan->pre_target == 0) ? out : plan->copy;
        remap_3d((double *) in, (double *) dest, (double *) plan->scratch,
                 plan->pre_plan);
        work = dest;
    }

    /* pass 1: fast axis */

    total = plan->total1;
    length = plan->length1;

#ifdef FFT_SGI
    for (offset = 0; offset < total; offset += length)
        FFT_1D(flag,length,&work[offset],1,plan->coeff1);
#endif
#ifdef FFT_INTEL
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&work[offset],&length,&flag,plan->coeff1);
#endif
#ifdef FFT_DEC
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&c,&c,(flag == -1) ? &f : &b,
               &work[offset],&work[offset],&length,&one);
#endif
#ifdef FFT_T3E
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&flag,&length,&scalef,&work[offset],&work[offset],
               plan->coeff1,plan->work1,&isys);
#endif
#ifdef FFT_FFTW
    fftw((flag == -1) ? plan->plan_fast_forward : plan->plan_fast_backward,
         total/length,work,1,length,NULL,0,0);
#endif

    /* remap into the layout needed by the mid-axis pass */

    dest = (plan->mid1_target == 0) ? out : plan->copy;
    remap_3d((double *) work, (double *) dest, (double *) plan->scratch,
             plan->mid1_plan);
    work = dest;

    /* pass 2: mid axis */

    total = plan->total2;
    length = plan->length2;

#ifdef FFT_SGI
    for (offset = 0; offset < total; offset += length)
        FFT_1D(flag,length,&work[offset],1,plan->coeff2);
#endif
#ifdef FFT_INTEL
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&work[offset],&length,&flag,plan->coeff2);
#endif
#ifdef FFT_DEC
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&c,&c,(flag == -1) ? &f : &b,
               &work[offset],&work[offset],&length,&one);
#endif
#ifdef FFT_T3E
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&flag,&length,&scalef,&work[offset],&work[offset],
               plan->coeff2,plan->work2,&isys);
#endif
#ifdef FFT_FFTW
    fftw((flag == -1) ? plan->plan_mid_forward : plan->plan_mid_backward,
         total/length,work,1,length,NULL,0,0);
#endif

    /* remap into the layout needed by the slow-axis pass */

    dest = (plan->mid2_target == 0) ? out : plan->copy;
    remap_3d((double *) work, (double *) dest, (double *) plan->scratch,
             plan->mid2_plan);
    work = dest;

    /* pass 3: slow axis */

    total = plan->total3;
    length = plan->length3;

#ifdef FFT_SGI
    for (offset = 0; offset < total; offset += length)
        FFT_1D(flag,length,&work[offset],1,plan->coeff3);
#endif
#ifdef FFT_INTEL
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&work[offset],&length,&flag,plan->coeff3);
#endif
#ifdef FFT_DEC
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&c,&c,(flag == -1) ? &f : &b,
               &work[offset],&work[offset],&length,&one);
#endif
#ifdef FFT_T3E
    for (offset = 0; offset < total; offset += length)
        FFT_1D(&flag,&length,&scalef,&work[offset],&work[offset],
               plan->coeff3,plan->work3,&isys);
#endif
#ifdef FFT_FFTW
    fftw((flag == -1) ? plan->plan_slow_forward : plan->plan_slow_backward,
         total/length,work,1,length,NULL,0,0);
#endif

    /* optional post-remap, always into out */

    if (plan->post_plan)
        remap_3d((double *) work, (double *) out, (double *) plan->scratch,
                 plan->post_plan);

    /* normalisation, only for flag == 1 on a scaled plan */

    if (flag == 1 && plan->scaled) {
        scale = plan->norm;
        count = plan->normnum;
        for (k = 0; k < count; k++) {
#ifdef FFT_T3E
            /* comma expression, kept exactly as the T3E path had it */
            out[k] *= (scale,scale);
#else
            out[k].re *= scale;
            out[k].im *= scale;
#endif
        }
    }

}
Example #6
0
// 3d FFT carried out as three passes of 1d FFTs (fast, mid, slow axis) with
// a remap_3d call after the first and second pass, plus optional remaps
// before the first and after the last pass.
//
//   in    input data; the first pass works directly on it when
//         plan->pre_plan is not set
//   out   destination of the post-remap; scaled in place when required
//   flag  -1 selects the forward handle/plan/config, any other value the
//         backward one
//   plan  remap plans, per-pass sizes, backend plans, scratch and copy
//         buffers, normalisation settings
//
// Backend is chosen at compile time: FFT_MKL, FFT_FFTW2, FFT_FFTW3, or
// kiss_fft when none of them is defined.  Scaling by plan->norm covers the
// first plan->normnum elements of out and happens only for flag == 1 with
// plan->scaled set.
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
  const int forward = (flag == -1);
  int k,total,length,offset,count;
  FFT_SCALAR scale;
#if defined(FFT_FFTW3)
  FFT_SCALAR *cursor;
  FFTW_API(plan) theplan;
#endif
  FFT_DATA *work = in;
  FFT_DATA *dest;

  // optional pre-remap; its target becomes the working buffer

  if (plan->pre_plan) {
    dest = (plan->pre_target == 0) ? out : plan->copy;
    remap_3d((FFT_SCALAR *) in, (FFT_SCALAR *) dest,
             (FFT_SCALAR *) plan->scratch, plan->pre_plan);
    work = dest;
  }

  // pass 1: fast axis

  total = plan->total1;
  length = plan->length1;

#if defined(FFT_MKL)
  if (forward) DftiComputeForward(plan->handle_fast,work);
  else DftiComputeBackward(plan->handle_fast,work);
#elif defined(FFT_FFTW2)
  fftw(forward ? plan->plan_fast_forward : plan->plan_fast_backward,
       total/length,work,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  theplan = forward ? plan->plan_fast_forward : plan->plan_fast_backward;
  FFTW_API(execute_dft)(theplan,work,work);
#else
  for (offset = 0; offset < total; offset += length)
    kiss_fft(forward ? plan->cfg_fast_forward : plan->cfg_fast_backward,
             &work[offset],&work[offset]);
#endif

  // remap into the layout needed by the mid-axis pass

  dest = (plan->mid1_target == 0) ? out : plan->copy;
  remap_3d((FFT_SCALAR *) work, (FFT_SCALAR *) dest,
           (FFT_SCALAR *) plan->scratch, plan->mid1_plan);
  work = dest;

  // pass 2: mid axis

  total = plan->total2;
  length = plan->length2;

#if defined(FFT_MKL)
  if (forward) DftiComputeForward(plan->handle_mid,work);
  else DftiComputeBackward(plan->handle_mid,work);
#elif defined(FFT_FFTW2)
  fftw(forward ? plan->plan_mid_forward : plan->plan_mid_backward,
       total/length,work,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  theplan = forward ? plan->plan_mid_forward : plan->plan_mid_backward;
  FFTW_API(execute_dft)(theplan,work,work);
#else
  for (offset = 0; offset < total; offset += length)
    kiss_fft(forward ? plan->cfg_mid_forward : plan->cfg_mid_backward,
             &work[offset],&work[offset]);
#endif

  // remap into the layout needed by the slow-axis pass

  dest = (plan->mid2_target == 0) ? out : plan->copy;
  remap_3d((FFT_SCALAR *) work, (FFT_SCALAR *) dest,
           (FFT_SCALAR *) plan->scratch, plan->mid2_plan);
  work = dest;

  // pass 3: slow axis

  total = plan->total3;
  length = plan->length3;

#if defined(FFT_MKL)
  if (forward) DftiComputeForward(plan->handle_slow,work);
  else DftiComputeBackward(plan->handle_slow,work);
#elif defined(FFT_FFTW2)
  fftw(forward ? plan->plan_slow_forward : plan->plan_slow_backward,
       total/length,work,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  theplan = forward ? plan->plan_slow_forward : plan->plan_slow_backward;
  FFTW_API(execute_dft)(theplan,work,work);
#else
  for (offset = 0; offset < total; offset += length)
    kiss_fft(forward ? plan->cfg_slow_forward : plan->cfg_slow_backward,
             &work[offset],&work[offset]);
#endif

  // optional post-remap, always into out

  if (plan->post_plan)
    remap_3d((FFT_SCALAR *) work, (FFT_SCALAR *) out,
             (FFT_SCALAR *) plan->scratch, plan->post_plan);

  // normalisation, only for flag == 1 on a scaled plan

  if (flag == 1 && plan->scaled) {
    scale = plan->norm;
    count = plan->normnum;
#if defined(FFT_FFTW3)
    // out is walked as a flat FFT_SCALAR array, two scalars per element
    cursor = (FFT_SCALAR *) out;
#endif
    for (k = 0; k < count; k++) {
#if defined(FFT_FFTW3)
      cursor[0] *= scale;
      cursor[1] *= scale;
      cursor += 2;
#elif defined(FFT_MKL)
      out[k] *= scale;
#else
      out[k].re *= scale;
      out[k].im *= scale;
#endif
    }
  }
}