Пример #1
0
/*
 * Execute the multi-dimensional plan p on howmany arrays.
 *
 * Array number i starts at in + i * idist (out + i * odist for the
 * output).  When p->is_in_place is set the out/ostride/odist arguments
 * are not used: the input array doubles as the output.
 *
 * If the plan needs scratch space (p->nwork != 0) but does not carry its
 * own (p->work == NULL), a temporary buffer is allocated for the duration
 * of the call and released before returning.
 */
void fftwnd(fftwnd_plan p, int howmany,
	    fftw_complex *in, int istride, int idist,
	    fftw_complex *out, int ostride, int odist)
{
     /* nonzero when this call must allocate (and later free) the scratch */
     const int owns_scratch = p->nwork && !p->work;
     fftw_complex *scratch;

#ifdef FFTW_DEBUG
     if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
	 && p->nwork && p->work)
	  fftw_die("bug with FFTW_THREADSAFE flag\n");
#endif

     if (owns_scratch)
	  scratch = (fftw_complex *) fftw_malloc(p->nwork
						 * sizeof(fftw_complex));
     else
	  scratch = p->work;

     if (p->rank == 1) {
	  /* a single dimension is handled by the 1-D routine directly */
	  if (p->is_in_place)
	       fftw(p->plans[0], howmany, in, istride, idist,
		    scratch, 1, 0);
	  else
	       fftw(p->plans[0], howmany, in, istride, idist,
		    out, ostride, odist);
     } else if (p->rank != 0) {
	  /* every rank other than 0 and 1 takes the general path */
	  if (p->is_in_place) {
	       out = in;
	       ostride = istride;
	       odist = idist;
	  }

	  if (howmany > 1 && odist < ostride) {
	       /* arrays are interleaved: push the howmany loop to the leaves */
	       fftwnd_aux_howmany(p, 0, howmany,
				  in, istride, idist,
				  out, ostride, odist,
				  scratch);
	  } else {
	       int which;

	       for (which = 0; which < howmany; ++which)
		    fftwnd_aux(p, 0,
			       in + which * idist, istride,
			       out + which * odist, ostride,
			       scratch);
	  }
     }
     /* rank 0: nothing to transform */

     if (owns_scratch)
	  fftw_free(scratch);
}
Пример #2
0
/*
 * Transform the n-point array `in' into `out', whatever kind of plan
 * is supplied.  An in-place plan cannot write to a separate output, so
 * for such a plan the input is first copied into `out' (via array_copy)
 * and then transformed there.
 */
static void fftw_out_of_place(fftw_plan plan, int n,
			      fftw_complex *in, fftw_complex *out)
{
     if (!(plan->flags & FFTW_IN_PLACE)) {
	  /* ordinary plan: transform straight from in to out */
	  fftw(plan, 1, in, 1, n, out, 1, n);
	  return;
     }

     /* in-place plan: copy, then transform the copy where it sits */
     array_copy(out, in, n);
     fftw(plan, 1, out, 1, n, (fftw_complex *)0, 1, n);
}
Пример #3
0
/*
 * Fortran-callable wrapper: run the plan *p once on the array a with
 * unit stride and no output array.  ldz is accepted but not used.
 * Always returns 0.
 */
int F77_FUNC_ (fft_z_stick_single, FFT_Z_STICK_SINGLE)
  (fftw_plan *p, FFTW_COMPLEX *a, int *ldz)
{
  /* howmany = 1, istride = 1, idist = 0; output arguments are all zero */
  fftw(*p, 1, a, 1, 0, (FFTW_COMPLEX *) 0, 0, 0);
  return 0;
}
Пример #4
0
/*
 * Variant of fftwnd_aux that carries the howmany loop all the way down
 * to the 1-D transforms instead of looping over whole arrays at the top.
 * This gives better locality when dist < stride.
 *
 * Dimensions after cur_dim are transformed first (recursively, or
 * directly when only the last one remains); dimension cur_dim itself is
 * then transformed in place on `out'.
 */
void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
			int howmany,
			fftw_complex *in, int istride, int idist,
			fftw_complex *out, int ostride, int odist,
			fftw_complex *work)
{
     const int len = p->n[cur_dim];
     const int inner = p->n_after[cur_dim];
     int j;

     if (cur_dim != p->rank - 2) {
	  /*
	   * Two or more dimensions remain below this one: recurse on
	   * each hyperslab in turn.
	   */
	  for (j = 0; j < len; ++j)
	       fftwnd_aux_howmany(p, cur_dim + 1, howmany,
				  in + j * inner * istride, istride, idist,
				  out + j * inner * ostride, ostride, odist,
				  work);
     } else if (p->is_in_place) {
	  /* only the last dimension is left; transform it where it is */
	  for (j = 0; j < len; ++j)
	       fftw(p->plans[p->rank - 1], howmany,
		    in + j * inner * istride, istride, idist,
		    work, 1, 0);
     } else {
	  /* only the last dimension is left; transform it from in to out */
	  for (j = 0; j < len; ++j)
	       fftw(p->plans[p->rank - 1], howmany,
		    in + j * inner * istride, istride, idist,
		    out + j * inner * ostride, ostride, odist);
     }

     /* finally the current dimension, always in place on out */
     if (p->nbuffers != 0) {
	  /* go through contiguous copy buffers located after work[0..n) */
	  for (j = 0; j < inner; ++j)
	       fftw_buffered(p->plans[cur_dim], howmany,
			     out + j * ostride, inner * ostride, odist,
			     work, p->nbuffers, work + len);
     } else {
	  for (j = 0; j < inner; ++j)
	       fftw(p->plans[cur_dim], howmany,
		    out + j * ostride, inner * ostride, odist,
		    work, 1, 0);
     }
}
Пример #5
0
/*
 * Discrete Fourier transform of the n-point complex sequence whose real
 * parts are in jr[] and imaginary parts in ji[]; the result overwrites
 * both arrays.
 *
 *   iflag == 0  selects FFTW_FORWARD, and the result is scaled by 1/n;
 *   iflag != 0  selects FFTW_BACKWARD, and the result is left unscaled.
 *
 * On the first call the environment is consulted:
 *   GRACE_FFTW_RAM_WISDOM   integer parsed into using_wisdom;
 *   GRACE_FFTW_WISDOM_FILE  if non-empty, wisdom is imported from that
 *                           file, save_wisdom is registered with atexit()
 *                           and using_wisdom is forced to 1.
 *
 * If the work buffer or the plan cannot be obtained the function returns
 * without touching jr/ji and without leaking the other resource.
 */
void dft(double *jr, double *ji, int n, int iflag)
{
  fftw_plan plan;
  int i;
  double ninv;
  FFTW_COMPLEX *cbuf;
  static int wisdom_inited=0;
  char *ram_cache_wisdom;
  int plan_flags;

  if(!wisdom_inited)  {
    wisdom_inited=1;
    wisdom_file=getenv("GRACE_FFTW_WISDOM_FILE");
    ram_cache_wisdom=getenv("GRACE_FFTW_RAM_WISDOM");

    /* turn on wisdom if it is requested even without persistent storage */
    if(ram_cache_wisdom) sscanf(ram_cache_wisdom, "%d", &using_wisdom);

    if(wisdom_file && wisdom_file[0] ) {
      /* if a file was specified in GRACE_FFTW_WISDOM_FILE, try to read it */
      FILE *wf;
      wf=fopen(wisdom_file,"r");
      if(wf) {
	/* best effort: the import status is deliberately not acted upon */
	(void) fftw_import_wisdom_from_file(wf);
	fclose(wf);
	initial_wisdom=fftw_export_wisdom_to_string();
      } else initial_wisdom=0;
      atexit(save_wisdom);
      using_wisdom=1; /* if a file is specified, always use wisdom */
    }
  }

  plan_flags=using_wisdom? (FFTW_USE_WISDOM | FFTW_MEASURE) : FFTW_ESTIMATE;

  /*
   * Allocate the buffer before planning so that an allocation failure
   * cannot leave a freshly created plan behind.
   */
  cbuf=xcalloc(n, sizeof(*cbuf));
  if(!cbuf) return;

  plan=fftw_create_plan(n, iflag?FFTW_BACKWARD:FFTW_FORWARD,
		   plan_flags | FFTW_IN_PLACE);
  if(!plan) {
    /* planner refused this size: give the buffer back and bail out */
    XCFREE(cbuf);
    return;
  }

  for(i=0; i<n; i++) {
    cbuf[i].re=jr[i]; cbuf[i].im=ji[i];
  }
  /* in-place plan: the output arguments are not used */
  fftw(plan, 1, cbuf, 1, 1, 0, 1, 1);
  fftw_destroy_plan(plan);

  if(!iflag) {
    /* forward transform: normalize by the number of points */
    ninv=1.0/n;
    for(i=0; i<n; i++) {
      jr[i]=cbuf[i].re*ninv; ji[i]=cbuf[i].im*ninv;
    }
  } else {
    for(i=0; i<n; i++) {
      jr[i]=cbuf[i].re; ji[i]=cbuf[i].im;
    }
  }

  XCFREE(cbuf);

}
Пример #6
0
/*
 * Fortran-callable wrapper: run the plan *p on *nstick_l arrays stored
 * in zstick.  Each array has unit stride and consecutive arrays start
 * *ldz elements apart.  No output array is supplied.  Always returns 0.
 */
int F77_FUNC_ (fft_z_stick, FFT_Z_STICK)
   (fftw_plan *p, FFTW_COMPLEX *zstick, int *ldz, int *nstick_l)
{
   const int n_sticks = *nstick_l;   /* number of transforms */
   const int spacing  = *ldz;        /* distance between their starts */

   fftw(*p, n_sticks, zstick, 1, spacing, 0, 0, 0);
   return 0;
}
Пример #7
0
/*
 * Benchmark a 1-D transform of size n in direction dir.
 *
 * A plan is created (with fftw_create_plan_specific when `specific' is
 * nonzero, with fftw_create_plan otherwise) and the planning time is
 * reported at verbosity 2.  When `paranoid' is set and the plan is not
 * in-place, the plan is run through test_ergun first.  The transform is
 * then timed with FFTW_TIME_FFT over howmany_fields interleaved arrays
 * and the results are printed at verbosity 1.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     /* planner flags shared by both plan-creation paths */
     const int plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;
     fftw_complex *in, *out;
     fftw_plan plan;
     fftw_time begin, end;
     double t;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
				       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
					* sizeof(fftw_complex));

     /* time the planner */
     begin = fftw_get_time();
     if (specific)
	  plan = fftw_create_plan_specific(n, dir, plan_flags,
					   in, howmany_fields,
					   out, howmany_fields);
     else
	  plan = fftw_create_plan(n, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     if (paranoid && (flags & FFTW_IN_PLACE) == 0) {
	  /* validate the plan before trusting its timing */
	  begin = fftw_get_time();
	  test_ergun(n, dir, plan);
	  end = fftw_get_time();
	  t = fftw_time_to_sec(fftw_time_diff(end, begin));
	  WHEN_VERBOSE(2, printf("time for validation: %f s\n", t));
     }

     /* the measurement proper; the macro leaves the result in t */
     FFTW_TIME_FFT(fftw(plan, howmany_fields,
			in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, n)));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
Пример #8
0
/*
 * Complex-to-real counterpart of fftwnd_aux_howmany: the howmany loop is
 * carried down to the 1-D transforms.
 *
 * Dimension cur_dim is transformed first, in place on the complex input;
 * the remaining dimensions are then handled either recursively or, when
 * only the last one is left, by the 1-D complex-to-real routines.
 */
void rfftwnd_c2real_aux_howmany(fftwnd_plan p, int cur_dim,
				int howmany,
				fftw_complex *in, int istride, int idist,
				fftw_real *out, int ostride, int odist,
				fftw_complex *work)
{
     const int len = p->n[cur_dim];
     const int inner = p->n_after[cur_dim];
     int j;

     /* current dimension, in place on the input */
     for (j = 0; j < inner; ++j)
	  fftw(p->plans[cur_dim], howmany,
	       in + j * istride, inner * istride, idist,
	       work, 1, 0);

     if (cur_dim != p->rank - 2) {
	  /* at least two more dimensions: recurse over hyperslabs */
	  const int nreal = p->plans[p->rank - 1]->n;
	  int inner_real;	/* slab size measured in real elements */

	  if (p->is_in_place)
	       inner_real = inner * 2;
	  else
	       inner_real = nreal * (inner / (nreal/2 + 1));

	  for (j = 0; j < len; ++j)
	       rfftwnd_c2real_aux_howmany(p, cur_dim + 1, howmany,
					  in + j * inner * istride,
					  istride, idist,
					  out + j * inner_real * ostride,
					  ostride, odist,
					  work);
	  return;
     }

     /* only the last dimension remains */
     if (p->is_in_place) {
	  for (j = 0; j < len; ++j)
	       rfftw_c2real_overlap_aux(p->plans[p->rank - 1], howmany,
					in + (j * inner * istride),
					istride, idist,
					out + (j * inner * ostride) * 2,
					ostride, odist,
					(fftw_real *) work);
     } else {
	  const int last_len = p->plans[p->rank - 1]->n;

	  for (j = 0; j < len; ++j)
	       rfftw_c2real_aux(p->plans[p->rank - 1], howmany,
				in + j * inner * istride,
				istride, idist,
				out + j * last_len * ostride,
				ostride, odist,
				(fftw_real *) work);
     }
}
Пример #9
0
/*
 * Apply the 1-D plan p->p_fft_x to the local data, viewed as complex
 * numbers.  The scratch array is p->work when that is set, otherwise the
 * one belonging to p->p_fft.
 *
 * n_fields is first multiplied by p->p_fft->n_after[0], so that the
 * trailing dimensions are treated as extra interleaved fields.
 */
static void first_dim_aux(rfftwnd_mpi_plan p,
			  int n_fields, fftw_real *local_data)
{
     fftw_complex *data = (fftw_complex *) local_data;
     fftw_complex *scratch = p->work ? p->work : p->p_fft->work;
     const int rows = p->p_transpose->local_ny;
     const int nx = p->p_fft_x->n;
     int row;

     /* fold the dimensions after y into the field count */
     n_fields *= p->p_fft->n_after[0];

     if (n_fields <= 1) {
	  /* one field: each row is a contiguous array of nx elements */
	  fftw(p->p_fft_x, rows, data, 1, nx, scratch, 1, 0);
	  return;
     }

     /* several interleaved fields: one batched call per row */
     for (row = 0; row < rows; ++row)
	  fftw(p->p_fft_x, n_fields,
	       data + (nx * n_fields) * row, n_fields, 1,
	       scratch, 1, 0);
}
Пример #10
0
/*
 * Loop body executed by one thread: performs the batched transform
 * described by ldata->data for every index in [ldata->min, ldata->max).
 * Each thread uses its own slice of the work array, selected by
 * ldata->thread_num.  Always returns 0.
 */
static void *fftw_howmany_thread(fftw_loop_data *ldata)
{
     fftw_howmany_data *job = (fftw_howmany_data*) ldata->data;
     fftw_complex *scratch = job->work + job->wdist * ldata->thread_num;
     const int stop = ldata->max;
     int idx;

     for (idx = ldata->min; idx < stop; ++idx)
	  fftw(job->p, job->howmany,
	       job->io_data + idx * job->iodist0,
	       job->iostride, job->iodist,
	       scratch, 1, 0);

     return 0;
}
Пример #11
0
/*
 * Estimate how long one execution of `plan' takes, in seconds.
 *
 * The transform is run `iter' times in a row and the total is timed;
 * iter starts at 1 and is doubled until a run no longer finishes in
 * less than FFTW_TIME_MIN.  The average time per transform of that
 * final run is returned.
 */
double fftw_measure_runtime(fftw_plan plan)
{
     const int n = plan->n;
     double t;
     int iter;

     for (iter = 1; ; iter *= 2) {
	  FFTW_COMPLEX *in, *out;
	  fftw_time begin, end;
	  int rep;

	  /* the arrays are allocated afresh for every attempt */
	  in = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));
	  out = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));

	  begin = fftw_get_time();
	  for (rep = 0; rep < iter; ++rep) {
	       int j;

	       /* fill the input with fixed values (refilled each time) */
	       for (j = 0; j < n; ++j) {
		    c_re(in[j]) = 1.0;
		    c_im(in[j]) = 32.432;
	       }

	       fftw(plan, 1, in, 1, 0, out, 1, 0);
	  }
	  end = fftw_get_time();

	  t = fftw_time_to_sec(fftw_time_diff(end,begin));

	  fftw_free(in);
	  fftw_free(out);

	  /* stop unless the run was shorter than the minimum */
	  if (!(t < FFTW_TIME_MIN))
	       break;
     }

     return t / (double)iter;
}
Пример #12
0
/*
 * Fortran-callable wrapper: run the plan *p on *ny arrays stored in a.
 * Each array has unit stride and consecutive arrays start *ldx elements
 * apart.  No output array is supplied.
 *
 * nx, nz and ldy are part of the Fortran interface but are not read.
 * Always returns 0.
 */
int F77_FUNC_ (fft_x_stick_single, FFT_X_STICK_SINGLE)
(fftw_plan *p, FFTW_COMPLEX *a, int *nx, int *ny, int *nz, int *ldx, int *ldy )
{
   int xstride;
   int xhowmany, xidist;

/* transform along x */
   xhowmany = (*ny);
   xstride  = 1;
   xidist   = (*ldx);

   fftw(*p,xhowmany,a,xstride,xidist,0,0,0);

   return 0;
}
Пример #13
0
/*
 * Recursive worker for the multi-dimensional real-to-complex transform.
 *
 * The dimensions after cur_dim are transformed first: directly by the
 * 1-D real-to-complex routine when only the last one remains, otherwise
 * by recursing over hyperslabs.  Dimension cur_dim is then transformed
 * in place on the complex output.
 */
void rfftwnd_real2c_aux(fftwnd_plan p, int cur_dim,
			fftw_real *in, int istride,
			fftw_complex *out, int ostride,
			fftw_real *work)
{
     const int len = p->n[cur_dim];
     const int inner = p->n_after[cur_dim];

     if (cur_dim != p->rank - 2) {
	  /* at least two more dimensions: recurse over hyperslabs */
	  const int nreal = p->plans[p->rank - 1]->n;
	  int inner_real;	/* slab size measured in real elements */
	  int j;

	  if (p->is_in_place)
	       inner_real = inner * 2;
	  else
	       inner_real = nreal * (inner / (nreal/2 + 1));

	  for (j = 0; j < len; ++j)
	       rfftwnd_real2c_aux(p, cur_dim + 1,
				  in + j * inner_real * istride, istride,
				  out + j * inner * ostride, ostride,
				  work);
     } else if (p->is_in_place) {
	  /* last dimension, in place: output uses the input stride */
	  rfftw_real2c_aux(p->plans[p->rank - 1], len,
			   in, istride, (inner * istride) * 2,
			   out, istride, inner * istride,
			   work);
     } else {
	  /* last dimension, out of place */
	  rfftw_real2c_aux(p->plans[p->rank - 1], len,
			   in, istride, p->plans[p->rank - 1]->n * istride,
			   out, ostride, inner * ostride,
			   work);
     }

     /* current dimension, in place; work is reused as complex scratch */
     fftw(p->plans[cur_dim], inner,
	  out, inner * ostride, ostride,
	  (fftw_complex *) work, 1, 0);
}
Пример #14
0
void process_seg(float* data) {
    int i;
    float* p = data;
    static float dbuff[FFT_LEN*2];
    static fftw_plan  planfwd,planinverse;

    if (!planfwd) {
      planfwd=fftw_create_plan(FFT_LEN, FFTW_BACKWARD, 
		FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM );
      planinverse=fftw_create_plan(IFFT_LEN, FFTW_FORWARD,
		FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM );
    }

    fftw_one(planfwd, (fftw_complex *)data, (fftw_complex *)NULL);
    data[0]=0;
    data[1]=0;
    fftw(planinverse, NSTRIPS, (fftw_complex *)data, 1, IFFT_LEN, 
  			(fftw_complex *)NULL, 1, IFFT_LEN);
    for (i=0; i<NSTRIPS; i++) {
        output_samples(p, i, obuf_pos);
        p += IFFT_LEN*2;
    }
    obuf_pos+=IFFT_LEN*2/CHAR_BIT;
}
Пример #15
0
/*
 * Fortran-callable wrapper: for each of the *nz planes of a, run the
 * plan *p on *ny arrays.  Planes start (*ldx) * (*ldy) elements apart;
 * within a plane each array has unit stride and consecutive arrays start
 * *ldx elements apart.  No output array is supplied.
 *
 * nx is part of the Fortran interface but is not read.
 * Always returns 0.
 */
int F77_FUNC_ (fft_x_stick, FFT_X_STICK)
(fftw_plan *p, FFTW_COMPLEX *a, int *nx, int *ny, int *nz, int *ldx, int *ldy )
{
   int i;
   int xstride, bigstride;
   int xhowmany, xidist;

   /* distance between the starts of consecutive planes */
   bigstride = (*ldx) * (*ldy);

   xhowmany = (*ny);
   xstride  = 1;
   xidist   = (*ldx);

   for(i = 0; i < *nz ; i++) {
     /* transform along x */
     fftw(*p,xhowmany,&a[i*bigstride],xstride,xidist,0,0,0);
   }
   return 0;
}
Пример #16
0
/*
 * First 1-D FFT pass for the data set identified by fftid, followed by
 * redistribution of the transformed data to the y-line objects.
 *
 *   direction != 0 : requires ptype == PencilType::XLINE; fwdplan is run
 *                    on xsquare[0]*xsquare[1] lines of length sizeX.
 *   direction == 0 : requires ptype == PencilType::ZLINE; bwdplan is run
 *                    on zsquare[0]*zsquare[1] lines of length sizeZ.
 *   Any other combination calls CkAbort.
 *
 * After the transform the line buffer is cut into blocks; each block is
 * packed into a freshly allocated SendFFTMsg and delivered through
 * yProxy(...).doSecondFFT(msg).
 */
void
NormalLineArray::doFirstFFT(int fftid, int direction)
{
    LineFFTinfo &fftinfo = (infoVec[fftid]->info);
    int ptype = fftinfo.ptype;
    int pblock = fftinfo.pblock;	// only referenced in a commented-out condition below
    complex *line = fftinfo.dataPtr;
    int sizeX = fftinfo.sizeX;
    int sizeZ = fftinfo.sizeZ;
    int *xsquare = fftinfo.xsquare;
    int *ysquare = fftinfo.ysquare;
    int *zsquare = fftinfo.zsquare;

#ifdef HEAVYVERBOSE 
    // Debug aid: dump the untransformed line buffer to a per-object file.
    // NOTE(review): the fopen result is not checked before use.
    {
	char fname[80];
	if(direction)
	snprintf(fname,80,"xline_%d.y%d.z%d.out", fftid,thisIndex.x, thisIndex.y);
	else
	snprintf(fname,80,"zline_%d.x%d.y%d.out", fftid,thisIndex.x, thisIndex.y);
      FILE *fp=fopen(fname,"w");
	for(int x = 0; x < sizeX*xsquare[0]*xsquare[1]; x++)
	    fprintf(fp, "%d  %g %g\n", x, line[x].re, line[x].im);
	fclose(fp);
    }
#endif

    // No output array is passed (NULL, 0, 0) -- presumably both plans were
    // created in-place, so the result overwrites `line'; confirm at plan creation.
    if(direction && ptype==PencilType::XLINE)
	fftw(fwdplan, xsquare[0]*xsquare[1], (fftw_complex*)line, 1, sizeX, NULL, 0, 0); // xPencilsPerSlab many 1-D fft's 
    else if(!direction && ptype==PencilType::ZLINE)
	fftw(bwdplan, zsquare[0]*zsquare[1], (fftw_complex*)line, 1, sizeZ, NULL, 0, 0);
    else
	CkAbort("Can't do this FFT\n");

    int x, y, z=0;
#ifdef VERBOSE
    CkPrintf("First FFT done at [%d %d] [%d %d]\n", thisIndex.x, thisIndex.y,sizeX,sizeZ);
#endif
    int baseX, ix, iy, iz;	// baseX is not used in this function
    if(true) {//else if(pblock == PencilBlock::SQUAREBLOCK){
	if(direction)
	{
	    // Block thickness along z: the smaller of ysquare[1] and xsquare[1].
	    int sendSquarethick = ysquare[1] <= xsquare[1] ? ysquare[1]:xsquare[1];
	    int sendDataSize = ysquare[0]*xsquare[0] * sendSquarethick;
	    int zpos = thisIndex.y;
	    int index=0;
	    complex *sendData = NULL;

	    // One message per (z block, x block) pair.
	    for(z = 0; z < xsquare[1]; z+=sendSquarethick){
		for(x = 0; x < sizeX; x+=ysquare[0]) {
		    // Message sized for sendDataSize elements, with priority
		    // bits for one int.
		    SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg;
		    sendData = msg->data;
		    msg->ypos = thisIndex.x; 
		    msg->size = sendDataSize;
		    msg->id = fftid;
		    msg->direction = direction;
		    msg->data = sendData;	// writes back the pointer read above
		    CkSetQueueing(msg, CK_QUEUEING_IFIFO);
#ifdef _PRIOMSG_
		    int prioNum =  (zpos+z) + x*sizeX;
		    *(int*)CkPriorityPtr(msg) = prioNum; 
#endif	
		    // Pack the block: iz outermost, then ix, with y varying fastest.
		    index = 0;
		    for(iz = z; iz < z+sendSquarethick; iz++)
			for(ix = x; ix < x+ysquare[0]; ix++)
			    for(y = 0; y < xsquare[0]; y++)
				sendData[index++] = line[iz*sizeX*xsquare[0]+y*sizeX+ix];
		
#ifdef VERBOSE	
		    CkPrintf(" [%d %d] sending to YLINES [	%d %d] \n",  thisIndex.x, thisIndex.y, thisIndex.y, x);
#endif	
		    yProxy(zpos+z, x).doSecondFFT(msg);
		}
	    //memset(sendData, 0, sizeof(complex)*yPencilsPerSlab*xPencilsPerSlab);
	    }
	}
	else
	{
	    // Block width along x: the smaller of ysquare[0] and zsquare[0].
	    int sendSquarewidth = ysquare[0]<=zsquare[0] ? ysquare[0]:zsquare[0];
	    int sendDataSize = ysquare[1] * sendSquarewidth * zsquare[1];
	    int xpos = thisIndex.x;
	    int ypos = thisIndex.y;	// not used below
	    int index=0;
	    complex *sendData = NULL;

	    // One message per (x block, z block) pair.
	    for(x = 0; x < zsquare[0]; x+=sendSquarewidth)
		for(z = 0; z < sizeZ; z+=ysquare[1]){
		    SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg;
		    sendData = msg->data;
		    msg->ypos = thisIndex.y; 
		    msg->size = sendDataSize;
		    msg->id = fftid;
		    msg->direction = direction;
		    msg->data = sendData;	// writes back the pointer read above
		    CkSetQueueing(msg, CK_QUEUEING_IFIFO);
#ifdef _PRIOMSG_
		    int prioNum =  (z) + (x+xpos)*sizeX;
		    *(int*)CkPriorityPtr(msg) = prioNum; 
#endif	
		    // Pack the block: iz outermost, then ix, with iy varying fastest.
		    index = 0;
		    for(iz = z; iz < z+ysquare[1]; iz++)
			for (ix = x; ix < x+sendSquarewidth; ix++) 
			    for(iy = 0; iy < zsquare[1]; iy++)
				sendData[index++] = line[iz+ix*sizeZ+iy*sizeZ*zsquare[0]];
		
#ifdef VERBOSE	
		    CkPrintf(" [%d %d] sending	 to YLINES [%d %d] \n",  thisIndex.x, thisIndex.y, z, thisIndex.x);
#endif
		    yProxy(z, xpos+x).doSecondFFT(msg);
		}
	}
    }
}
Пример #17
0
/*
 * Reads complex time-domain samples from `infile', transforms them in
 * consecutive groups of `bins' samples, swaps the two halves of every
 * spectrum so that DC ends up in the middle, and writes the power summed
 * over all spectra for each bin to `outfile'.
 *
 * Each input line is parsed with the format "%d (%f, %f)".
 *
 * Exit status:
 *   1  getArgs() reported an error
 *   2  the input file could not be opened
 *   3  the output file could not be opened
 *   4  the input file ended early or contained a malformed sample
 */
int
main(int argc, char **argv)
{
	int sample, samples, spectrum, spectra, bin, bins, dummy;
	float re, im, binPower;
	fftw_plan sigPlan;
	FILE *iFp, *oFp;

	if (getArgs(argc, argv))
		exit (1);

	if (!(iFp = fopen(infile, "r"))) {
		cout << " opening input file" << endl;
		exit(2);
	}

	if (!(oFp = fopen(outfile, "w"))) {
		cout << " opening output file" << endl;
		exit(3);
	}

	samples = subbands * halfFrames * 512;
	bins = 2 * subbands * 512;
	spectra = samples / bins;

	sigPlan = fftw_create_plan(bins, FFTW_FORWARD, FFTW_ESTIMATE);

	double power[bins];
	float_complex td[samples];
	float_complex fd[samples];

	// extract the samples from the channel file
	for (sample = 0; sample < samples; sample++) {
		// all three fields must convert; otherwise re/im would be
		// stale or uninitialized
		if (fscanf(iFp, "%d (%f, %f)\n", &dummy, &re, &im) != 3) {
			cout << " reading input file" << endl;
			exit(4);
		}
		td[sample] = float_complex(re, im);
	}

	// now perform a full-width fft to create the signal bins
	fftw(sigPlan, spectra, (fftw_complex *) td, 1, bins, (fftw_complex *) fd,
			1, bins);

	// rearrange the data so that DC is in the middle of the spectrum
	float_complex temp[bins/2];

	for (spectrum = 0; spectrum < spectra; spectrum++) {
		// swap the lower and upper halves through temp
		memcpy(temp, &fd[spectrum*bins], sizeof(float_complex) * bins / 2);
		memcpy(&fd[spectrum*bins], &fd[spectrum*bins+bins/2],
				sizeof(float_complex) * bins / 2);
		memcpy(&fd[spectrum*bins+bins/2], &temp, sizeof(float_complex)
				* bins / 2);
	}

	for (bin = 0; bin < bins; bin++)
		power[bin] = 0;
	
	// now compute the total power in each frequency bin
	for (spectrum = 0; spectrum < spectra; spectrum++) {
		for (bin = 0; bin < bins; bin++) {
			binPower = norm(fd[spectrum*bins+bin]);
#ifdef notdef
			fprintf(oFp, "%03d:%05d (%.3f, %.3f) (%.3f)\n", spectrum, bin,
					fd[spectrum*bins+bin].real(), fd[spectrum*bins+bin].imag(),
					binPower);
#endif
			power[bin] += binPower;
		}
	}

	// print the powers in the bins
	for (bin = 0; bin < bins; bin++)
		fprintf(oFp, "%05d: %.3le\n", bin, power[bin]);

	fclose(iFp);
	fclose(oFp);

	return 0;
}
Пример #18
0
/*
 * Perform howmany in-place transforms with plan p on strided data by
 * going through contiguous buffers.
 *
 * Transform i occupies in[i * idist + j * istride] for j in [0, p->n).
 * The transforms are processed in groups of nbuffers: each group is
 * copied into `buffers' (buffer b starts at buffers + b * nb, where
 * nb = p->n + FFTWND_BUFFER_PADDING), transformed there with `work' as
 * scratch, and copied back.
 *
 * NOTE(review): nbuffers is assumed to be positive; with nbuffers == 0
 * and howmany > 0 the inner for loop would never advance.
 */
void fftw_buffered(fftw_plan p, int howmany,
		   fftw_complex *in, int istride, int idist,
		   fftw_complex *work,
		   int nbuffers, fftw_complex *buffers)
{
     int i = 0, n, nb;

     n = p->n;
     nb = n + FFTWND_BUFFER_PADDING;	/* distance between buffer starts */

     /*
      * The first pass of the do/while handles all complete groups of
      * nbuffers transforms; a second pass, if needed, handles the
      * remaining howmany % nbuffers transforms as one smaller group.
      */
     do {
	  for (; i <= howmany - nbuffers; i += nbuffers) {
	       fftw_complex *cur_in = in + i * idist;
	       int j, buf;

	       /* 
	        * First, copy nbuffers strided arrays to the
	        * contiguous buffer arrays (reading consecutive
	        * locations, assuming that idist is 1):
	        */
	       for (j = 0; j < n; ++j) {
		    fftw_complex *cur_in2 = cur_in + j * istride;
		    fftw_complex *cur_buffers = buffers + j;

		    /* main part, unrolled by four buffers */
		    for (buf = 0; buf <= nbuffers - 4; buf += 4) {
			 *cur_buffers = *cur_in2;
			 *(cur_buffers += nb) = *(cur_in2 += idist);
			 *(cur_buffers += nb) = *(cur_in2 += idist);
			 *(cur_buffers += nb) = *(cur_in2 += idist);
			 cur_buffers += nb;
			 cur_in2 += idist;
		    }
		    /* leftover buffers (fewer than four) */
		    for (; buf < nbuffers; ++buf) {
			 *cur_buffers = *cur_in2;
			 cur_buffers += nb;
			 cur_in2 += idist;
		    }
	       }

	       /* 
	        * Now, compute the FFTs in the buffers (in-place
	        * using work): 
	        */
	       fftw(p, nbuffers, buffers, 1, nb, work, 1, 0);

	       /* 
	        * Finally, copy the results back from the contiguous
	        * buffers to the strided arrays (writing consecutive
	        * locations):
	        */
	       for (j = 0; j < n; ++j) {
		    fftw_complex *cur_in2 = cur_in + j * istride;
		    fftw_complex *cur_buffers = buffers + j;

		    /* main part, unrolled by four buffers */
		    for (buf = 0; buf <= nbuffers - 4; buf += 4) {
			 *cur_in2 = *cur_buffers;
			 *(cur_in2 += idist) = *(cur_buffers += nb);
			 *(cur_in2 += idist) = *(cur_buffers += nb);
			 *(cur_in2 += idist) = *(cur_buffers += nb);
			 cur_buffers += nb;
			 cur_in2 += idist;
		    }
		    /* leftover buffers (fewer than four) */
		    for (; buf < nbuffers; ++buf) {
			 *cur_in2 = *cur_buffers;
			 cur_buffers += nb;
			 cur_in2 += idist;
		    }
	       }
	  }

	  /* 
	   * we skip howmany % nbuffers ffts at the end of the loop,
	   * so we have to go back and do them: 
	   */
	  nbuffers = howmany - i;
     } while (i < howmany);
}
Пример #19
0
/*
 * Updates the boundary arrays Uxb and Uzb in place.
 *
 * Steps, in the order performed below:
 *   1. clear the work array CT, copy Uxb into CT[0] and Uzb into CT[1]
 *      (rows z > Nz/2 are shifted up by Nz/2; row z == Nz/2 is not
 *      copied) and set the real part of CT[2][1][1] to 1;
 *   2. transform CT[0], CT[1] and CT[2] with plans pf1 and pr1;
 *   3. multiply the first two resulting real fields pointwise by the
 *      third;
 *   4. transform the three fields back with plans pr2 and pf2 and scale
 *      the retained rows by `norm';
 *   5. copy the retained rows back into Uxb/Uzb and combine them with
 *      Kx and Kz as described in the comment near the end.
 *
 * Always returns NO_ERR.
 */
int increBoundary(void)
{
    /* External Variables */
    extern int Nx, Nz;
    extern fftw_complex ***CT;  /* 6-by-(3Nz/2)-by-(3*Nx/4+1) */
    extern mcomplex **Uxb, **Uzb;
    extern fftw_plan pf1, pf2;
    extern rfftwnd_plan pr1, pr2;
    extern double *Kx, *Kz;

    int x, i, z, idx;
    double norm, tmp1, tmp2, tmp3;
    fftw_real *RT;              /* real to complex transform */
    /* NULL output arrays passed to the transform calls below --
       presumably the plans are in-place; confirm where they are created */
    fftw_complex *fout = NULL;
    fftw_real *rout = NULL;

    /* number of real elements in one field of RT */
    idx = (3 * Nz / 2) * (3 * Nx / 2 + 2);
    /* real-valued view of the storage starting at CT[0][0] */
    RT = (fftw_real *) CT[0][0];
    norm = 1.0 / ((3. * Nx / 2.) * (3. * Nz / 2.));

    /* clear MAXT fields, addressed as one block starting at CT[0][0] */
    memset(CT[0][0], 0,
           MAXT * (3 * Nz / 2) * (3 * Nx / 4 + 1) * sizeof(fftw_complex));

    /* store Uxb hat and Uzb hat and w hat on CT for inverse FFT */
    for (z = 0; z < Nz / 2; ++z) {
        /* CT[0] store the data of Uxb, CT[1] storedata for Uzb */
        memcpy(CT[0][z], Uxb[z], (Nx / 2) * sizeof(fftw_complex));
        memcpy(CT[1][z], Uzb[z], (Nx / 2) * sizeof(fftw_complex));
        /*      for(x=0; x<Nx/2; ++x)
           {
           Re(CT[2][z][x])=1.0;
           Im(CT[2][z][x])=0.;
           } */
    }

    /* rows above Nz/2 go to the top of the larger array; row Nz/2 itself
       is skipped */
    for (z = Nz / 2 + 1; z < Nz; ++z) {
        memcpy(CT[0][z + Nz / 2], Uxb[z], (Nx / 2) * sizeof(fftw_complex));
        memcpy(CT[1][z + Nz / 2], Uzb[z], (Nx / 2) * sizeof(fftw_complex));
        /*for(x=0; x<Nx/2; ++x)
           {
           Re(CT[2][z+Nz/2][x])=1.0;
           Im(CT[2][z+Nz/2][x])=0.;
           }
         */
    }

    /* the third field has a single nonzero coefficient */
    Re(CT[2][1][1]) = 1.;
    //Re(CT[2][3*Nz/2-1][0])=1.;
    //Re(CT[2][0][0])=1.;

    //  Re(CT[2][0][0])=1.;


    /* inverse Fourier transform */
    for (i = 0; i < 3; ++i) {
        /* Each column of CT[i] */
        fftw(pf1, Nx / 2, CT[i][0], 3 * Nx / 4 + 1, 1, fout, -1, -1);

        /* Each row of CT[i] */
        rfftwnd_complex_to_real(pr1, 3 * Nz / 2, CT[i][0], 1,
                                3 * Nx / 4 + 1, rout, -1, -1);
    }

    /* compute (dux)*(w.n) and (duz)*(w.n) */
    /* fields 0 and 1 of RT are each multiplied pointwise by field 2;
       rows are 3*Nx/2 + 2 reals apart */
    for (z = 0; z < (3 * Nz / 2); ++z) {
        for (x = 0; x < 3 * Nx / 2; ++x) {
            RT[(z * (3 * Nx / 2 + 2) + x)] =
                RT[(z * (3 * Nx / 2 + 2) + x)] * RT[2 * idx +
                                                    (z * (3 * Nx / 2 + 2) +
                                                     x)];
            RT[idx + (z * (3 * Nx / 2 + 2) + x)] =
                RT[idx + (z * (3 * Nx / 2 + 2) + x)] * RT[2 * idx +
                                                          (z *
                                                           (3 * Nx / 2 +
                                                            2) + x)];
        }
    }


    /* Fourier transform to get Uxb hats and Uzb hats. */
    for (i = 0; i < 3; ++i) {

        /* Each row of RT[i] */
        rfftwnd_real_to_complex(pr2, 3 * Nz / 2, RT + (i * idx), 1,
                                3 * Nx / 2 + 2, fout, -1, -1);

        /* Each column of CT[i] */
        fftw(pf2, Nx / 2, CT[i][0], 3 * Nx / 4 + 1, 1, fout, -1, -1);

        /* constant of FFT */
        /* only the rows copied back below are scaled: [0, Nz/2) ... */
        for (z = 0; z < Nz / 2; ++z) {
            for (x = 0; x < Nx / 2; ++x) {
                Re(CT[i][z][x]) = norm * Re(CT[i][z][x]);
                Im(CT[i][z][x]) = norm * Im(CT[i][z][x]);
            }
        }

        /* ... and [Nz + 1, 3*Nz/2) */
        for (z = Nz + 1; z < 3 * Nz / 2; ++z) {
            for (x = 0; x < Nx / 2; ++x) {
                Re(CT[i][z][x]) = norm * Re(CT[i][z][x]);
                Im(CT[i][z][x]) = norm * Im(CT[i][z][x]);
            }
        }
    }

    /* put data back in arrays Uxb and Uzb */
    /* both arrays are cleared first, each as one block of Nz * (Nx/2)
       elements starting at row 0, so rows not copied below stay zero */
    memset(Uxb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex));
    memset(Uzb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex));
    for (z = 0; z < Nz / 2; ++z) {
        memcpy(Uxb[z], CT[0][z], Nx / 2 * sizeof(fftw_complex));
        memcpy(Uzb[z], CT[1][z], Nx / 2 * sizeof(fftw_complex));
    }
    /* undo the shift by Nz/2 applied when filling CT */
    for (z = Nz + 1; z < 3 * Nz / 2; ++z) {
        memcpy(Uxb[z - Nz / 2], CT[0][z], Nx / 2 * sizeof(fftw_complex));
        memcpy(Uzb[z - Nz / 2], CT[1][z], Nx / 2 * sizeof(fftw_complex));
    }

    /* further computation to get c1, c2, c3, c4 as in the note and results are rewritten in
       Uxb and Uzb:
       c1=g hat=iKz*Uxb-iKx*Uzb;-------rewritten in Uxb
       c2=du_y=-iKx*Uxb-iKz*Uzb;-------rewritten in Uzb
       c3=U=Uxb(0,0);           -------rewritten in Uxb[0][0]
       c4=Uzb(0,0)              -------rewritten in Uzb[0][0] */

    for (z = 0; z < Nz; ++z) {
        for (x = 0; x < Nx / 2; ++x) {
            /* the (0,0) entry is left as it is */
            if (z * z + x * x > 0) {
                /* compute into temporaries first: every new value depends
                   on the old contents of both arrays */
                tmp1 = -Kz[z] * Im(Uxb[z][x]) + Kx[x] * Im(Uzb[z][x]);
                tmp2 = Kz[z] * Re(Uxb[z][x]) - Kx[x] * Re(Uzb[z][x]);
                tmp3 = Kx[x] * Im(Uxb[z][x]) + Kz[z] * Im(Uzb[z][x]);

                Im(Uzb[z][x]) =
                    -Kx[x] * Re(Uxb[z][x]) - Kz[z] * Re(Uzb[z][x]);
                Re(Uzb[z][x]) = tmp3;
                Re(Uxb[z][x]) = tmp1;
                Im(Uxb[z][x]) = tmp2;
            }
        }
    }


    return (NO_ERR);
}
Пример #20
0
/*
 * Fortran-callable wrapper around fftw(): every scalar argument arrives
 * by reference and is dereferenced before being forwarded unchanged.
 */
void F77_FUNC_(fftw_f77,FFTW_F77)
(fftw_plan *p, int *howmany, fftw_complex *in, int *istride, int *idist,
 fftw_complex *out, int *ostride, int *odist)
{
     const int count = *howmany;
     const int in_stride = *istride;
     const int in_dist = *idist;
     const int out_stride = *ostride;
     const int out_dist = *odist;

     fftw(*p, count, in, in_stride, in_dist, out, out_stride, out_dist);
}
Пример #21
0
/*
 * Benchmark the MPI 1-D transform of size n in direction dir.
 *
 * Nothing is done for `specific' plans or when FFTW_IN_PLACE is not set
 * in flags.  When io_okay is set and only_parallel is not, a
 * uniprocessor plan is created and timed first so that a speedup can be
 * reported.  The MPI transform is then timed twice: once without and
 * once with an explicit work array.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     int local_n, local_start, local_n_after_transform,
	  local_start_after_transform, total_local_size, nalloc;
     fftw_complex *in, *work;
     fftw_plan plan = 0;
     fftw_mpi_plan mpi_plan;
     double t, t0 = 0.0;	/* t0 stays 0 unless the uniprocessor run happens */

     if (specific || !(flags & FFTW_IN_PLACE))
	  return;

     /* uniprocessor reference plan, only where it will be timed */
     if (io_okay && !only_parallel)
	  plan = fftw_create_plan(n, dir, speed_flag | flags
				  | wisdom_flag | no_vector_flag);

     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir,
				     speed_flag | flags
				     | wisdom_flag | no_vector_flag);

     CHECK(mpi_plan, "failed to create plan!");

     fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,
			  &local_n_after_transform,
			  &local_start_after_transform,
			  &total_local_size);

     /* a process that also runs the uniprocessor transform sizes its
	arrays by n instead of total_local_size */
     if (io_okay && !only_parallel)
	  nalloc = n;
     else
	  nalloc = total_local_size;

     in = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
				       * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
					 * sizeof(fftw_complex));

     if (io_okay) {
	  WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));
     }

     if (io_okay && !only_parallel) {
	  /* uniprocessor timing; the result is left in t0 */
	  FFTW_TIME_FFT(fftw(plan, howmany_fields,
			     in, howmany_fields, 1, work, 1, 0),
			in, n * howmany_fields, t0);

	  fftw_destroy_plan(plan);

	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     }
     
     /* parallel timing without a work array; the result is left in t */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),
		  in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
	  WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, n)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));
     }

     /* parallel timing again, this time with the work array */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),
		  in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
	  WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, n)));
	  if (!only_parallel)
	     WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n", t0 / t));
     }

     fftw_free(in);
     fftw_free(work);
     fftw_mpi_destroy_plan(mpi_plan);

     WHEN_VERBOSE(1, my_printf("\n"));
}
Пример #22
0
/*
 * Check the in-place MPI transform of size n against a validated
 * uniprocessor plan.
 *
 * Random data for howmany interleaved transforms is generated in in2;
 * the part starting at local_start is copied into in1 and transformed
 * with fftw_mpi, with or without a work array depending on coinflip().
 * The whole data set is transformed with validated_plan into out2, and
 * the local result is compared against the matching part of out2.
 *
 * Nothing is tested for `specific' plans.  istride is not used.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     int local_n, local_start, local_n_after_transform,
	  local_start_after_transform, total_local_size;
     fftw_complex *in1, *work = NULL, *in2, *out2;
     fftw_mpi_plan plan;
     int i;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (specific) {
	  WHEN_VERBOSE(2, my_printf("N/A\n"));
	  return;
     }

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, flags);

     /* the plan is dereferenced right below, so fail loudly here instead */
     CHECK(plan, "failed to create plan!");

     fftw_mpi_local_sizes(plan, &local_n, &local_start,
			  &local_n_after_transform,
			  &local_start_after_transform,
			  &total_local_size);

     in1 = (fftw_complex *) fftw_malloc(total_local_size 
					* sizeof(fftw_complex) * howmany);
     if (coinflip()) {
	  /* exercise the variant that takes an explicit work array */
	  WHEN_VERBOSE(2, my_printf("w/work..."));
	  work = (fftw_complex *) fftw_malloc(total_local_size
                                        * sizeof(fftw_complex) * howmany);
     }
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     /* generate random inputs */
     for (i = 0; i < n * howmany; ++i) {
	  c_re(in2[i]) = DRAND();
	  c_im(in2[i]) = DRAND();
     }
     /* this process's portion of the input */
     for (i = 0; i < local_n * howmany; ++i) {
	  c_re(in1[i]) = c_re(in2[i + local_start*howmany]);
	  c_im(in1[i]) = c_im(in2[i + local_start*howmany]);
     }	  

     /* fft-ize */
     fftw_mpi(plan, howmany, in1, work);

     fftw_mpi_destroy_plan(plan);

     /* reference result: howmany interleaved transforms of the full data */
     fftw(validated_plan, howmany, in2, howmany, 1, out2, howmany, 1);

     CHECK(compute_error_complex(in1, 1,
				 out2 + local_start_after_transform*howmany, 1,
				 howmany*local_n_after_transform) < TOLERANCE,
	   "test_in_place: wrong answer");

     WHEN_VERBOSE(2, my_printf("OK\n"));

     fftw_free(in1);
     fftw_free(work);	/* work may still be NULL here */
     fftw_free(in2);
     fftw_free(out2);
}
Пример #23
0
/*
 * In-place test of the serial transform.
 *
 * `howmany` arrays of length n are stored with stride `istride` in one
 * buffer and transformed in place, either through fftw() or, for the
 * single contiguous case, sometimes through fftw_one().  The positions
 * between the strided elements hold known marker values which must be
 * unchanged afterwards.  The result is compared against out-of-place
 * transforms of a packed copy of the same data done with validated_plan.
 *
 * When `specific` is non-zero the plan is created for this particular
 * buffer and stride with fftw_create_plan_specific.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     fftw_complex *strided, *packed_in, *packed_out;
     fftw_plan plan;
     int idx, lane, count;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     /* randomly exercise the thread-safe planner mode */
     if (coinflip()) {
          flags |= FFTW_THREADSAFE;
     }

     strided = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
     packed_in = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     packed_out = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     if (specific) {
          plan = fftw_create_plan_specific(n, dir, flags,
                                           strided, istride,
                                           (fftw_complex *) NULL, 0);
     } else {
          plan = fftw_create_plan(n, dir, flags);
     }

     count = n * howmany;

     /* same random values in the strided buffer and in the packed copy */
     for (idx = 0; idx < count; ++idx) {
          fftw_complex *slot = &strided[idx * istride];

          c_re(*slot) = c_re(packed_in[idx]) = DRAND();
          c_im(*slot) = c_im(packed_in[idx]) = DRAND();
     }

     /* marker values in the gaps between strided elements */
     for (lane = 1; lane < istride; ++lane) {
          for (idx = 0; idx < count; ++idx) {
               const int pos = idx * istride + lane;

               c_re(strided[pos]) = pos;
               c_im(strided[pos]) = idx * istride - lane;
          }
     }

     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /*
      * transform under test: fftw_one is only eligible for a single
      * contiguous array, and even then only on a coin flip
      */
     if (howmany == 1 && istride == 1 && !coinflip()) {
          fftw_one(plan, strided, NULL);
     } else {
          fftw(plan, howmany, strided, istride, n * istride,
               (fftw_complex *) NULL, 0, 0);
     }

     fftw_destroy_plan(plan);

     /* the markers must have survived the transform */
     for (lane = 1; lane < istride; ++lane) {
          for (idx = 0; idx < count; ++idx) {
               const int pos = idx * istride + lane;

               CHECK(c_re(strided[pos]) == pos &&
                     c_im(strided[pos]) == idx * istride - lane,
                     "input has been overwritten");
          }
     }

     /* reference: one out-of-place transform per array */
     for (idx = 0; idx < howmany; ++idx) {
          const int start = n * idx;

          fftw(validated_plan, 1, packed_in + start, 1, n,
               packed_out + start, 1, n);
     }

     CHECK(compute_error_complex(strided, istride, packed_out, 1, count) < TOLERANCE,
           "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(strided);
     fftw_free(packed_in);
     fftw_free(packed_out);
}
Пример #24
0
// Perform a 3d FFT as three passes of 1d FFTs (fast, mid, slow axis)
// with a data remap between consecutive passes.
//
//   in    input data; transformed directly by the first pass unless
//         plan->pre_plan is set, in which case it is remapped first
//   out   destination of every remap whose *_target is 0 and of the
//         optional post-remap; also the array that gets scaled at the end
//   flag  direction selector: the MKL, DEC, FFTW2, FFTW3 and KISS branches
//         treat -1 as forward and any other value as backward, the other
//         backends pass it through to FFT_1D; scaling is tied to flag == 1
//   plan  supplies per-pass sizes (totalN/lengthN), remap plans, backend
//         plans/coefficients/work arrays, the copy and scratch buffers,
//         and the scaling parameters (scaled, norm, normnum)
//
// Each pass runs in place on `data`; exactly one backend branch is
// compiled, selected by the FFT_* preprocessor symbols.
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
  int i,total,length,offset,num;
  FFT_SCALAR norm, *out_ptr;
  FFT_DATA *data,*copy;

  // system specific constants

#if defined(FFT_SCSL)
  int isys = 0;
  FFT_PREC scalef = 1.0;
#elif defined(FFT_DEC)
  char c = 'C';
  char f = 'F';
  char b = 'B';
  int one = 1;
#elif defined(FFT_T3E)
  int isys = 0;
  double scalef = 1.0;
#elif defined(FFT_ACML)
  int info;
#elif defined(FFT_FFTW3)
  FFTW_API(plan) theplan;
#else
  // nothing to do for other FFTs.
#endif

  // pre-remap to prepare for 1st FFTs if needed
  // copy = loc for remap result
  // (out when pre_target == 0, otherwise the plan's own copy buffer)

  if (plan->pre_plan) {
    if (plan->pre_target == 0) copy = out;
    else copy = plan->copy;
    remap_3d((FFT_SCALAR *) in, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch,
             plan->pre_plan);
    data = copy;
  }
  else
    data = in;

  // 1d FFTs along fast axis
  // total/length = number of 1d transforms; backends without a batched
  // call loop over the data in steps of `length`

  total = plan->total1;
  length = plan->length1;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff1);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
#elif defined(FFT_ACML)
  num=total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff1,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff1);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_fast,data);
  else
    DftiComputeBackward(plan->handle_fast,data);
#elif defined(FFT_DEC)
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
  else
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_fast_forward;
  else
    theplan=plan->plan_fast_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]);
  else
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]);
#endif

  // 1st mid-remap to prepare for 2nd FFTs
  // copy = loc for remap result

  if (plan->mid1_target == 0) copy = out;
  else copy = plan->copy;
  remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch,
           plan->mid1_plan);
  data = copy;

  // 1d FFTs along mid axis

  total = plan->total2;
  length = plan->length2;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff2);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
#elif defined(FFT_ACML)
  num=total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff2,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff2);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_mid,data);
  else
    DftiComputeBackward(plan->handle_mid,data);
#elif defined(FFT_DEC)
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
  else
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_mid_forward;
  else
    theplan=plan->plan_mid_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]);
  else
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]);
#endif

  // 2nd mid-remap to prepare for 3rd FFTs
  // copy = loc for remap result

  if (plan->mid2_target == 0) copy = out;
  else copy = plan->copy;
  remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch,
           plan->mid2_plan);
  data = copy;

  // 1d FFTs along slow axis

  total = plan->total3;
  length = plan->length3;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff3);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_ACML)
  num=total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff3,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff3);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_slow,data);
  else
    DftiComputeBackward(plan->handle_slow,data);
#elif defined(FFT_DEC)
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one);
  else
    for (offset = 0; offset < total; offset += length)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_slow_forward;
  else
    theplan=plan->plan_slow_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  if (flag == -1)
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]);
  else
    for (offset = 0; offset < total; offset += length)
      kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]);
#endif

  // post-remap to put data in output format if needed
  // destination is always out

  if (plan->post_plan)
    remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) out, (FFT_SCALAR *) plan->scratch,
             plan->post_plan);

  // scaling if required
  // generic case: multiply the first plan->normnum complex values of out
  // by plan->norm, only when flag == 1 and plan->scaled is set
#if !defined(FFT_T3E) && !defined(FFT_ACML)
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    out_ptr = (FFT_SCALAR *)out;
    for (i = 0; i < num; i++) {
#if defined(FFT_FFTW3)
      // two scalar multiplies per element, walking out as an FFT_SCALAR array
      *(out_ptr++) *= norm;
      *(out_ptr++) *= norm;
#elif defined(FFT_MKL)
      out[i] *= norm;
#else
      out[i].re *= norm;
      out[i].im *= norm;
#endif
    }
  }
#endif

  // NOTE(review): (norm,norm) is a comma expression and evaluates to
  // norm alone - confirm that this is the intended complex scaling
#ifdef FFT_T3E
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    for (i = 0; i < num; i++) out[i] *= (norm,norm);
  }
#endif

  // NOTE(review): unlike the other backends, the ACML branch scales
  // regardless of flag and plan->scaled - confirm this is intended
#ifdef FFT_ACML
  norm = plan->norm;
  num = plan->normnum;
  for (i = 0; i < num; i++) {
    out[i].re *= norm;
    out[i].im *= norm;
  }
#endif

}
Пример #25
0
int main(int argc, char **argv) {
  
  int c, mu, status;
  int filename_set = 0;
  int mode = 0;
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix, iiy, gid;
  int Thp1, nclass;
  int *oh_count=(int*)NULL, *oh_id=(int*)NULL, oh_nc;
  int *picount;
  double *conn = (double*)NULL;
  double *conn2 = (double*)NULL;
  double **oh_val=(double**)NULL;
  double q[4], qsqr;
  int verbose = 0;
  char filename[800];
  double ratime, retime;
  FILE *ofs;
  fftw_complex *corrt=NULL;

  fftw_complex *pi00=(fftw_complex*)NULL, *pijj=(fftw_complex*)NULL, *piavg=(fftw_complex*)NULL;

  fftw_plan plan_m;

  while ((c = getopt(argc, argv, "h?vf:m:")) != -1) {
    switch (c) {
    case 'v':
      verbose = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'm':
      mode = atoi(optarg);
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  /* set the default values */
  if(filename_set==0) strcpy(filename, "cvc.input");
  fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

  /* some checks on the input data */
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n");
    usage();
  }

  /* initialize MPI parameters */
  mpi_init(argc, argv);

  /* initialize fftw, create plan with FFTW_FORWARD ---  in contrast to
   * FFTW_BACKWARD in e.g. avc_exact */
  plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE);
  if(plan_m==NULL) {
    fprintf(stderr, "Error, could not create fftw plan\n");
    return(1);
  }

  T            = T_global;
  Thp1         = T/2 + 1;
  Tstart       = 0;
  l_LX_at      = LX;
  l_LXstart_at = 0;
  FFTW_LOC_VOLUME = T*LX*LY*LZ;
  fprintf(stdout, "# [%2d] fftw parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] l_LX_at      = %3d\n"\
		  "# [%2d] l_LXstart_at = %3d\n"\
		  "# [%2d] FFTW_LOC_VOLUME = %3d\n", 
		  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at,
		  g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME);

  if(init_geometry() != 0) {
    fprintf(stderr, "ERROR from init_geometry\n");
    exit(1);
  }

  geometry();

  /****************************************
   * allocate memory for the contractions *
   ****************************************/
  conn = (double*)calloc(32*VOLUME, sizeof(double));
  if( (conn==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for contr. fields\n");
    exit(3);
  }

/*
  conn2 = (double*)calloc(32*VOLUME, sizeof(double));
  if( (conn2==(double*)NULL) ) {
    fprintf(stderr, "could not allocate memory for contr. fields\n");
    exit(4);
  }

  pi00 = (fftw_complex*)malloc(VOLUME*sizeof(fftw_complex));
  if( (pi00==(fftw_complex*)NULL) ) {
    fprintf(stderr, "could not allocate memory for pi00\n");
    exit(2);
  }

  pijj = (fftw_complex*)fftw_malloc(VOLUME*sizeof(fftw_complex));
  if( (pijj==(fftw_complex*)NULL) ) {
    fprintf(stderr, "could not allocate memory for pijj\n");
    exit(2);
  }
*/
  corrt = fftw_malloc(T*sizeof(fftw_complex));

  for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) {

//    for(ix=0; ix<VOLUME; ix++) {pi00[ix].re = 0.; pi00[ix].im = 0.;}
//    for(ix=0; ix<VOLUME; ix++) {pijj[ix].re = 0.; pijj[ix].im = 0.;}
    /***********************
     * read contractions   *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;

    sprintf(filename, "%s", filename_prefix);
    fprintf(stdout, "# Reading data from file %s\n", filename);
    status = read_lime_contraction(conn, filename, 16, 0);
    if(status == 106) {
      fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status);
      continue;
    }
/*
    sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid);
    fprintf(stdout, "# Reading data from file %s\n", filename);
    status = read_lime_contraction(conn2, filename, 16, 0);
    if(status == 106) {
      fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status);
      continue;
    }
*/
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time to read contractions %e seconds\n", retime-ratime);

    /***********************
     * fill the correlator *
     ***********************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
/*
    for(x1=0; x1<LX; x1++) {
    for(x2=0; x2<LY; x2++) {
    for(x3=0; x3<LZ; x3++) {
      for(x0=0; x0<T; x0++) {
        iix = g_ipt[0][x1][x2][x3]*T+x0;
        for(mu=1; mu<4; mu++) {
          ix = _GWI(5*mu,g_ipt[x0][x1][x2][x3],VOLUME);
          pijj[iix].re += ( conn[ix  ] - conn2[ix  ] ) * (double)Nsave / (double)(Nsave-1);
          pijj[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1);
        }
        ix = 2*g_ipt[x0][x1][x2][x3];
        pi00[iix].re += ( conn[ix  ] - conn2[ix  ] ) * (double)Nsave / (double)(Nsave-1);
        pi00[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1);
      }
    }}}
*/
    for(x0=0; x0<T; x0++) {
      ix = g_ipt[x0][0][0][0];
      corrt[x0].re = conn[_GWI(5,ix,VOLUME)  ] + conn[_GWI(10,ix,VOLUME)  ] + conn[_GWI(15,ix,VOLUME)  ];
      corrt[x0].im = conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1];
      corrt[x0].re /= (double)T;
      corrt[x0].im /= (double)T;
    }
/*    fftw(plan_m, 1, corrt, 1, T, (fftw_complex*)NULL, 0, 0); */
    fftw_one(plan_m, corrt, NULL);
    sprintf(filename, "rho.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing VKVK data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    
    fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, 0, corrt[0].re, 0., gid);
    for(x0=1; x0<(T/2); x0++) {
      fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, x0, 
        corrt[x0].re, corrt[T-x0].re, gid);
    }
    fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, (T/2), corrt[T/2].re, 0., gid);

    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time to fill correlator %e seconds\n", retime-ratime);

#ifdef _UNDEF
 
    free(conn);
/*    free(conn2); */

    /********************************
     * test: print correl to stdout *
     ********************************/
/*
  fprintf(stdout, "\n\n# *****************   pijj   *****************\n");
  for(ix=0; ix<LX*LY*LZ; ix++) {
    iix = ix*T;
    for(x0=0; x0<T; x0++) {
      fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pijj[iix+x0].re, pijj[iix+x0].im);
    }
  }
  fprintf(stdout, "\n\n# *****************   pi00   *****************\n");
  for(ix=0; ix<LX*LY*LZ; ix++) {
    iix = ix*T;
    for(x0=0; x0<T; x0++) {
      fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pi00[iix+x0].re, pi00[iix+x0].im);
    }
  }
*/

    /*****************************************
     * do the reverse Fourier transformation *
     *****************************************/
    ratime = (double)clock() / CLOCKS_PER_SEC;
    fftw(plan_m, LX*LY*LZ,  pi00, 1, T, (fftw_complex*)NULL, 0, 0);
    fftw(plan_m, LX*LY*LZ,  pijj, 1, T, (fftw_complex*)NULL, 0, 0);

    for(ix=0; ix<VOLUME; ix++) {
      pi00[ix].re /= (double)T; pi00[ix].im /= (double)T;
      pijj[ix].re /= 3.*(double)T; pijj[ix].im /= 3.*(double)T;
    }
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for Fourier transform %e seconds\n", retime-ratime);

  /*****************************************
   * write to file
   *****************************************/
  ratime = (double)clock() / CLOCKS_PER_SEC;
  sprintf(filename, "pi00.%.4d", gid);
  if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
    fprintf(stderr, "Error: could not open file %s for writing\n", filename);
    exit(5);
  }
  fprintf(stdout, "# writing pi00-data to file %s\n", filename);
  fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
  for(x1=0; x1<LX; x1++) {
  for(x2=0; x2<LY; x2++) {
  for(x3=0; x3<LZ; x3++) {
    ix = g_ipt[0][x1][x2][x3]*T;
/*    fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */
    for(x0=0; x0<T; x0++) {
/*      fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pi00[ix+x0].re, pi00[ix+x0].im); */
      fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pi00[ix+x0].re, pi00[ix+x0].im);
    }
  }}}
  fclose(ofs);

  sprintf(filename, "pijj.%.4d", gid);
  if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
    fprintf(stderr, "Error: could not open file %s for writing\n", filename);
    exit(5);
  }
  fprintf(stdout, "# writing pijj-data to file %s\n", filename);
  fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
  for(x1=0; x1<LX; x1++) {
  for(x2=0; x2<LY; x2++) {
  for(x3=0; x3<LZ; x3++) {
    ix = g_ipt[0][x1][x2][x3]*T;
/*    fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */
    for(x0=0; x0<T; x0++) {
/*      fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pijj[ix+x0].re, pijj[ix+x0].im); */
      fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pijj[ix+x0].re, pijj[ix+x0].im);
    }
  }}}
  fclose(ofs);

  retime = (double)clock() / CLOCKS_PER_SEC;
  fprintf(stdout, "# time to write correlator %e seconds\n", retime-ratime);

/*
  if(mode==0) {
    ratime = (double)clock() / CLOCKS_PER_SEC;
    if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110);
    sprintf(filename, "corr.00.mom");
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    for(ix=0; ix<VOLUME; ix++) picount[ix] = 0;
    for(x1=0; x1<LX; x1++) {
      q[1] = 2. * sin(M_PI * (double)x1 / (double)LX);
    for(x2=0; x2<LY; x2++) {
      q[2] = 2. * sin(M_PI * (double)x2 / (double)LY);
    for(x3=0; x3<LZ; x3++) {
      q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ);
      qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; 
      if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) {
        ix = g_ipt[0][x1][x2][x3];
        picount[ix] = 1;
        fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr);
      }
    }}}
    fclose(ofs);
    sprintf(filename, "corr_00.00.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_00-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    sprintf(filename, "corr_jj.00.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_jj-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime);
    free(picount);
  } else if(mode==1) {
    ratime = (double)clock() / CLOCKS_PER_SEC;
    if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110);
    sprintf(filename, "corr.01.mom");
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110);
    for(ix=0; ix<VOLUME; ix++) picount[ix] = 0;
    for(x1=0; x1<LX; x1++) {
      q[1] = 2. * M_PI * (double)x1 / (double)LX;
    for(x2=0; x2<LY; x2++) {
      q[2] = 2. * M_PI * (double)x2 / (double)LY;
    for(x3=0; x3<LZ; x3++) {
      q[3] = 2. * M_PI * (double)x3 / (double)LZ;
      qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; 
      if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) {
        ix = g_ipt[0][x1][x2][x3];
        picount[ix] = 1;
        fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr);
      }
    }}}
    fclose(ofs);
    sprintf(filename, "corr_00.01.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_01-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    sprintf(filename, "corr_jj.01.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr_jj-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(ix=0; ix<VOLUME; ix++) {
      if(picount[ix]>0) {
        for(x0=0; x0<T; x0++) {
          fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im);
        }
      }
    }
    fclose(ofs);
    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for writing: %e seconds\n", retime-ratime);
    free(picount);
  } else if(mode==2) {
    if(make_H3orbits(&oh_id, &oh_count, &oh_val, &oh_nc) != 0) return(123);
    ratime = (double)clock() / CLOCKS_PER_SEC;
    nclass = oh_nc / Thp1;
    if( (piavg = (fftw_complex*)malloc(oh_nc*sizeof(fftw_complex))) == (fftw_complex*)NULL) exit(110);
    if( (picount = (int*)malloc(oh_nc*sizeof(int))) == (int*)NULL) exit(110);

    for(ix=0; ix<oh_nc; ix++) {
      piavg[ix].re = 0.; 
      piavg[ix].im = 0.;
      picount[ix]  = 0;
    }

    for(ix=0; ix<LX*LY*LZ; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*T+x0;
        iiy = oh_id[ix]*Thp1+x0;
        piavg[iiy].re += pi00[iix].re;
        piavg[iiy].im += pi00[iix].im;
        if(x0>0 && x0<T/2) {
          iix = ix*T+(T-x0);
          piavg[iiy].re += pi00[iix].re;
          piavg[iiy].im += pi00[iix].im;
        }
      }
      picount[oh_id[ix]]++;
    }
    for(ix=0; ix<nclass; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*Thp1+x0;
        if(picount[ix]>0) {
          piavg[iix].re /= (double)picount[ix];
          piavg[iix].im /= (double)picount[ix];
          if(x0>0 && x0<T/2) {
            piavg[iix].re /= 2.;
            piavg[iix].im /= 2.;
          }
        }
      }
    }
    sprintf(filename, "corr02_00.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr-00-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(x1=0; x1<nclass; x1++) {
      if(oh_val[0][x1]>=g_qhatsqr_min-_Q2EPS && oh_val[0][x1]<=g_qhatsqr_max+_Q2EPS) {
        ix = x1*Thp1;
        for(x0=0; x0<Thp1; x0++) {
          fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, 
            picount[x1]);
        }
      }
    }
    fclose(ofs);

    for(ix=0; ix<oh_nc; ix++) {
      piavg[ix].re = 0.; 
      piavg[ix].im = 0.;
      picount[ix]  = 0;
    }

    for(ix=0; ix<LX*LY*LZ; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*T+x0;
        iiy = oh_id[ix]*Thp1+x0;
        piavg[iiy].re += pijj[iix].re;
        piavg[iiy].im += pijj[iix].im;
        if(x0>0 && x0<T/2) {
          iix = ix*T+(T-x0);
          piavg[iiy].re += pijj[iix].re;
          piavg[iiy].im += pijj[iix].im;
        }
      }
      picount[oh_id[ix]]++;
    }
    for(ix=0; ix<nclass; ix++) {
      for(x0=0; x0<Thp1; x0++) {
        iix = ix*Thp1+x0;
        if(picount[ix]>0) {
          piavg[iix].re /= (double)picount[ix];
          piavg[iix].im /= (double)picount[ix];
          if(x0>0 && x0<T/2) {
            piavg[iix].re /= 2.;
            piavg[iix].im /= 2.;
          }
        }
    }}
  
    sprintf(filename, "corr02_jj.%.4d", gid);
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    fprintf(stdout, "# writing corr-jj-data to file %s\n", filename);
    fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu);
    for(x1=0; x1<nclass; x1++) {
      ix = x1*Thp1;
      for(x0=0; x0<Thp1; x0++) {
        fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, 
          picount[x1]);
      }
    }
    fclose(ofs);
    sprintf(filename, "corr.02.mom");
    if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) {
      fprintf(stderr, "Error: could not open file %s for writing\n", filename);
      exit(5);
    }
    for(ix=0; ix<VOLUME; ix++) fprintf(ofs, "%5d%25.16e%5d", ix, oh_val[0][ix], picount[ix]);
    fclose(ofs);


    retime = (double)clock() / CLOCKS_PER_SEC;
    fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime);

    free(piavg); free(picount);
  }
*/

#endif
  }

  /***************************************
   * free the allocated memory, finalize *
   ***************************************/
  free(corrt);
  free_geometry();
/*
  free(pi00);
  free(pijj);
*/
  fftw_destroy_plan(plan_m);

  return(0);

}
Пример #26
0
/*
 * Fortran-callable wrapper: run a single in-place transform on `a`
 * with element stride *ldx using the plan *p.  `ny` is not used.
 * Always returns 0.
 */
int F77_FUNC_ (fft_y_stick, FFT_Y_STICK)
   (fftw_plan *p, FFTW_COMPLEX *a, int *ny, int *ldx )
{
   const int stride = *ldx;

   (void) ny;

   /* one transform; no output array is supplied */
   fftw(*p, 1, a, stride, 1, 0, 0, 0);

   return 0;
}
Пример #27
0
void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
{
  int i,total,length,offset,num;
  FFT_SCALAR norm, *data_ptr;

  // system specific constants

#ifdef FFT_SCSL
  int isys = 0;
  FFT_PREC scalef = 1.0;
#endif
#ifdef FFT_DEC
  char c = 'C';
  char f = 'F';
  char b = 'B';
  int one = 1;
#endif
#ifdef FFT_T3E
  int isys = 0;
  double scalef = 1.0;
#endif

  // total = size of data needed in each dim
  // length = length of 1d FFT in each dim
  // total/length = # of 1d FFTs in each dim
  // if total > nsize, limit # of 1d FFTs to available size of data

  int total1 = plan->total1;
  int length1 = plan->length1;
  int total2 = plan->total2;
  int length2 = plan->length2;
  int total3 = plan->total3;
  int length3 = plan->length3;

// fftw3 and Dfti in MKL encode the number of transforms
// into the plan, so we cannot operate on a smaller data set.
#if defined(FFT_MKL) || defined(FFT_FFTW3)
  if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize))
    return;
#endif
  if (total1 > nsize) total1 = (nsize/length1) * length1;
  if (total2 > nsize) total2 = (nsize/length2) * length2;
  if (total3 > nsize) total3 = (nsize/length3) * length3;

  // perform 1d FFTs in each of 3 dimensions
  // data is just an array of 0.0

#ifdef FFT_SGI
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(flag,length1,&data[offset],1,plan->coeff1);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(flag,length2,&data[offset],1,plan->coeff2);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(flag,length3,&data[offset],1,plan->coeff3);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(flag,length1,scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(flag,length2,scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(flag,length3,scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_ACML)
  int info=0;
  num=total1/length1;
  FFT_1D(&flag,&num,&length1,data,plan->coeff1,&info);
  num=total2/length2;
  FFT_1D(&flag,&num,&length2,data,plan->coeff2,&info);
  num=total3/length3;
  FFT_1D(&flag,&num,&length3,data,plan->coeff3,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(&data[offset],&length1,&flag,plan->coeff1);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(&data[offset],&length2,&flag,plan->coeff2);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(&data[offset],&length3,&flag,plan->coeff3);
#elif defined(FFT_MKL)
  if (flag == -1) {
    DftiComputeForward(plan->handle_fast,data);
    DftiComputeForward(plan->handle_mid,data);
    DftiComputeForward(plan->handle_slow,data);
  } else {
    DftiComputeBackward(plan->handle_fast,data);
    DftiComputeBackward(plan->handle_mid,data);
    DftiComputeBackward(plan->handle_slow,data);
  }
#elif defined(FFT_DEC)
  if (flag == -1) {
    for (offset = 0; offset < total1; offset += length1)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length1,&one);
    for (offset = 0; offset < total2; offset += length2)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length2,&one);
    for (offset = 0; offset < total3; offset += length3)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length3,&one);
  } else {
    for (offset = 0; offset < total1; offset += length1)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length1,&one);
    for (offset = 0; offset < total2; offset += length2)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length2,&one);
    for (offset = 0; offset < total3; offset += length3)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length3,&one);
  }
#elif defined(FFT_T3E)
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(&flag,&length1,&scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(&flag,&length2,&scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(&flag,&length3,&scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_FFTW2)
  if (flag == -1) {
    fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0);
    fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0);
    fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0);
  } else {
    fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0);
    fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0);
    fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0);
  }
#elif defined(FFT_FFTW3)
  FFTW_API(plan) theplan;
  if (flag == -1)
    theplan=plan->plan_fast_forward;
  else
    theplan=plan->plan_fast_backward;
  FFTW_API(execute_dft)(theplan,data,data);
  if (flag == -1)
    theplan=plan->plan_mid_forward;
  else
    theplan=plan->plan_mid_backward;
  FFTW_API(execute_dft)(theplan,data,data);
  if (flag == -1)
    theplan=plan->plan_slow_forward;
  else
    theplan=plan->plan_slow_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  if (flag == -1) {
    for (offset = 0; offset < total1; offset += length1)
      kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]);
    for (offset = 0; offset < total2; offset += length2)
      kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]);
    for (offset = 0; offset < total3; offset += length3)
      kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]);
  } else {
    for (offset = 0; offset < total1; offset += length1)
      kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]);
    for (offset = 0; offset < total2; offset += length2)
      kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]);
    for (offset = 0; offset < total3; offset += length3)
      kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]);
  }
#endif

  // scaling if required
  // limit num to size of data

#ifndef FFT_T3E
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = MIN(plan->normnum,nsize);
    data_ptr = (FFT_SCALAR *)data;
    for (i = 0; i < num; i++) {
#if defined(FFT_FFTW3)
      *(data_ptr++) *= norm;
      *(data_ptr++) *= norm;
#elif defined(FFT_MKL)
      data[i] *= norm;
#else
      data[i].re *= norm;
      data[i].im *= norm;
#endif
    }
  }
#endif

#ifdef FFT_T3E
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = MIN(plan->normnum,nsize);
    for (i = 0; i < num; i++) data[i] *= (norm,norm);
  }
#endif
}
Пример #28
0
/*
 * Fortran-callable driver that runs a batch of 1-D FFTW transforms on
 * the array `a`.
 *
 * All arguments arrive by reference (Fortran calling convention):
 *   p     - pointer to the FFTW plan to execute
 *   nfft  - number of transforms to perform
 *   a     - data array handed to fftw() as the input
 *   inca  - stride between consecutive elements of one transform
 *   idist - distance between the starts of consecutive transforms
 *
 * A null output pointer and zero output stride/dist are passed to
 * fftw(); presumably the plan was created with FFTW_IN_PLACE so the
 * result overwrites `a` -- confirm against the plan-creation routine.
 *
 * Always returns 0.
 */
int F77_FUNC_ ( fftw_inplace_drv_1d, FFTW_INPLACE_DRV_1D )
   (fftw_plan *p, int *nfft, FFTW_COMPLEX *a, int *inca, int *idist)
{
   const int howmany = *nfft;
   const int stride = *inca;
   const int dist = *idist;

   fftw(*p, howmany, a, stride, dist, 0, 0, 0);

   return 0;
}
Пример #29
0
/*
 * Receive one block of data for the third FFT stage.
 *
 * The incoming block `val` is scattered into the local line buffer
 * (infoVec[fftid]->info.dataPtr).  A per-fftid message counter is
 * incremented; once it reaches the expected number of messages the
 * counter is reset, the batch of 1-D FFTs is executed in place on the
 * line buffer, and doneFFT(fftid, direction) is invoked.
 *
 *   zpos, xpos - offsets of this block inside the line buffer
 *   val        - incoming data, consumed sequentially
 *   datasize   - number of elements in `val`; asserted to equal the
 *                block size computed from the square dimensions
 *   fftid      - index into infoVec selecting the FFT instance
 *   direction  - nonzero: ZLINE pencils transformed with fwdplan;
 *                zero:    XLINE pencils transformed with bwdplan.
 *                Any other direction/ptype combination aborts.
 */
void
NormalLineArray::doThirdFFT(int zpos, int xpos, complex *val, int datasize, int fftid, int direction)
{
    LineFFTinfo &fftinfo = (infoVec[fftid]->info);
    int ptype = fftinfo.ptype;
    complex *line = fftinfo.dataPtr;
    int sizeX = fftinfo.sizeX;
    int sizeZ = fftinfo.sizeZ;
    int *xsquare = fftinfo.xsquare;
    int *ysquare = fftinfo.ysquare;
    int *zsquare = fftinfo.zsquare;
    int expectSize = 0, expectMsg = 0;

    int x, y, z;
    int idx = 0;        // read position in val
    if (direction) {
        // Block width is the smaller of the y- and z-square widths.
        int sendSquarewidth = ysquare[0] <= zsquare[0] ? ysquare[0] : zsquare[0];
        expectSize = sendSquarewidth * ysquare[1] * zsquare[1];
        expectMsg = sizeZ/ysquare[1] * (zsquare[0]/sendSquarewidth);
        CkAssert(datasize == expectSize);
        for (y = 0; y < zsquare[1]; y++)
            for (x = 0; x < sendSquarewidth; x++)
                for (z = 0; z < ysquare[1]; z++)
                    line[z+zpos+(x+xpos)*sizeZ+y*sizeZ*zsquare[0]] = val[idx++];
    }
    else {
        // Block thickness is the smaller of the y- and x-square thicknesses.
        int sendSquarethick = ysquare[1] <= xsquare[1] ? ysquare[1] : xsquare[1];
        expectSize = ysquare[0]*xsquare[0] * sendSquarethick;
        expectMsg = sizeX/ysquare[0] * (xsquare[1]/sendSquarethick);
        CkAssert(datasize == expectSize);
        for (z = 0; z < sendSquarethick; z++)
            for (y = 0; y < xsquare[0]; y++)
                for (x = 0; x < ysquare[0]; x++)
                    line[(z+zpos)*sizeX*xsquare[0]+y*sizeX+xpos+x] = val[idx++];
    }

    infoVec[fftid]->count ++;

    // Only proceed once every expected block has been received.
    if (infoVec[fftid]->count == expectMsg) {
        infoVec[fftid]->count = 0;

#ifdef HEAVYVERBOSE
        {
            // Debug dump of the assembled line buffer before transforming.
            char fname[80];
            if (direction)
                snprintf(fname, sizeof(fname), "zline_%d.x%d.y%d.out", fftid, thisIndex.x, thisIndex.y);
            else
                snprintf(fname, sizeof(fname), "xline_%d.y%d.z%d.out", fftid, thisIndex.x, thisIndex.y);
            FILE *fp = fopen(fname, "w");
            if (fp != NULL) {
                // NOTE(review): the dump length uses sizeX/xsquare even when
                // direction != 0 -- confirm this matches the ZLINE buffer size.
                for (int k = 0; k < sizeX*xsquare[0]*xsquare[1]; k++)
                    fprintf(fp, "%g %g\n", line[k].re, line[k].im);
                fclose(fp);
            } else {
                // The dump is diagnostic only; skip it rather than crash.
                CkPrintf("Could not open %s for writing\n", fname);
            }
        }
#endif

        // NOTE(review): the ZLINE call passes sizeX as the distance between
        // consecutive transforms although the scatter above indexes z-lines
        // with sizeZ; this is only equivalent when sizeX == sizeZ -- confirm.
        if (direction && ptype == PencilType::ZLINE)
            fftw(fwdplan, zsquare[0]*zsquare[1], (fftw_complex*)line, 1, sizeX, NULL, 0, 0);
        else if (!direction && ptype == PencilType::XLINE)
            fftw(bwdplan, xsquare[0]*xsquare[1], (fftw_complex*)line, 1, sizeX, NULL, 0, 0); // sPencilsPerSlab many 1-D fft's
        else
            CkAbort("Can't do this FFT\n");
#ifdef VERBOSE
        CkPrintf("Third FFT done at [%d %d]\n", thisIndex.x, thisIndex.y);
#endif
        doneFFT(fftid, direction);
    }
}
Пример #30
0
/*
 * FFT comparison and timing driver.
 *
 * Usage:  testffts [sign (1/-1)] [print (0/1)] [frac err tol]
 *
 *   sign         - transform direction handed to both FFT routines
 *   print        - nonzero prints the first (up to 64) output points
 *   frac err tol - allowed fractional difference (default 0.02)
 *
 * For nine data sizes (300000 * (i + 1) complex points) the program
 * fills two identical noisy-sinusoid arrays, transforms one with the
 * "challenger" routine and the other with the FFTW-based reference,
 * reports CPU/elapsed time for each, and lists every value whose
 * fractional difference exceeds the tolerance.
 *
 * Both transforms use the same sign; previously the reference was
 * always run with sign -1, so a run with sign +1 compared an inverse
 * transform against a forward one.  When USERAWFFTW is defined the
 * reference is computed with raw FFTW plans only (previously the data
 * was transformed by fftwcall() and then again by the raw plan).
 *
 * Returns 0 on completion; exits with status 1 if FFTW wisdom cannot
 * be loaded (USERAWFFTW builds only).
 */
int main(int argc, char *argv[])
{
  float *data1, *data2;
  fcomplex *ptr1, *ptr2;
  long n, npts, tmp = 0, ct, plimit, prn = 0;
  long i, isign = -1;
  double err = 0.02;            /* default fractional error tolerance */
#if defined USERAWFFTW
  FILE *wisdomfile;
  fftw_plan plan_forward, plan_inverse;
  static char wisdomfilenm[120];
#endif
  struct tms runtimes;
  double ttim, stim, utim, tott;

  if (argc <= 1 || argc > 4) {
    printf("\nUsage:  testffts [sign (1/-1)] [print (0/1)] [frac err tol]\n\n");
    exit(0);
  }

  /* argv[1] is mandatory; the remaining arguments are optional. */
  isign = atoi(argv[1]);
  if (argc >= 3)
    prn = atoi(argv[2]);
  if (argc == 4)
    err = atof(argv[3]);

  /* import the wisdom for FFTW */
#if defined USERAWFFTW
  /* Bounded formatting: DATABASE is a build-time path of unknown length. */
  snprintf(wisdomfilenm, sizeof(wisdomfilenm), "%s/fftw_wisdom.txt", DATABASE);
  wisdomfile = fopen(wisdomfilenm, "r");
  if (wisdomfile == NULL) {
    printf("Error opening '%s'.  Run makewisdom again.\n", wisdomfilenm);
    printf("Exiting.\n");
    exit(1);
  }
  if (FFTW_FAILURE == fftw_import_wisdom_from_file(wisdomfile)) {
    printf("Error importing FFTW wisdom.\n");
    printf("Exiting.\n");
    exit(1);
  }
  fclose(wisdomfile);
#endif

  for (i = 0; i <= 8; i++) {

    /* npts = 1 << (i + 14);        # of points in FFT */
    /*      npts = 1 << 16;	 # of points in FFT */
    /*      npts = 4096;  	 # of points in FFT */
    /*      npts = 524288;   	 # of points in FFT */

    npts = 300000 * (i + 1);

    n = npts << 1;	       	/* # of float vals */

    data1 = gen_fvect(n);
    data2 = gen_fvect(n);
    ptr1 = (fcomplex *)data1;
    ptr2 = (fcomplex *)data2;

    /*      make the data = {1,1,1,1,-1,-1,-1,-1} (all real) */
    /*
      for (ct = 0; ct < npts/2; ct++) {
      tmp = 2 * ct;
      data1[tmp] = 1.0;
      data1[tmp + 1] = 0.0;
      data1[tmp + npts] = -1.0;
      data1[tmp + npts + 1] = 0.0;
      data2[tmp] = 1.0;
      data2[tmp + 1] = 0.0;
      data2[tmp + npts] = -1.0;
      data2[tmp + npts + 1] = 0.0;
      }
    */

    /*      make the data a sin wave of fourier freq 12.12345... */
    /*
      for (ct = 0; ct < npts; ct++) {
      tmp = 2 * ct;
      data1[tmp] = sin(2.0*3.14159265358979*ct*12.12345/npts)+1.0;
      data2[tmp] = data1[tmp];
      data1[tmp+1] = 0.0;
      data2[tmp+1] = data1[tmp+1];
      }
    */

    /*      make the data a sin wave of fourier freq 12.12345... with noise */

    for (ct = 0; ct < npts; ct++) {
      tmp = 2 * ct;
      data1[tmp] = 10.0 * sin(TWOPI * ct * 12.12345 / npts) + 100.0;
      data1[tmp] = gennor(data1[tmp], 10.0);
      data2[tmp] = data1[tmp];
      data1[tmp + 1] = gennor(100.0, 10.0);
      data2[tmp + 1] = data1[tmp + 1];
    }

    printf("\nCalculating...\n");

    /*  The challenger... */

    /* Snapshot wall-clock and CPU tick counters before the transform. */
    tott = times(&runtimes) / (double) CLK_TCK;
    utim = runtimes.tms_utime / (double) CLK_TCK;
    stim = runtimes.tms_stime / (double) CLK_TCK;

    tablesixstepfft(ptr1, npts, isign);
    /* tablesixstepfft(plan1, plan2, ptr1, npts, isign); */
    /*  sixstepfft(ptr1, npts, isign);       */
    /*  four1(ptr1 - 1, npts, isign);        */
    /*  tablefft(ptr1, npts, isign);         */
    /*  tablesplitfft(ptr1, npts, isign);    */
    /*  realfft(ptr1, n, isign);             */
    /*  fftw(plan, 1, in, 1, 0, out, 1, 0);  */

    tott = times(&runtimes) / (double) CLK_TCK - tott;
    printf("Timing summary (Ransom)  npts = %ld:\n", npts);
    utim = runtimes.tms_utime / (double) CLK_TCK - utim;
    stim = runtimes.tms_stime / (double) CLK_TCK - stim;
    ttim = utim + stim;
    printf("CPU usage: %.3f sec total (%.3f sec user, %.3f sec system)\n",
	   ttim, utim, stim);
    printf("Total time elapsed:  %.3f sec.\n\n", tott);

    /*  The "Standard" FFT... */

    /* The following is for the fftw FFT */

    /* Create new plans (outside the timed region) */
#if defined USERAWFFTW
    plan_forward = fftw_create_plan(npts, -1, FFTW_MEASURE |
                                           FFTW_USE_WISDOM |
                                           FFTW_IN_PLACE);
    plan_inverse = fftw_create_plan(npts, +1, FFTW_MEASURE |
                                           FFTW_USE_WISDOM |
                                           FFTW_IN_PLACE);
#endif

    tott = times(&runtimes) / (double) CLK_TCK;
    utim = runtimes.tms_utime / (double) CLK_TCK;
    stim = runtimes.tms_stime / (double) CLK_TCK;

    /*  four1(ptr2 - 1, npts, isign);        */
    /*  tablefft(ptr2, npts, isign);         */
    /*  tablesplitfft(ptr1, npts, isign);    */
    /*  tablesixstepfft(ptr2, npts, isign);  */
    /*  realft(ptr2 - 1, n, isign);          */

    /*
     * Exactly one reference transform is applied to ptr2, in the same
     * direction as the challenger, so the outputs are comparable.
     */
#if defined USERAWFFTW
    if (isign == -1) {
      fftw(plan_forward, 1, (FFTW_COMPLEX *) ptr2, 1, 1, NULL, 1, 1);
    } else {
      fftw(plan_inverse, 1, (FFTW_COMPLEX *) ptr2, 1, 1, NULL, 1, 1);
    }
#else
    fftwcall(ptr2, npts, isign);
#endif

    tott = times(&runtimes) / (double) CLK_TCK - tott;
    printf("Timing summary (FFTW)  npts = %ld:\n", npts);
    utim = runtimes.tms_utime / (double) CLK_TCK - utim;
    stim = runtimes.tms_stime / (double) CLK_TCK - stim;
    ttim = utim + stim;
    printf("CPU usage: %.3f sec total (%.3f sec user, %.3f sec system)\n",
	   ttim, utim, stim);
    printf("Total time elapsed:  %.3f sec.\n\n", tott);

    /* The following is for the fftw FFT */

#if defined USERAWFFTW
    fftw_destroy_plan(plan_forward);
    fftw_destroy_plan(plan_inverse);
#endif

    /* Check if correct with fractional errors... */

    for (ct = 0; ct < n; ct++) {
      if (data2[ct] != 0.0) {
	if (fabs((1.0 - (data1[ct] / data2[ct]))) > err) {
	  /* Index of the real part of the complex point containing ct. */
	  long re = ct - (ct % 2);

	  printf("Values at freq %ld do not match to %4.2f%% fractional error:\n", re / 2, err * 100);
	  printf("  rl1 = %f  im1 = %f   rl2 = %f  im2 = %f\n",
		 data1[re], data1[re + 1], data2[re], data2[re + 1]);
	}
      }
    }

    /* Print the output... */

    if (prn) {
      /* Show at most the first 64 complex points. */
      plimit = (npts >= 64) ? 64 : npts;

      printf("\n   #1:  Challenger FFT...                      ");
      printf("#2:  Standard...\n");
      for (ct = 0; ct < plimit; ct++) {
	printf(" %3ld  rl = %12.3f   ", ct, data1[2 * ct]);
	printf("im = %12.3f    rl = %12.3f   im = %12.3f\n",
	       data1[2 * ct + 1], data2[2 * ct], data2[2 * ct + 1]);
      }
    }

    free(data1);
    free(data2);
  }

  return 0;

}