Example #1
0
/*
 * Debugging allocator.
 *
 * Allocates PAD_FACTOR * n + TWOINTS bytes with malloc and returns a
 * pointer TWOINTS bytes into that block.  The first two int slots of
 * the raw block hold the requested size n and the MAGIC tag; every byte
 * after the header is filled with the pattern (char) (i ^ 0xDEADBEEF).
 *
 * Also updates the running statistics: fftw_malloc_total (bytes
 * requested), fftw_malloc_cnt (live blocks) and their recorded maxima.
 *
 * Calls fftw_die if malloc fails.
 */
void *fftw_malloc(size_t n)
{
     char *p;
     /*
      * size_t, not int: the loop bound PAD_FACTOR * n is computed from a
      * size_t and may exceed INT_MAX, in which case an int counter would
      * overflow.  Only the low byte of i reaches the pattern, so the
      * bytes written are unchanged.
      */
     size_t i;

     fftw_malloc_total += n;

     if (fftw_malloc_total > fftw_malloc_max)
	  fftw_malloc_max = fftw_malloc_total;

     p = (char *) malloc(PAD_FACTOR * n + TWOINTS);
     if (!p)
	  fftw_die("fftw_malloc: out of memory\n");

     /* store the size in a known position */
     ((int *) p)[0] = n;
     ((int *) p)[1] = MAGIC;

     /* fill user area and padding with a recognizable pattern */
     for (i = 0; i < PAD_FACTOR * n; ++i)
	  p[i + TWOINTS] = (char) (i ^ 0xDEADBEEF);

     ++fftw_malloc_cnt;

     if (fftw_malloc_cnt > fftw_malloc_cnt_max)
	  fftw_malloc_cnt_max = fftw_malloc_cnt;

     /* skip the size we stored previously */
     return (void *) (p + TWOINTS);
}
Example #2
0
/*
 * Compare the n-element arrays A and B (both walked with stride 1) using
 * compute_error_complex.  When the reported error is above TOLERANCE,
 * flush stdout, print the error to stderr and abort through fftw_die.
 */
void array_compare(fftw_complex *A, fftw_complex *B, int n)
{
     const double err = compute_error_complex(A, 1, B, 1, n);

     /* written as a negated '>' so a NaN error is treated as before */
     if (!(err > TOLERANCE))
	  return;

     fflush(stdout);
     fprintf(stderr, "Found relative error %e\n", err);
     fftw_die("failure in Ergun's verification procedure\n");
}
Example #3
0
File: fftwnd.c Project: Pinkii-/PCA
/*
 * Apply the multi-dimensional plan p to howmany arrays.
 *
 * in/istride/idist and out/ostride/odist describe the input and output
 * layouts.  For in-place plans of rank >= 2 the output description is
 * replaced by the input one; for in-place rank-1 plans the work array is
 * handed to fftw() in the output position.  Rank 0 does nothing.
 *
 * When the plan needs work space (p->nwork != 0) but carries none
 * (p->work == NULL), a temporary array is allocated for the duration of
 * the call.
 */
void fftwnd(fftwnd_plan p, int howmany,
	    fftw_complex *in, int istride, int idist,
	    fftw_complex *out, int ostride, int odist)
{
     fftw_complex *scratch;

#ifdef FFTW_DEBUG
     if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
	 && p->nwork && p->work)
	  fftw_die("bug with FFTW_THREADSAFE flag\n");
#endif

     /* use the plan's own work array unless one has to be created */
     scratch = p->work;
     if (p->nwork && !p->work)
	  scratch = (fftw_complex *)
	      fftw_malloc(p->nwork * sizeof(fftw_complex));

     if (p->rank == 1) {
	  if (p->is_in_place)	/* fft is in-place */
	       fftw(p->plans[0], howmany, in, istride, idist,
		    scratch, 1, 0);
	  else
	       fftw(p->plans[0], howmany, in, istride, idist,
		    out, ostride, odist);
     } else if (p->rank != 0) {	/* rank >= 2 */
	  if (p->is_in_place) {
	       out = in;
	       ostride = istride;
	       odist = idist;
	  }

	  if (howmany <= 1 || odist >= ostride) {
	       /* one full transform per array */
	       int k = 0;

	       while (k < howmany) {
		    fftwnd_aux(p, 0,
			       in + k * idist, istride,
			       out + k * odist, ostride,
			       scratch);
		    ++k;
	       }
	  } else {
	       fftwnd_aux_howmany(p, 0, howmany,
				  in, istride, idist,
				  out, ostride, odist,
				  scratch);
	  }
     }

     /* release the temporary work array, if we made one */
     if (p->nwork && !p->work)
	  fftw_free(scratch);

}
Example #4
0
/*
 * Debugging counterpart of fftw_malloc: validate and release a block.
 *
 * p is the user pointer; the raw block starts TWOINTS bytes earlier and
 * begins with two ints: the requested size n and the MAGIC tag.  A NULL
 * p is ignored.  Every failed consistency check ends in fftw_die.
 */
void fftw_free(void *p)
{
     char *q;
     
     if (!p)
	  return;

     /* step back to the start of the raw allocation (the header) */
     q = ((char *) p) - TWOINTS;
     if (!q)
	  fftw_die("fftw_free: tried to free NULL+TWOINTS pointer!\n");

     {
	  /* read the header before it is overwritten below */
	  int n = ((int *) q)[0];
	  int magic = ((int *) q)[1];
	  int i;

	  WHEN_VERBOSE( {
		       printf("FFTW_FREE %d\n", n);
		       fflush(stdout);
		       })

	  *((int *) q) = 0;	/* set to zero to detect duplicate free's */

	  if (magic != MAGIC)
	       fftw_die("Wrong magic in fftw_free()!\n");
	  /* invalidate the tag so a second free of this block is caught */
	  ((int *) q)[1] = ~MAGIC;

	  if (n < 0)
	       fftw_die("Tried to free block with corrupt size descriptor!\n");

	  /* undo the byte accounting done at allocation time */
	  fftw_malloc_total -= n;

	  if (fftw_malloc_total < 0)
	       fftw_die("fftw_malloc_total went negative!\n");

	  /* check for writing past end of array: */
	  /* bytes n .. PAD_FACTOR*n-1 must still hold the 0xDEADBEEF pattern */
	  for (i = n; i < PAD_FACTOR * n; ++i)
	       if (q[i + TWOINTS] != (char) (i ^ 0xDEADBEEF)) {
		    fflush(stdout);
		    fprintf(stderr, "Byte %d past end of array has changed!\n",
			    i - n + 1);
		    fftw_die("Array bounds overwritten!\n");
	       }
	  /* overwrite the whole region with a different pattern before freeing */
	  for (i = 0; i < PAD_FACTOR * n; ++i)
	       q[i + TWOINTS] = (char) (i ^ 0xBEEFDEAD);

	  --fftw_malloc_cnt;

	  if (fftw_malloc_cnt < 0)
	       fftw_die("fftw_malloc_cnt went negative!\n");

	  /* block count and byte total must reach zero together */
	  if (fftw_malloc_cnt == 0 && fftw_malloc_total > 0 ||
	      fftw_malloc_cnt > 0 && fftw_malloc_total == 0)
	       fftw_die("fftw_malloc_cnt/total not zero at the same time!\n");

	  free(q);
     }
Example #5
0
File: rader.c Project: Pinkii-/PCA
/*
 * Return the smallest g in [1, p) whose period mod p is p - 1, i.e. a
 * generator of the multiplicative group mod p (p prime).  Calls fftw_die
 * when the search reaches p without finding one.
 */
static int find_generator(int p)
{
     int g = 1;

     while (g < p && period(g, p) != p - 1)
	  ++g;

     if (g == p)
	  fftw_die("couldn't find generator for Rader\n");
     return g;
}
Example #6
0
File: rader.c Project: Pinkii-/PCA
/*
 * Order of n in the multiplicative group mod p (p prime): the smallest
 * m >= 1 with n^m == 1 (mod p).  A running power that hits 0 means p
 * was not prime, which is reported through fftw_die.
 */
static int period(int n, int p)
{
     int power, m;

     for (power = n, m = 1; power != 1; ++m) {
	  power = MULMOD(power, n, p);
	  if (power == 0)
	       fftw_die("non-prime order in Rader\n");
     }
     return m;
}
Example #7
0
/*
 * Drop one reference to the twiddle-factor record tw.
 *
 * When the reference count reaches zero the record is unlinked from the
 * global list twlist, fftw_twiddle_size is reduced by tw->n, and both
 * the twiddle array and the record itself are released with fftw_free.
 * Reaching the end of the list without finding tw is an internal error
 * and is reported through fftw_die.
 */
void fftw_destroy_twiddle(fftw_twiddle * tw)
{
     fftw_twiddle **p;
     --tw->refcnt;

     if (tw->refcnt == 0) {
	  /* remove from the list of known twiddle factors */
	  /*
	   * p always holds the address of a link field and is therefore
	   * never NULL; the end of the list is signalled by *p == NULL.
	   * Testing *p keeps the walk from dereferencing a NULL node and
	   * lets the fftw_die below actually be reached.
	   */
	  for (p = &twlist; *p; p = &((*p)->next))
	       if (*p == tw) {
		    *p = tw->next;
		    fftw_twiddle_size -= tw->n;
		    fftw_free(tw->twarray);
		    fftw_free(tw);
		    return;
	       }
	  fftw_die("BUG in fftw_destroy_twiddle\n");
     }
}
/*
 * Export the wisdom into a freshly allocated string.
 *
 * Two passes over fftw_export_wisdom: the first, with emission_counter,
 * fills in the length; the second, with string_emitter, advances a
 * cursor through a buffer of length + 1 bytes.  The cursor must end
 * exactly length bytes past the start, otherwise fftw_die is called.
 * The buffer comes from fftw_malloc; returns 0 if that yields NULL.
 */
char *fftw_export_wisdom_to_string(void)
{
     int length = 0;
     char *buffer, *cursor;

     /* pass 1: measure */
     fftw_export_wisdom(emission_counter, (void *) &length);

     buffer = (char *) fftw_malloc(sizeof(char) * (length + 1));
     if (buffer == 0)
	  return 0;

     /* pass 2: emit */
     cursor = buffer;
     fftw_export_wisdom(string_emitter, (void *) &cursor);

     if (cursor != buffer + length)
	  fftw_die("Unexpected output string length!\n");

     return buffer;
}
Example #9
0
/*
 * Record a piece of wisdom (a planner decision) for the given problem
 * description.
 *
 * Nothing is stored unless flags contains both FFTW_USE_WISDOM and
 * FFTW_MEASURE.  If fftw_wisdom_lookup (called with its last argument
 * set to 1) reports a hit, the existing entry has been updated and no
 * new one is created; otherwise a new entry is pushed on the front of
 * wisdom_list.  Asking for FFTW_VECTOR_RECURSE while flags forbids it
 * is a planner bug and ends in fftw_die.
 */
void fftw_wisdom_add(int n, int flags, fftw_direction dir,
		     enum fftw_wisdom_category category,
		     int istride, int ostride,
		     enum fftw_node_type type,
		     int signature,
		     fftw_recurse_kind recurse_kind)
{
     struct wisdom *entry;

     if (recurse_kind == FFTW_VECTOR_RECURSE &&
	 (flags & FFTW_NO_VECTOR_RECURSE))
	  fftw_die("bug in planner (conflicting plan options)\n");

     /* wisdom disabled, or not produced by measurement: ignore */
     if (!(flags & FFTW_USE_WISDOM) || !(flags & FFTW_MEASURE))
	  return;

     /* an existing entry was overwritten in place */
     if (fftw_wisdom_lookup(n, flags, dir, category, istride, ostride,
			    &type, &signature, &recurse_kind, 1))
	  return;

     entry = (struct wisdom *) fftw_malloc(sizeof(struct wisdom));

     /* problem description */
     entry->n = n;
     entry->flags = flags;
     entry->dir = dir;
     entry->category = category;
     entry->istride = istride;
     entry->ostride = ostride;

     /* planner decision */
     entry->type = type;
     entry->signature = signature;
     entry->recurse_kind = recurse_kind;

     /* link at the head of the list */
     entry->next = wisdom_list;
     wisdom_list = entry;
}
Example #10
0
/*
 * Execute the real-data plan node p on a single transform of size n.
 *
 * in/istride and out/ostride describe the input and output arrays.
 * The node type selects the action:
 *   - FFTW_REAL2HC / FFTW_HC2REAL: call the leaf codelet directly;
 *   - FFTW_HC2HC / FFTW_RGENERIC: split n into r * m (r is the node's
 *     size), run the r sub-transforms of size m through rexecutor_many
 *     (or rexecutor_many_vector), and apply the node's codelet.  In the
 *     real-to-complex direction the sub-transforms run first and the
 *     codelet works on out; in the complex-to-real direction the codelet
 *     works on in first and the sub-transforms follow.
 *
 * recurse_kind is only consulted when FFTW_ENABLE_VECTOR_RECURSE is
 * defined; otherwise rexecutor_many is always used.  Any unknown node
 * type or direction ends in fftw_die.
 */
void rfftw_executor_simple(int n, fftw_real *in,
			   fftw_real *out,
			   fftw_plan_node *p,
			   int istride,
			   int ostride,
			   fftw_recurse_kind recurse_kind)
{
     switch (p->type) {
	 case FFTW_REAL2HC:
	      HACK_ALIGN_STACK_ODD;
	      /* second output pointer starts n * ostride past out and is
	         walked with stride -ostride */
	      (p->nodeu.real2hc.codelet) (in, out, out + n * ostride,
					  istride, ostride, -ostride);
	      break;

	 case FFTW_HC2REAL:
	      HACK_ALIGN_STACK_ODD;
	      /* mirror image of the case above, on the input side */
	      (p->nodeu.hc2real.codelet) (in, in + n * istride, out,
					  istride, -istride, ostride);
	      break;

	 case FFTW_HC2HC:
	      {
		   int r = p->nodeu.hc2hc.size;
		   int m = n / r;
		   /* 
		    * please do resist the temptation of initializing
		    * these variables here.  Doing so forces the
		    * compiler to keep a live variable across the
		    * recursive call.
		    */
		   fftw_hc2hc_codelet *codelet;
		   fftw_complex *W;

		   switch (p->nodeu.hc2hc.dir) {
		       case FFTW_REAL_TO_COMPLEX:
			    /* r sub-transforms of size m, then the codelet on out */
#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
				 rexecutor_many(m, in, out,
						p->nodeu.hc2hc.recurse,
						istride * r, ostride,
						r, istride, m * ostride,
						FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    else
				 rexecutor_many_vector(m, in, out,
						p->nodeu.hc2hc.recurse,
						istride * r, ostride,
						r, istride, m * ostride);
#endif

			    W = p->nodeu.hc2hc.tw->twarray;
			    codelet = p->nodeu.hc2hc.codelet;
			    HACK_ALIGN_STACK_EVEN;
			    codelet(out, W, m * ostride, m, ostride);
			    break;
		       case FFTW_COMPLEX_TO_REAL:
			    /* codelet on in first, then r sub-transforms of size m */
			    W = p->nodeu.hc2hc.tw->twarray;
			    codelet = p->nodeu.hc2hc.codelet;
			    HACK_ALIGN_STACK_EVEN;
			    codelet(in, W, m * istride, m, istride);

#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
				 rexecutor_many(m, in, out,
						p->nodeu.hc2hc.recurse,
						istride, ostride * r,
						r, m * istride, ostride,
						FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    else
				 rexecutor_many_vector(m, in, out,
						p->nodeu.hc2hc.recurse,
						istride, ostride * r,
						r, m * istride, ostride);
#endif
			    break;
		       default:
			    goto bug;
		   }

		   break;
	      }

	 case FFTW_RGENERIC:
	      {
		   int r = p->nodeu.rgeneric.size;
		   int m = n / r;
		   fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet;
		   fftw_complex *W = p->nodeu.rgeneric.tw->twarray;

		   switch (p->nodeu.rgeneric.dir) {
		       case FFTW_REAL_TO_COMPLEX:
			    /* same ordering as the HC2HC case: recurse, then codelet */
#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
				 rexecutor_many(m, in, out,
						p->nodeu.rgeneric.recurse,
						istride * r, ostride,
						r, istride, m * ostride,
						FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    else
				 rexecutor_many_vector(m, in, out,
						p->nodeu.rgeneric.recurse,
						istride * r, ostride,
						r, istride, m * ostride);
#endif

			    codelet(out, W, m, r, n, ostride);
			    break;
		       case FFTW_COMPLEX_TO_REAL:
			    /* codelet first, then recurse */
			    codelet(in, W, m, r, n, istride);

#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
				 rexecutor_many(m, in, out,
						p->nodeu.rgeneric.recurse,
						istride, ostride * r,
						r, m * istride, ostride,
						FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
			    else
				 rexecutor_many_vector(m, in, out,
						p->nodeu.rgeneric.recurse,
						istride, ostride * r,
						r, m * istride, ostride);
#endif
			    break;
		       default:
			    goto bug;
		   }

		   break;
	      }

	 default:
	      /* also the target of the "goto bug" jumps above */
	    bug:
	      fftw_die("BUG in rexecutor: invalid plan\n");
	      break;
     }
}
Example #11
0
/* rexecutor_many_vector is like rexecutor_many, but it pushes the
   howmany loop down to the leaves of the transform: */
/*
 * Parameters: n is the transform size handled by node p; in/istride and
 * out/ostride describe one transform; howmany transforms are processed,
 * consecutive ones being idist (input) and odist (output) elements
 * apart.  For non-leaf nodes the recursion is done once per sub-transform
 * with howmany passed through unchanged, and the node's codelet is then
 * applied once per transform.  An unknown node type or direction ends
 * in fftw_die.
 */
static void rexecutor_many_vector(int n, fftw_real *in,
				  fftw_real *out,
				  fftw_plan_node *p,
				  int istride,
				  int ostride,
				  int howmany, int idist, int odist)
{
     switch (p->type) {
	 case FFTW_REAL2HC:
	      {
		   fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
		   int s;

		   HACK_ALIGN_STACK_ODD;
		   /* leaf: one codelet call per transform */
		   for (s = 0; s < howmany; ++s)
			codelet(in + s * idist, out + s * odist,
				out + n * ostride + s * odist,
				istride, ostride, -ostride);
		   break;
	      }

	 case FFTW_HC2REAL:
	      {
		   fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
		   int s;

		   HACK_ALIGN_STACK_ODD;
		   /* leaf: one codelet call per transform */
		   for (s = 0; s < howmany; ++s)
			codelet(in + s * idist, in + n * istride + s * idist,
				out + s * odist,
				istride, -istride, ostride);
		   break;
	      }

	 case FFTW_HC2HC:
	      {
		   int r = p->nodeu.hc2hc.size;
		   int m = n / r;
		   int i;
		   fftw_hc2hc_codelet *codelet;
		   fftw_complex *W;

		   switch (p->nodeu.hc2hc.dir) {
		       case FFTW_REAL_TO_COMPLEX:
			    /* r sub-transforms of size m, each over all howmany arrays */
			    for (i = 0; i < r; ++i)
				 rexecutor_many_vector(m, in + i * istride,
						       out + i * (m*ostride),
						       p->nodeu.hc2hc.recurse,
						       istride * r, ostride,
						       howmany, idist, odist);

			    W = p->nodeu.hc2hc.tw->twarray;
			    codelet = p->nodeu.hc2hc.codelet;
			    HACK_ALIGN_STACK_EVEN;
			    /* then the codelet, once per output array */
			    for (i = 0; i < howmany; ++i)
				 codelet(out + i * odist, 
					 W, m * ostride, m, ostride);
			    break;
		       case FFTW_COMPLEX_TO_REAL:
			    W = p->nodeu.hc2hc.tw->twarray;
			    codelet = p->nodeu.hc2hc.codelet;
			    HACK_ALIGN_STACK_EVEN;
			    /* codelet on every input array first ... */
			    for (i = 0; i < howmany; ++i)
				 codelet(in + i * idist,
					 W, m * istride, m, istride);

			    /* ... then the r sub-transforms */
			    for (i = 0; i < r; ++i)
				 rexecutor_many_vector(m, in + i * (m*istride),
						       out + i * ostride,
						       p->nodeu.hc2hc.recurse,
						       istride, ostride * r,
						       howmany, idist, odist);
			    break;
		       default:
			    goto bug;
		   }

		   break;
	      }

	 case FFTW_RGENERIC:
	      {
		   int r = p->nodeu.rgeneric.size;
		   int m = n / r;
		   int i;
		   fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet;
		   fftw_complex *W = p->nodeu.rgeneric.tw->twarray;

		   switch (p->nodeu.rgeneric.dir) {
		       case FFTW_REAL_TO_COMPLEX:
			    /* recurse first, codelet afterwards */
			    for (i = 0; i < r; ++i)
				 rexecutor_many_vector(m, in + i * istride,
						 out + i * (m * ostride),
					       p->nodeu.rgeneric.recurse,
						   istride * r, ostride,
						       howmany, idist, odist);

			    for (i = 0; i < howmany; ++i)
				 codelet(out + i * odist, W, m, r, n, ostride);
			    break;
		       case FFTW_COMPLEX_TO_REAL:
			    /* codelet first, recurse afterwards */
			    for (i = 0; i < howmany; ++i)
				 codelet(in + i * idist, W, m, r, n, istride);

			    for (i = 0; i < r; ++i)
				 rexecutor_many_vector(m, in + i * m * istride,
						       out + i * ostride,
					       p->nodeu.rgeneric.recurse,
						   istride, ostride * r,
						       howmany, idist, odist);
			    break;
		       default:
			    goto bug;
		   }

		   break;
	      }

	 default:
	      /* also the target of the "goto bug" jumps above */
	    bug:
	      fftw_die("BUG in rexecutor: invalid plan\n");
	      break;
     }
}
Example #12
0
File: rader.c Project: Pinkii-/PCA
static fftw_rader_data *create_rader_aux(int p, int flags)
{
     fftw_complex *omega, *work;
     int g, ginv, gpower;
     int i;
     FFTW_TRIG_REAL twoPiOverN;
     fftw_real scale = 1.0 / (p - 1);	/* for convolution */
     fftw_plan plan;
     fftw_rader_data *d;

     if (p < 2)
	  fftw_die("non-prime order in Rader\n");

     flags &= ~FFTW_IN_PLACE;

     d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data));

     g = find_generator(p);
     ginv = power_mod(g, p - 2, p);

     omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));

     plan = fftw_create_plan(p - 1, FFTW_FORWARD,
			     flags & ~FFTW_NO_VECTOR_RECURSE);

     work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));

     twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p;
     gpower = 1;
     for (i = 0; i < p - 1; ++i) {
	  c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower);
	  c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN 
							       * gpower);
	  gpower = MULMOD(gpower, ginv, p);
     }

     /* fft permuted roots of unity */
     fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1,
			  plan->recurse_kind);

     fftw_free(work);

     d->plan = plan;
     d->omega = omega;
     d->g = g;
     d->ginv = ginv;
     d->p = p;
     d->flags = flags;
     d->refcount = 1;
     d->next = NULL;

     d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc));
     d->cdesc->name = NULL;
     d->cdesc->codelet = NULL;
     d->cdesc->size = p;
     d->cdesc->dir = FFTW_FORWARD;
     d->cdesc->type = FFTW_RADER;
     d->cdesc->signature = g;
     d->cdesc->ntwiddle = 0;
     d->cdesc->twiddle_order = NULL;
     return d;
}
Example #13
0
/*
 * Run the multi-dimensional complex-to-real plan p on howmany arrays.
 *
 * The plan's direction must be FFTW_COMPLEX_TO_REAL, otherwise fftw_die
 * is called.  For in-place plans the output description is derived from
 * the input one: out aliases in, ostride = istride, and odist is 1 when
 * idist == 1 < istride, else 2 * idist.
 *
 * Work space: the plan's own array is used when present.  A temporary
 * array is allocated (and freed before returning) when the plan has
 * none, or when an in-place, interleaved (istride > idist), multi-array
 * call needs more than p->nwork elements.
 */
void rfftwnd_complex_to_real(fftwnd_plan p, int howmany,
			     fftw_complex *in, int istride, int idist,
			     fftw_real *out, int ostride, int odist)
{
     fftw_complex *scratch = p->work;
     int rank = p->rank;
     int owns_scratch = 0;

     if (p->dir != FFTW_COMPLEX_TO_REAL)
	  fftw_die("rfftwnd_complex_to_real with real-to-complex plan");

#ifdef FFTW_DEBUG
     if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
	 && p->nwork && p->work)
	  fftw_die("bug with FFTW_THREADSAFE flag");
#endif

     if (p->is_in_place) {
	  out = (fftw_real *) in;
	  ostride = istride;
	  if (idist == 1 && idist < istride)	/* ugh */
	       odist = 1;
	  else
	       odist = idist * 2;

	  if (howmany > 1 && istride > idist && rank > 0) {
	       int needed = p->n[rank - 1] * howmany;

	       if (needed > p->nwork) {
		    scratch = (fftw_complex *)
			fftw_malloc(sizeof(fftw_complex) * needed);
		    if (!scratch)
			 fftw_die("error allocating work array");
		    owns_scratch = 1;
	       }
	  }
     }

     if (p->nwork && !scratch) {
	  scratch = (fftw_complex *)
	      fftw_malloc(sizeof(fftw_complex) * p->nwork);
	  owns_scratch = 1;
     }

     if (rank == 1) {
	  if (p->is_in_place && howmany > 1 && istride > idist)
	       rfftw_c2real_overlap_aux(p->plans[0], howmany,
					in, istride, idist,
					out, ostride, odist,
					(fftw_real *) scratch);
	  else
	       rfftw_c2real_aux(p->plans[0], howmany,
				in, istride, idist,
				out, ostride, odist,
				(fftw_real *) scratch);
     } else if (rank != 0) {	/* rank >= 2 */
	  if (howmany > 1 && ostride > odist) {
	       rfftwnd_c2real_aux_howmany(p, 0, howmany,
					  in, istride, idist,
					  out, ostride, odist,
					  scratch);
	  } else {
	       int k = 0;

	       while (k < howmany) {
		    rfftwnd_c2real_aux(p, 0,
				       in + k * idist, istride,
				       out + k * odist, ostride,
				       (fftw_real *) scratch);
		    ++k;
	       }
	  }
     }

     if (owns_scratch)
	  fftw_free(scratch);
}
Example #14
0
File: rader.c Project: Pinkii-/PCA
/*
 * Twiddle pass of prime radix r computed with Rader's algorithm.
 *
 * A is the data array, processed as m groups; within a group the r
 * elements are m * stride apart and consecutive groups start stride
 * apart.  W supplies r - 1 twiddle factors per group.  d carries the
 * generator g, its inverse ginv, the precomputed array omega and the
 * size-(r-1) plan used for both transforms below.
 *
 * A temporary array of r - 1 complex values is taken from fftw_malloc.
 */
void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W,
			int m, int r, int stride,
			fftw_rader_data * d)
{
     fftw_complex *tmp = (fftw_complex *)
     fftw_malloc((r - 1) * sizeof(fftw_complex));
     int i, k, gpower = 1, g = d->g, ginv = d->ginv;
     fftw_real a0r, a0i;
     fftw_complex *omega = d->omega;

     for (i = 0; i < m; ++i, A += stride, W += r - 1) {
	  /* 
	   * Here, we fft W[k-1] * A[k*(m*stride)], using Rader.
	   * (Actually, W is pre-permuted to match the permutation that we 
	   * will do on A.) 
	   */

	  /* First, permute the input and multiply by W, storing in tmp: */
	  /* gpower == g^k mod r in the following loop */
	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
	       fftw_real rA, iA, rW, iW;
	       rW = c_re(W[k]);
	       iW = c_im(W[k]);
	       rA = c_re(A[gpower * (m * stride)]);
	       iA = c_im(A[gpower * (m * stride)]);
	       /* complex product W[k] * A[...] */
	       c_re(tmp[k]) = rW * rA - iW * iA;
	       c_im(tmp[k]) = rW * iA + iW * rA;
	  }

	  /* after r - 1 multiplications by g, gpower must be back at 1 */
	  WHEN_DEBUG( {
		     if (gpower != 1)
		     fftw_die("incorrect generator in Rader\n");
		     }
	  );

	  /* FFT tmp to A: */
	  fftw_executor_simple(r - 1, tmp, A + (m * stride),
			       d->plan->root, 1, m * stride,
			       d->plan->recurse_kind);

	  /* set output DC component: */
	  /* the original A[0] is saved first; it is needed again below */
	  a0r = c_re(A[0]);
	  a0i = c_im(A[0]);
	  c_re(A[0]) += c_re(A[(m * stride)]);
	  c_im(A[0]) += c_im(A[(m * stride)]);

	  /* now, multiply by omega: */
	  /* the product is stored conjugated (imaginary part negated) */
	  for (k = 0; k < r - 1; ++k) {
	       fftw_real rA, iA, rW, iW;
	       rW = c_re(omega[k]);
	       iW = c_im(omega[k]);
	       rA = c_re(A[(k + 1) * (m * stride)]);
	       iA = c_im(A[(k + 1) * (m * stride)]);
	       c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
	       c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);
	  }

	  /* this will add A[0] to all of the outputs after the ifft */
	  c_re(A[(m * stride)]) += a0r;
	  c_im(A[(m * stride)]) -= a0i;

	  /* inverse FFT: */
	  /* same plan as the forward pass above, applied to the conjugated
	     data; the result is conjugated again in the loop below */
	  fftw_executor_simple(r - 1, A + (m * stride), tmp,
			       d->plan->root, m * stride, 1,
			       d->plan->recurse_kind);

	  /* finally, do inverse permutation to unshuffle the output: */
	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
	       c_re(A[gpower * (m * stride)]) = c_re(tmp[k]);
	       c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]);
	  }

	  /* r - 1 multiplications by ginv must also return gpower to 1 */
	  WHEN_DEBUG( {
		     if (gpower != 1)
		     fftw_die("incorrect generator in Rader\n");
		     }
	  );
Example #15
0
/* executor_many_vector is like executor_many, but it pushes the
   howmany loop down to the leaves of the transform: */
/*
 * Parameters: n is the transform size handled by node p; in/istride and
 * out/ostride describe one transform; howmany transforms are processed,
 * consecutive ones being idist (input) and odist (output) elements
 * apart.  For every non-leaf node type the r sub-transforms of size
 * m = n / r are done first (each over all howmany arrays), then the
 * node's codelet is applied once per output array.  An unknown node
 * type ends in fftw_die.
 */
static void executor_many_vector(int n, const fftw_complex *in,
				 fftw_complex *out,
				 fftw_plan_node *p,
				 int istride,
				 int ostride,
				 int howmany, int idist, int odist)
{
     int s;

     switch (p->type) {
	 case FFTW_NOTW:
	      {
		   fftw_notw_codelet *codelet = p->nodeu.notw.codelet;

		   HACK_ALIGN_STACK_ODD;
		   /* leaf: one codelet call per transform */
		   for (s = 0; s < howmany; ++s)
			codelet(in + s * idist,
				out + s * odist,
				istride, ostride);
		   break;
	      }

	 case FFTW_TWIDDLE:
	      {
		   int r = p->nodeu.twiddle.size;
		   int m = n / r;
		   fftw_twiddle_codelet *codelet;
		   fftw_complex *W;

		   /* s indexes the r sub-transforms here ... */
		   for (s = 0; s < r; ++s)
			executor_many_vector(m, in + s * istride, 
					     out + s * (m * ostride),
					     p->nodeu.twiddle.recurse,
					     istride * r, ostride,
					     howmany, idist, odist);

		   codelet = p->nodeu.twiddle.codelet;
		   W = p->nodeu.twiddle.tw->twarray;

		   /* This may not be the right thing.  We maybe should have
		      the howmany loop for the twiddle codelets at the
		      topmost level of the recursion, since odist is big;
		      i.e. separate recursions for twiddle and notwiddle. */
		   HACK_ALIGN_STACK_EVEN;
		   /* ... and the howmany output arrays here */
		   for (s = 0; s < howmany; ++s)
			codelet(out + s * odist, W, m * ostride, m, ostride);

		   break;
	      }

	 case FFTW_GENERIC:
	      {
		   int r = p->nodeu.generic.size;
		   int m = n / r;
		   fftw_generic_codelet *codelet;
		   fftw_complex *W;

		   /* sub-transforms first */
		   for (s = 0; s < r; ++s)
			executor_many_vector(m, in + s * istride, 
					     out + s * (m * ostride),
					     p->nodeu.generic.recurse,
					     istride * r, ostride,
					     howmany, idist, odist);

		   codelet = p->nodeu.generic.codelet;
		   W = p->nodeu.generic.tw->twarray;
		   /* then the generic codelet on each output array */
		   for (s = 0; s < howmany; ++s)
			codelet(out + s * odist, W, m, r, n, ostride);

		   break;
	      }

	 case FFTW_RADER:
	      {
		   int r = p->nodeu.rader.size;
		   int m = n / r;
		   fftw_rader_codelet *codelet;
		   fftw_complex *W;

		   /* sub-transforms first */
		   for (s = 0; s < r; ++s)
			executor_many_vector(m, in + s * istride, 
					     out + s * (m * ostride),
					     p->nodeu.rader.recurse,
					     istride * r, ostride,
					     howmany, idist, odist);

		   codelet = p->nodeu.rader.codelet;
		   W = p->nodeu.rader.tw->twarray;
		   /* the Rader codelet additionally receives the node's rader_data */
		   for (s = 0; s < howmany; ++s)
			codelet(out + s * odist, W, m, r, ostride,
				p->nodeu.rader.rader_data);

		   break;
	      }

	 default:
	      fftw_die("BUG in executor: invalid plan\n");
	      break;
     }     
}
Example #16
0
/*
 * Do *not* declare simple executor static--we need to call it
 * from other files...also, preface its name with "fftw_"
 * to avoid any possible name collisions. 
 */
/*
 * Execute plan node p on a single complex transform of size n.
 *
 * in/istride and out/ostride describe the input and output arrays.
 * FFTW_NOTW nodes call their codelet directly.  For FFTW_TWIDDLE,
 * FFTW_GENERIC and FFTW_RADER nodes, n is split as r * m (r is the
 * node's size): the r sub-transforms of size m are run through
 * executor_many (or executor_many_vector), then the node's codelet is
 * applied to out.
 *
 * recurse_kind is only consulted when FFTW_ENABLE_VECTOR_RECURSE is
 * defined; otherwise executor_many is always used.  An unknown node
 * type ends in fftw_die.
 */
void fftw_executor_simple(int n, const fftw_complex *in,
			  fftw_complex *out,
			  fftw_plan_node *p,
			  int istride,
			  int ostride,
			  fftw_recurse_kind recurse_kind)
{
     switch (p->type) {
	 case FFTW_NOTW:
	      HACK_ALIGN_STACK_ODD;
	      (p->nodeu.notw.codelet)(in, out, istride, ostride);
	      break;

	 case FFTW_TWIDDLE:
	      {
		   int r = p->nodeu.twiddle.size;
		   int m = n / r;
		   fftw_twiddle_codelet *codelet;
		   fftw_complex *W;

		   /* the if/else below only exists with vector recursion
		      enabled; otherwise the first call is unconditional */
#ifdef FFTW_ENABLE_VECTOR_RECURSE
		   if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
			executor_many(m, in, out,
				      p->nodeu.twiddle.recurse,
				      istride * r, ostride,
				      r, istride, m * ostride,
				      FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
		   else
			executor_many_vector(m, in, out,
					     p->nodeu.twiddle.recurse,
					     istride * r, ostride,
					     r, istride, m * ostride);
#endif

		   /* codelet and W are fetched only after the recursion */
		   codelet = p->nodeu.twiddle.codelet;
		   W = p->nodeu.twiddle.tw->twarray;

		   HACK_ALIGN_STACK_EVEN;
		   codelet(out, W, m * ostride, m, ostride);

		   break;
	      }

	 case FFTW_GENERIC:
	      {
		   int r = p->nodeu.generic.size;
		   int m = n / r;
		   fftw_generic_codelet *codelet;
		   fftw_complex *W;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
		   if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
			executor_many(m, in, out,
				      p->nodeu.generic.recurse,
				      istride * r, ostride,
				      r, istride, m * ostride,
                                      FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
		   else
			executor_many_vector(m, in, out,
					     p->nodeu.generic.recurse,
					     istride * r, ostride,
					     r, istride, m * ostride);
#endif

		   codelet = p->nodeu.generic.codelet;
		   W = p->nodeu.generic.tw->twarray;
		   codelet(out, W, m, r, n, ostride);

		   break;
	      }

	 case FFTW_RADER:
	      {
		   int r = p->nodeu.rader.size;
		   int m = n / r;
		   fftw_rader_codelet *codelet;
		   fftw_complex *W;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
		   if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
			executor_many(m, in, out,
				      p->nodeu.rader.recurse,
				      istride * r, ostride,
				      r, istride, m * ostride,
                                      FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
		   else
			executor_many_vector(m, in, out,
					     p->nodeu.rader.recurse,
					     istride * r, ostride,
					     r, istride, m * ostride);
#endif

		   codelet = p->nodeu.rader.codelet;
		   W = p->nodeu.rader.tw->twarray;
		   /* the Rader codelet additionally receives the node's rader_data */
		   codelet(out, W, m, r, ostride,
			   p->nodeu.rader.rader_data);

		   break;
	      }

	 default:
	      fftw_die("BUG in executor: invalid plan\n");
	      break;
     }
}
Example #17
0
/*
 * Threaded variant of the multi-dimensional complex-to-real transform:
 * run plan p on howmany arrays using nthreads threads.
 *
 * The plan's direction must be FFTW_COMPLEX_TO_REAL, otherwise fftw_die
 * is called.  For in-place plans out aliases in, ostride = istride, and
 * odist is 1 when idist == 1, else 2 * idist.
 *
 * A work array is always allocated for the call and freed before
 * returning.  Its default size is p->nwork * nthreads elements; for
 * in-place, interleaved (istride > idist), multi-array calls the
 * per-thread amount is raised to p->n[rank - 1] * howmany if that is
 * larger, and rank-1 plans then get exactly that per-thread amount.
 */
void rfftwnd_threads_complex_to_real(int nthreads, fftwnd_plan p, int howmany,
				     fftw_complex *in, int istride, int idist,
				     fftw_real *out, int ostride, int odist)
{
     fftw_complex *scratch = 0;
     int rank = p->rank;
     int per_thread = p->nwork;
     int total = per_thread * nthreads;

     if (p->dir != FFTW_COMPLEX_TO_REAL)
	  fftw_die("rfftwnd_complex_to_real with real-to-complex plan");

     if (p->is_in_place) {
	  out = (fftw_real *) in;
	  ostride = istride;
	  if (idist == 1)	/* ugh */
	       odist = 1;
	  else
	       odist = idist * 2;

	  if (howmany > 1 && istride > idist && rank > 0) {
	       int needed = p->n[rank - 1] * howmany;

	       if (needed > per_thread)
		    per_thread = needed;

	       if (rank == 1)
		    total = per_thread;
	       else if (per_thread * nthreads > total)
		    total = per_thread * nthreads;
	  }
     }

     scratch = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * total);

     if (rank == 1) {
	  if (p->is_in_place && howmany > 1 && istride > idist)
	       rfftw_c2real_overlap_threads_aux(p->plans[0], howmany,
						in, istride, idist,
						out, ostride, odist,
						(fftw_real *) scratch,
						nthreads);
	  else
	       rfftw_c2real_threads_aux(p->plans[0], howmany,
					in, istride, idist,
					out, ostride, odist,
					(fftw_real *) scratch, nthreads);
     } else if (rank != 0) {	/* rank >= 2 */
	  if (howmany > 1 && ostride > odist) {
	       rfftwnd_c2real_aux_howmany_threads(p, 0, howmany,
						  in, istride, idist,
						  out, ostride, odist,
						  scratch, per_thread,
						  nthreads);
	  } else {
	       int k = 0;

	       while (k < howmany) {
		    rfftwnd_c2real_threads_aux(p, 0,
					       in + k * idist,
					       istride,
					       out + k * odist,
					       ostride,
					       scratch,
					       nthreads);
		    ++k;
	       }
	  }
     }

     fftw_free(scratch);
}