/*
 * Debugging allocator.
 *
 * Obtains PAD_FACTOR * n + TWOINTS bytes from malloc().  The first two ints
 * of the raw block form a header holding the requested size and the MAGIC
 * tag; every byte behind the header is filled with a position-dependent
 * pattern.  The global byte/block counters and their maxima are updated.
 *
 * Calls fftw_die() when malloc() fails.
 *
 * Returns the address TWOINTS bytes past the start of the raw block.
 */
void *fftw_malloc(size_t n)
{
    char *raw;
    char *payload;
    int *header;
    int k;

    /* byte accounting, including the running maximum */
    fftw_malloc_total += n;
    if (fftw_malloc_max < fftw_malloc_total)
        fftw_malloc_max = fftw_malloc_total;

    raw = (char *) malloc(PAD_FACTOR * n + TWOINTS);
    if (!raw)
        fftw_die("fftw_malloc: out of memory\n");

    /* header layout: [0] = requested size, [1] = magic tag */
    header = (int *) raw;
    header[0] = n;
    header[1] = MAGIC;

    /* pattern-fill everything that follows the header */
    payload = raw + TWOINTS;
    for (k = 0; k < PAD_FACTOR * n; ++k)
        payload[k] = (char) (k ^ 0xDEADBEEF);

    /* block accounting, including the running maximum */
    ++fftw_malloc_cnt;
    if (fftw_malloc_cnt_max < fftw_malloc_cnt)
        fftw_malloc_cnt_max = fftw_malloc_cnt;

    /* callers never see the header */
    return (void *) payload;
}
/*
 * Verification helper: obtains an error figure for A versus B from
 * compute_error_complex() (called with 1 after each array argument and n as
 * the count) and aborts via fftw_die() when that figure is greater than
 * TOLERANCE, after printing it to stderr.
 */
void array_compare(fftw_complex *A, fftw_complex *B, int n)
{
    double rel_err = compute_error_complex(A, 1, B, 1, n);

    /* written as a negated '>' so every non-failing value takes this path */
    if (!(rel_err > TOLERANCE))
        return;

    fflush(stdout);
    fprintf(stderr, "Found relative error %e\n", rel_err);
    fftw_die("failure in Ergun's verification procedure\n");
}
/*
 * Execute the multi-dimensional plan p on howmany data sets.
 *
 * Scratch space: when the plan asks for work space (p->nwork != 0) but
 * carries none (p->work == 0), a buffer of p->nwork complex elements is
 * allocated for the duration of the call and released before returning;
 * otherwise p->work is used as is.
 *
 * Dispatch on p->rank:
 *   0        nothing is done;
 *   1        the call is forwarded to fftw() with plans[0]; for an in-place
 *            plan the scratch buffer is passed in the output position with
 *            stride 1 and dist 0;
 *   other    for an in-place plan the output description is overwritten
 *            with the input description, then either fftwnd_aux_howmany()
 *            handles all data sets at once (howmany > 1 and odist < ostride)
 *            or fftwnd_aux() is invoked once per data set.
 */
void fftwnd(fftwnd_plan p, int howmany,
            fftw_complex *in, int istride, int idist,
            fftw_complex *out, int ostride, int odist)
{
    fftw_complex *scratch;
    int scratch_is_local;

#ifdef FFTW_DEBUG
    if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
        && p->nwork && p->work)
        fftw_die("bug with FFTW_THREADSAFE flag\n");
#endif

    scratch_is_local = (p->nwork && !p->work);
    if (scratch_is_local)
        scratch = (fftw_complex *) fftw_malloc(p->nwork * sizeof(fftw_complex));
    else
        scratch = p->work;

    if (p->rank == 1) {
        if (p->is_in_place) {
            /* fft is in-place */
            fftw(p->plans[0], howmany, in, istride, idist, scratch, 1, 0);
        } else {
            fftw(p->plans[0], howmany, in, istride, idist,
                 out, ostride, odist);
        }
    } else if (p->rank != 0) {
        /* every rank other than 0 and 1 */
        if (p->is_in_place) {
            out = in;
            ostride = istride;
            odist = idist;
        }

        if (howmany > 1 && odist < ostride) {
            fftwnd_aux_howmany(p, 0, howmany, in, istride, idist,
                               out, ostride, odist, scratch);
        } else {
            int set;

            for (set = 0; set < howmany; ++set) {
                fftwnd_aux(p, 0, in + set * idist, istride,
                           out + set * odist, ostride, scratch);
            }
        }
    }

    if (scratch_is_local)
        fftw_free(scratch);
}
void fftw_free(void *p) { char *q; if (!p) return; q = ((char *) p) - TWOINTS; if (!q) fftw_die("fftw_free: tried to free NULL+TWOINTS pointer!\n"); { int n = ((int *) q)[0]; int magic = ((int *) q)[1]; int i; WHEN_VERBOSE( { printf("FFTW_FREE %d\n", n); fflush(stdout); }) *((int *) q) = 0; /* set to zero to detect duplicate free's */ if (magic != MAGIC) fftw_die("Wrong magic in fftw_free()!\n"); ((int *) q)[1] = ~MAGIC; if (n < 0) fftw_die("Tried to free block with corrupt size descriptor!\n"); fftw_malloc_total -= n; if (fftw_malloc_total < 0) fftw_die("fftw_malloc_total went negative!\n"); /* check for writing past end of array: */ for (i = n; i < PAD_FACTOR * n; ++i) if (q[i + TWOINTS] != (char) (i ^ 0xDEADBEEF)) { fflush(stdout); fprintf(stderr, "Byte %d past end of array has changed!\n", i - n + 1); fftw_die("Array bounds overwritten!\n"); } for (i = 0; i < PAD_FACTOR * n; ++i) q[i + TWOINTS] = (char) (i ^ 0xBEEFDEAD); --fftw_malloc_cnt; if (fftw_malloc_cnt < 0) fftw_die("fftw_malloc_cnt went negative!\n"); if (fftw_malloc_cnt == 0 && fftw_malloc_total > 0 || fftw_malloc_cnt > 0 && fftw_malloc_total == 0) fftw_die("fftw_malloc_cnt/total not zero at the same time!\n"); free(q); }
/*
 * Return the smallest g in [1, p) whose period modulo p (as reported by
 * period()) equals p - 1, i.e. a generator of the multiplicative group
 * mod p when p is prime.  If the search runs all the way up to p without a
 * hit, fftw_die() is called.
 */
static int find_generator(int p)
{
    int candidate = 1;

    while (candidate < p) {
        if (period(candidate, p) == p - 1)
            return candidate;
        ++candidate;
    }

    /* the scan ended without a hit */
    if (candidate == p)
        fftw_die("couldn't find generator for Rader\n");

    return candidate;
}
/*
 * Find the period of n in the multiplicative group mod p (p prime).
 * That is, return the smallest m such that n^m == 1 mod p.
 *
 * Successive powers are formed with MULMOD.  fftw_die() is called when a
 * power becomes 0, or when p multiplications have been performed without
 * reaching 1.  The second condition can only arise for a non-prime p: the
 * powers of an element that is not invertible mod p never return to 1, and
 * without the bound such an input (for example n = 2, p = 6, whose powers
 * alternate between 2 and 4) would loop forever.
 */
static int period(int n, int p)
{
    int prod = n;
    int order = 1;

    while (prod != 1) {
        prod = MULMOD(prod, n, p);
        ++order;

        /* hit zero, or ran out of distinct residues without reaching 1 */
        if (prod == 0 || (order >= p && prod != 1))
            fftw_die("non-prime order in Rader\n");
    }

    return order;
}
/*
 * Drop one reference to the twiddle-factor record tw.
 *
 * When the reference count reaches zero the record is unlinked from the
 * global list twlist, fftw_twiddle_size is reduced by tw->n, and both the
 * twiddle array and the record itself are released with fftw_free().
 *
 * If the count reaches zero but tw is not found on the list, fftw_die() is
 * called.
 */
void fftw_destroy_twiddle(fftw_twiddle * tw)
{
    fftw_twiddle **p;

    --tw->refcnt;

    if (tw->refcnt == 0) {
        /*
         * Remove from the list of known twiddle factors.  p walks the
         * *links* (address of twlist, then address of each next field), so
         * it is never null itself; the walk must stop when the link it
         * points at is null, otherwise a missing entry would dereference a
         * null record instead of reaching the fftw_die() below.
         */
        for (p = &twlist; *p; p = &((*p)->next))
            if (*p == tw) {
                *p = tw->next;
                fftw_twiddle_size -= tw->n;
                fftw_free(tw->twarray);
                fftw_free(tw);
                return;
            }

        fftw_die("BUG in fftw_destroy_twiddle\n");
    }
}
/*
 * Export wisdom into a freshly allocated buffer.
 *
 * fftw_export_wisdom() is run twice: first with emission_counter and the
 * address of an int (initially 0), then with string_emitter and the address
 * of a cursor that starts at the beginning of a buffer of (count + 1) chars
 * obtained from fftw_malloc().  Afterwards the cursor must sit exactly
 * `count` chars past the start of the buffer; otherwise fftw_die() is
 * called.
 *
 * Returns the buffer, or 0 if fftw_malloc() returned a null pointer.
 * NOTE(review): the buffer presumably has to be released with fftw_free()
 * by the caller -- confirm against the public documentation.
 */
char *fftw_export_wisdom_to_string(void)
{
    int length = 0;
    char *buffer;
    char *cursor;

    /* first pass: measure */
    fftw_export_wisdom(emission_counter, (void *) &length);

    buffer = (char *) fftw_malloc(sizeof(char) * (length + 1));
    if (!buffer)
        return 0;

    /* second pass: emit into the buffer, advancing the cursor */
    cursor = buffer;
    fftw_export_wisdom(string_emitter, (void *) &cursor);

    if (cursor != buffer + length)
        fftw_die("Unexpected output string length!\n");

    return buffer;
}
/*
 * Record one piece of wisdom.
 *
 * Aborts via fftw_die() when flags forbid vector recursion but recurse_kind
 * asks for it.  Does nothing unless flags contain both FFTW_USE_WISDOM and
 * FFTW_MEASURE.  If fftw_wisdom_lookup() (called with 1 as its last
 * argument) reports a hit, nothing further is done; otherwise a new entry
 * holding all arguments is allocated and pushed onto the front of
 * wisdom_list.
 */
void fftw_wisdom_add(int n, int flags, fftw_direction dir,
                     enum fftw_wisdom_category category,
                     int istride, int ostride,
                     enum fftw_node_type type,
                     int signature,
                     fftw_recurse_kind recurse_kind)
{
    struct wisdom *entry;

    if (recurse_kind == FFTW_VECTOR_RECURSE
        && (flags & FFTW_NO_VECTOR_RECURSE))
        fftw_die("bug in planner (conflicting plan options)\n");

    /*
     * Wisdom is ignored when disabled, and only measurements produce it.
     */
    if (!(flags & FFTW_USE_WISDOM) || !(flags & FFTW_MEASURE))
        return;

    /* an existing entry was found; the lookup call took care of it */
    if (fftw_wisdom_lookup(n, flags, dir, category, istride, ostride,
                           &type, &signature, &recurse_kind, 1))
        return;

    entry = (struct wisdom *) fftw_malloc(sizeof(struct wisdom));

    /* key */
    entry->n = n;
    entry->flags = flags;
    entry->dir = dir;
    entry->category = category;
    entry->istride = istride;
    entry->ostride = ostride;

    /* payload */
    entry->type = type;
    entry->signature = signature;
    entry->recurse_kind = recurse_kind;

    /* remember this wisdom: link in at the head of the list */
    entry->next = wisdom_list;
    wisdom_list = entry;
}
/*
 * Run the plan node p once on a single data set of size n.
 *
 * The node type selects the work:
 *   FFTW_REAL2HC / FFTW_HC2REAL
 *       leaf nodes: the node's codelet is called directly on in/out.
 *   FFTW_HC2HC / FFTW_RGENERIC
 *       composite nodes with radix r = node size and m = n / r.  The
 *       direction stored in the node fixes the order: for
 *       FFTW_REAL_TO_COMPLEX the r sub-transforms of size m are executed
 *       first and the node's codelet is then applied to `out`; for
 *       FFTW_COMPLEX_TO_REAL the codelet is applied to `in` first and the
 *       sub-transforms follow.
 *
 * When FFTW_ENABLE_VECTOR_RECURSE is defined, recurse_kind chooses between
 * rexecutor_many() and rexecutor_many_vector() for the sub-transforms;
 * otherwise rexecutor_many() is always used.
 *
 * Any unknown node type or direction ends in fftw_die().
 */
void rfftw_executor_simple(int n, fftw_real *in, fftw_real *out,
                           fftw_plan_node *p,
                           int istride, int ostride,
                           fftw_recurse_kind recurse_kind)
{
    switch (p->type) {
    case FFTW_REAL2HC:
        HACK_ALIGN_STACK_ODD;
        /* second output pointer starts n * ostride in and walks backwards */
        (p->nodeu.real2hc.codelet) (in, out, out + n * ostride,
                                    istride, ostride, -ostride);
        break;

    case FFTW_HC2REAL:
        HACK_ALIGN_STACK_ODD;
        /* second input pointer starts n * istride in and walks backwards */
        (p->nodeu.hc2real.codelet) (in, in + n * istride, out,
                                    istride, -istride, ostride);
        break;

    case FFTW_HC2HC:
        {
            int r = p->nodeu.hc2hc.size;
            int m = n / r;
            /*
             * please do resist the temptation of initializing
             * these variables here.  Doing so forces the
             * compiler to keep a live variable across the
             * recursive call.
             */
            fftw_hc2hc_codelet *codelet;
            fftw_complex *W;

            switch (p->nodeu.hc2hc.dir) {
            case FFTW_REAL_TO_COMPLEX:
                /*
                 * r sub-transforms of size m: input stride istride * r,
                 * consecutive sub-transforms istride apart on input and
                 * m * ostride apart on output.
                 */
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
                    rexecutor_many(m, in, out, p->nodeu.hc2hc.recurse,
                                   istride * r, ostride,
                                   r, istride, m * ostride,
                                   FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                else
                    rexecutor_many_vector(m, in, out,
                                          p->nodeu.hc2hc.recurse,
                                          istride * r, ostride,
                                          r, istride, m * ostride);
#endif

                /* fetched only now, after the recursion (see note above) */
                W = p->nodeu.hc2hc.tw->twarray;
                codelet = p->nodeu.hc2hc.codelet;
                HACK_ALIGN_STACK_EVEN;
                codelet(out, W, m * ostride, m, ostride);
                break;

            case FFTW_COMPLEX_TO_REAL:
                /* mirror image: codelet on the input first, then recurse */
                W = p->nodeu.hc2hc.tw->twarray;
                codelet = p->nodeu.hc2hc.codelet;
                HACK_ALIGN_STACK_EVEN;
                codelet(in, W, m * istride, m, istride);

                /*
                 * r sub-transforms of size m: output stride ostride * r,
                 * consecutive sub-transforms m * istride apart on input
                 * and ostride apart on output.
                 */
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
                    rexecutor_many(m, in, out, p->nodeu.hc2hc.recurse,
                                   istride, ostride * r,
                                   r, m * istride, ostride,
                                   FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                else
                    rexecutor_many_vector(m, in, out,
                                          p->nodeu.hc2hc.recurse,
                                          istride, ostride * r,
                                          r, m * istride, ostride);
#endif
                break;

            default:
                goto bug;
            }

            break;
        }

    case FFTW_RGENERIC:
        {
            int r = p->nodeu.rgeneric.size;
            int m = n / r;
            fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet;
            fftw_complex *W = p->nodeu.rgeneric.tw->twarray;

            switch (p->nodeu.rgeneric.dir) {
            case FFTW_REAL_TO_COMPLEX:
                /* same recursion layout as the HC2HC real-to-complex case */
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
                    rexecutor_many(m, in, out, p->nodeu.rgeneric.recurse,
                                   istride * r, ostride,
                                   r, istride, m * ostride,
                                   FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                else
                    rexecutor_many_vector(m, in, out,
                                          p->nodeu.rgeneric.recurse,
                                          istride * r, ostride,
                                          r, istride, m * ostride);
#endif

                /* the generic codelet receives m, r and n explicitly */
                codelet(out, W, m, r, n, ostride);
                break;

            case FFTW_COMPLEX_TO_REAL:
                codelet(in, W, m, r, n, istride);

                /* same recursion layout as the HC2HC complex-to-real case */
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
                    rexecutor_many(m, in, out, p->nodeu.rgeneric.recurse,
                                   istride, ostride * r,
                                   r, m * istride, ostride,
                                   FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
                else
                    rexecutor_many_vector(m, in, out,
                                          p->nodeu.rgeneric.recurse,
                                          istride, ostride * r,
                                          r, m * istride, ostride);
#endif
                break;

            default:
                goto bug;
            }

            break;
        }

    default:
        /* also the landing point for an invalid direction inside a node */
      bug:
        fftw_die("BUG in rexecutor: invalid plan\n");
        break;
    }
}
/*
 * Variant of rexecutor_many that keeps the howmany loop at the leaves of
 * the transform: composite nodes recurse once per radix branch, passing
 * howmany/idist/odist through unchanged, and every codelet invocation is
 * repeated howmany times with the data pointer advanced by idist or odist.
 *
 * Any unknown node type or direction ends in fftw_die().
 */
static void rexecutor_many_vector(int n, fftw_real *in, fftw_real *out,
                                  fftw_plan_node *p,
                                  int istride, int ostride,
                                  int howmany, int idist, int odist)
{
    switch (p->type) {
    case FFTW_REAL2HC:
        {
            fftw_real2hc_codelet *leaf = p->nodeu.real2hc.codelet;
            int v;

            HACK_ALIGN_STACK_ODD;
            for (v = 0; v < howmany; ++v) {
                leaf(in + v * idist, out + v * odist,
                     out + n * ostride + v * odist,
                     istride, ostride, -ostride);
            }
            break;
        }

    case FFTW_HC2REAL:
        {
            fftw_hc2real_codelet *leaf = p->nodeu.hc2real.codelet;
            int v;

            HACK_ALIGN_STACK_ODD;
            for (v = 0; v < howmany; ++v) {
                leaf(in + v * idist, in + n * istride + v * idist,
                     out + v * odist,
                     istride, -istride, ostride);
            }
            break;
        }

    case FFTW_HC2HC:
        {
            int radix = p->nodeu.hc2hc.size;
            int sub_n = n / radix;
            int j;
            fftw_hc2hc_codelet *pass;
            fftw_complex *tw;

            if (p->nodeu.hc2hc.dir == FFTW_REAL_TO_COMPLEX) {
                /* sub-transforms first ... */
                for (j = 0; j < radix; ++j) {
                    rexecutor_many_vector(sub_n, in + j * istride,
                                          out + j * (sub_n * ostride),
                                          p->nodeu.hc2hc.recurse,
                                          istride * radix, ostride,
                                          howmany, idist, odist);
                }

                /* ... then the combining pass on every output vector */
                tw = p->nodeu.hc2hc.tw->twarray;
                pass = p->nodeu.hc2hc.codelet;
                HACK_ALIGN_STACK_EVEN;
                for (j = 0; j < howmany; ++j) {
                    pass(out + j * odist, tw, sub_n * ostride, sub_n,
                         ostride);
                }
            } else if (p->nodeu.hc2hc.dir == FFTW_COMPLEX_TO_REAL) {
                /* pass on every input vector first ... */
                tw = p->nodeu.hc2hc.tw->twarray;
                pass = p->nodeu.hc2hc.codelet;
                HACK_ALIGN_STACK_EVEN;
                for (j = 0; j < howmany; ++j) {
                    pass(in + j * idist, tw, sub_n * istride, sub_n,
                         istride);
                }

                /* ... then the sub-transforms */
                for (j = 0; j < radix; ++j) {
                    rexecutor_many_vector(sub_n, in + j * (sub_n * istride),
                                          out + j * ostride,
                                          p->nodeu.hc2hc.recurse,
                                          istride, ostride * radix,
                                          howmany, idist, odist);
                }
            } else {
                goto bug;
            }
            break;
        }

    case FFTW_RGENERIC:
        {
            int radix = p->nodeu.rgeneric.size;
            int sub_n = n / radix;
            int j;
            fftw_rgeneric_codelet *pass = p->nodeu.rgeneric.codelet;
            fftw_complex *tw = p->nodeu.rgeneric.tw->twarray;

            if (p->nodeu.rgeneric.dir == FFTW_REAL_TO_COMPLEX) {
                for (j = 0; j < radix; ++j) {
                    rexecutor_many_vector(sub_n, in + j * istride,
                                          out + j * (sub_n * ostride),
                                          p->nodeu.rgeneric.recurse,
                                          istride * radix, ostride,
                                          howmany, idist, odist);
                }
                for (j = 0; j < howmany; ++j)
                    pass(out + j * odist, tw, sub_n, radix, n, ostride);
            } else if (p->nodeu.rgeneric.dir == FFTW_COMPLEX_TO_REAL) {
                for (j = 0; j < howmany; ++j)
                    pass(in + j * idist, tw, sub_n, radix, n, istride);
                for (j = 0; j < radix; ++j) {
                    rexecutor_many_vector(sub_n, in + j * sub_n * istride,
                                          out + j * ostride,
                                          p->nodeu.rgeneric.recurse,
                                          istride, ostride * radix,
                                          howmany, idist, odist);
                }
            } else {
                goto bug;
            }
            break;
        }

    default:
        /* shared exit for every kind of malformed plan */
      bug:
        fftw_die("BUG in rexecutor: invalid plan\n");
        break;
    }
}
static fftw_rader_data *create_rader_aux(int p, int flags) { fftw_complex *omega, *work; int g, ginv, gpower; int i; FFTW_TRIG_REAL twoPiOverN; fftw_real scale = 1.0 / (p - 1); /* for convolution */ fftw_plan plan; fftw_rader_data *d; if (p < 2) fftw_die("non-prime order in Rader\n"); flags &= ~FFTW_IN_PLACE; d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data)); g = find_generator(p); ginv = power_mod(g, p - 2, p); omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); plan = fftw_create_plan(p - 1, FFTW_FORWARD, flags & ~FFTW_NO_VECTOR_RECURSE); work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p; gpower = 1; for (i = 0; i < p - 1; ++i) { c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower); c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN * gpower); gpower = MULMOD(gpower, ginv, p); } /* fft permuted roots of unity */ fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1, plan->recurse_kind); fftw_free(work); d->plan = plan; d->omega = omega; d->g = g; d->ginv = ginv; d->p = p; d->flags = flags; d->refcount = 1; d->next = NULL; d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc)); d->cdesc->name = NULL; d->cdesc->codelet = NULL; d->cdesc->size = p; d->cdesc->dir = FFTW_FORWARD; d->cdesc->type = FFTW_RADER; d->cdesc->signature = g; d->cdesc->ntwiddle = 0; d->cdesc->twiddle_order = NULL; return d; }
/*
 * Execute a complex-to-real multi-dimensional plan on howmany data sets.
 *
 * Aborts via fftw_die() if the plan's direction is not
 * FFTW_COMPLEX_TO_REAL.
 *
 * In-place plans ignore the caller's out/ostride/odist: the output aliases
 * the input, ostride becomes istride, and odist becomes 1 when
 * idist == 1 < istride and 2 * idist otherwise.
 *
 * Scratch space starts out as p->work.  A temporary buffer is allocated
 * (and released before returning) in two situations: an in-place call with
 * howmany > 1, istride > idist and rank > 0 that needs more than p->nwork
 * elements (p->n[rank - 1] * howmany), or a plan that wants work space but
 * has none attached.
 *
 * Rank 0 does nothing, rank 1 goes through one of the rfftw_c2real*_aux
 * helpers, every other rank goes through the rfftwnd_c2real_aux* helpers.
 */
void rfftwnd_complex_to_real(fftwnd_plan p, int howmany,
                             fftw_complex *in, int istride, int idist,
                             fftw_real *out, int ostride, int odist)
{
    fftw_complex *scratch = p->work;
    int rank = p->rank;
    int owns_scratch = 0;

    if (p->dir != FFTW_COMPLEX_TO_REAL)
        fftw_die("rfftwnd_complex_to_real with real-to-complex plan");

#ifdef FFTW_DEBUG
    if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
        && p->nwork && p->work)
        fftw_die("bug with FFTW_THREADSAFE flag");
#endif

    if (p->is_in_place) {
        ostride = istride;

        /* ugh */
        if (idist == 1 && idist < istride)
            odist = 1;
        else
            odist = idist * 2;

        out = (fftw_real *) in;

        if (howmany > 1 && istride > idist && rank > 0) {
            int needed = p->n[rank - 1] * howmany;

            if (needed > p->nwork) {
                scratch = (fftw_complex *)
                    fftw_malloc(sizeof(fftw_complex) * needed);
                if (!scratch)
                    fftw_die("error allocating work array");
                owns_scratch = 1;
            }
        }
    }

    if (p->nwork && !scratch) {
        scratch = (fftw_complex *)
            fftw_malloc(sizeof(fftw_complex) * p->nwork);
        owns_scratch = 1;
    }

    if (rank == 1) {
        if (p->is_in_place && howmany > 1 && istride > idist) {
            rfftw_c2real_overlap_aux(p->plans[0], howmany,
                                     in, istride, idist,
                                     out, ostride, odist,
                                     (fftw_real *) scratch);
        } else {
            rfftw_c2real_aux(p->plans[0], howmany,
                             in, istride, idist,
                             out, ostride, odist,
                             (fftw_real *) scratch);
        }
    } else if (rank != 0) {
        /* every rank other than 0 and 1 */
        if (howmany > 1 && ostride > odist) {
            rfftwnd_c2real_aux_howmany(p, 0, howmany,
                                       in, istride, idist,
                                       out, ostride, odist,
                                       scratch);
        } else {
            int set;

            for (set = 0; set < howmany; ++set) {
                rfftwnd_c2real_aux(p, 0,
                                   in + set * idist, istride,
                                   out + set * odist, ostride,
                                   (fftw_real *) scratch);
            }
        }
    }

    if (owns_scratch)
        fftw_free(scratch);
}
/*
 * Rader twiddle pass over A.
 *
 * For each of the m iterations (A advances by stride and W by r - 1 per
 * iteration) the visible code:
 *   1. gathers r - 1 elements of A in generator order, multiplies them by
 *      the matching entries of W and stores the products in tmp;
 *   2. runs d->plan on tmp, writing into A starting at A[m * stride] with
 *      stride m * stride;
 *   3. adds the first transformed element to A[0] (after saving the old
 *      A[0]);
 *   4. multiplies the transformed elements by d->omega, storing the
 *      imaginary part negated, and folds the saved A[0] into the first one;
 *   5. runs the same plan again from A back into tmp;
 *   6. scatters tmp back into A in inverse-generator order, again negating
 *      the imaginary part.
 *
 * tmp holds r - 1 complex elements and comes from fftw_malloc().
 *
 * NOTE(review): this definition is cut off in the text available here -- it
 * ends inside the outer loop, so the loop's closing brace, whatever follows
 * it (presumably the release of tmp) and the end of the function are not
 * visible.  Confirm against the complete source.
 */
void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W,
                        int m, int r, int stride,
                        fftw_rader_data * d)
{
    fftw_complex *tmp = (fftw_complex *)
        fftw_malloc((r - 1) * sizeof(fftw_complex));
    int i, k, gpower = 1, g = d->g, ginv = d->ginv;
    fftw_real a0r, a0i;
    fftw_complex *omega = d->omega;

    for (i = 0; i < m; ++i, A += stride, W += r - 1) {
        /*
         * Here, we fft W[k-1] * A[k*(m*stride)], using Rader.
         * (Actually, W is pre-permuted to match the permutation that we
         * will do on A.)
         */

        /* First, permute the input and multiply by W, storing in tmp: */
        /* gpower == g^k mod r in the following loop */
        for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
            fftw_real rA, iA, rW, iW;
            rW = c_re(W[k]);
            iW = c_im(W[k]);
            rA = c_re(A[gpower * (m * stride)]);
            iA = c_im(A[gpower * (m * stride)]);
            /* complex product W[k] * A[...] */
            c_re(tmp[k]) = rW * rA - iW * iA;
            c_im(tmp[k]) = rW * iA + iW * rA;
        }

        /* after r - 1 multiplications by g the power must be back at 1 */
        WHEN_DEBUG( {
            if (gpower != 1)
                fftw_die("incorrect generator in Rader\n");
        }
        );

        /* FFT tmp to A: */
        fftw_executor_simple(r - 1, tmp, A + (m * stride),
                             d->plan->root, 1, m * stride,
                             d->plan->recurse_kind);

        /* set output DC component: */
        a0r = c_re(A[0]);
        a0i = c_im(A[0]);
        c_re(A[0]) += c_re(A[(m * stride)]);
        c_im(A[0]) += c_im(A[(m * stride)]);

        /* now, multiply by omega: */
        for (k = 0; k < r - 1; ++k) {
            fftw_real rA, iA, rW, iW;
            rW = c_re(omega[k]);
            iW = c_im(omega[k]);
            rA = c_re(A[(k + 1) * (m * stride)]);
            iA = c_im(A[(k + 1) * (m * stride)]);
            c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
            /* imaginary part of the product is stored with its sign flipped */
            c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);
        }

        /* this will add A[0] to all of the outputs after the ifft */
        c_re(A[(m * stride)]) += a0r;
        c_im(A[(m * stride)]) -= a0i;

        /* inverse FFT: */
        /* (same plan root as above; input and output strides are swapped) */
        fftw_executor_simple(r - 1, A + (m * stride), tmp,
                             d->plan->root, m * stride, 1,
                             d->plan->recurse_kind);

        /* finally, do inverse permutation to unshuffle the output: */
        for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
            c_re(A[gpower * (m * stride)]) = c_re(tmp[k]);
            /* sign flip again on the way out */
            c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]);
        }

        /* r - 1 multiplications by ginv must also return the power to 1 */
        WHEN_DEBUG( {
            if (gpower != 1)
                fftw_die("incorrect generator in Rader\n");
        }
        );
/*
 * Variant of executor_many that keeps the howmany loop at the leaves of the
 * transform: composite nodes (twiddle, generic, Rader) recurse once per
 * radix branch with howmany/idist/odist passed through unchanged, and then
 * apply their own codelet to each of the howmany output vectors.
 *
 * An unknown node type ends in fftw_die().
 */
static void executor_many_vector(int n, const fftw_complex *in,
                                 fftw_complex *out,
                                 fftw_plan_node *p,
                                 int istride, int ostride,
                                 int howmany, int idist, int odist)
{
    int k;

    switch (p->type) {
    case FFTW_NOTW:
        {
            fftw_notw_codelet *leaf = p->nodeu.notw.codelet;

            HACK_ALIGN_STACK_ODD;
            for (k = 0; k < howmany; ++k)
                leaf(in + k * idist, out + k * odist, istride, ostride);
            break;
        }

    case FFTW_TWIDDLE:
        {
            int radix = p->nodeu.twiddle.size;
            int sub_n = n / radix;
            fftw_twiddle_codelet *pass;
            fftw_complex *tw;

            for (k = 0; k < radix; ++k) {
                executor_many_vector(sub_n, in + k * istride,
                                     out + k * (sub_n * ostride),
                                     p->nodeu.twiddle.recurse,
                                     istride * radix, ostride,
                                     howmany, idist, odist);
            }

            pass = p->nodeu.twiddle.codelet;
            tw = p->nodeu.twiddle.tw->twarray;

            /*
             * Open question left by the original authors: since odist is
             * large, it might be better to run the howmany loop for the
             * twiddle codelets at the topmost level of the recursion,
             * i.e. to use separate recursions for twiddle and no-twiddle
             * nodes.
             */
            HACK_ALIGN_STACK_EVEN;
            for (k = 0; k < howmany; ++k)
                pass(out + k * odist, tw, sub_n * ostride, sub_n, ostride);
            break;
        }

    case FFTW_GENERIC:
        {
            int radix = p->nodeu.generic.size;
            int sub_n = n / radix;
            fftw_generic_codelet *pass;
            fftw_complex *tw;

            for (k = 0; k < radix; ++k) {
                executor_many_vector(sub_n, in + k * istride,
                                     out + k * (sub_n * ostride),
                                     p->nodeu.generic.recurse,
                                     istride * radix, ostride,
                                     howmany, idist, odist);
            }

            pass = p->nodeu.generic.codelet;
            tw = p->nodeu.generic.tw->twarray;
            for (k = 0; k < howmany; ++k)
                pass(out + k * odist, tw, sub_n, radix, n, ostride);
            break;
        }

    case FFTW_RADER:
        {
            int radix = p->nodeu.rader.size;
            int sub_n = n / radix;
            fftw_rader_codelet *pass;
            fftw_complex *tw;

            for (k = 0; k < radix; ++k) {
                executor_many_vector(sub_n, in + k * istride,
                                     out + k * (sub_n * ostride),
                                     p->nodeu.rader.recurse,
                                     istride * radix, ostride,
                                     howmany, idist, odist);
            }

            pass = p->nodeu.rader.codelet;
            tw = p->nodeu.rader.tw->twarray;
            for (k = 0; k < howmany; ++k) {
                pass(out + k * odist, tw, sub_n, radix, ostride,
                     p->nodeu.rader.rader_data);
            }
            break;
        }

    default:
        fftw_die("BUG in executor: invalid plan\n");
        break;
    }
}
/*
 * Run the plan node p once on a single data set of size n.
 *
 * This function must keep external linkage: it is called from other files.
 * The "fftw_" prefix is there to avoid name collisions.
 *
 * A no-twiddle node calls its codelet directly.  Twiddle, generic and Rader
 * nodes first execute radix sub-transforms of size n / radix and then apply
 * the node's own codelet to `out`.  When FFTW_ENABLE_VECTOR_RECURSE is
 * defined, recurse_kind selects executor_many_vector() over executor_many()
 * for the sub-transforms; otherwise executor_many() is always used.
 *
 * An unknown node type ends in fftw_die().
 */
void fftw_executor_simple(int n, const fftw_complex *in,
                          fftw_complex *out,
                          fftw_plan_node *p,
                          int istride,
                          int ostride,
                          fftw_recurse_kind recurse_kind)
{
    switch (p->type) {
    case FFTW_NOTW:
        HACK_ALIGN_STACK_ODD;
        (p->nodeu.notw.codelet)(in, out, istride, ostride);
        break;

    case FFTW_TWIDDLE:
        {
            int radix = p->nodeu.twiddle.size;
            int sub_n = n / radix;
            fftw_twiddle_codelet *pass;
            fftw_complex *tw;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
            if (recurse_kind != FFTW_NORMAL_RECURSE)
                executor_many_vector(sub_n, in, out,
                                     p->nodeu.twiddle.recurse,
                                     istride * radix, ostride,
                                     radix, istride, sub_n * ostride);
            else
#endif
                executor_many(sub_n, in, out,
                              p->nodeu.twiddle.recurse,
                              istride * radix, ostride,
                              radix, istride, sub_n * ostride,
                              FFTW_NORMAL_RECURSE);

            /* looked up only after the recursion has finished */
            pass = p->nodeu.twiddle.codelet;
            tw = p->nodeu.twiddle.tw->twarray;

            HACK_ALIGN_STACK_EVEN;
            pass(out, tw, sub_n * ostride, sub_n, ostride);
            break;
        }

    case FFTW_GENERIC:
        {
            int radix = p->nodeu.generic.size;
            int sub_n = n / radix;
            fftw_generic_codelet *pass;
            fftw_complex *tw;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
            if (recurse_kind != FFTW_NORMAL_RECURSE)
                executor_many_vector(sub_n, in, out,
                                     p->nodeu.generic.recurse,
                                     istride * radix, ostride,
                                     radix, istride, sub_n * ostride);
            else
#endif
                executor_many(sub_n, in, out,
                              p->nodeu.generic.recurse,
                              istride * radix, ostride,
                              radix, istride, sub_n * ostride,
                              FFTW_NORMAL_RECURSE);

            pass = p->nodeu.generic.codelet;
            tw = p->nodeu.generic.tw->twarray;
            pass(out, tw, sub_n, radix, n, ostride);
            break;
        }

    case FFTW_RADER:
        {
            int radix = p->nodeu.rader.size;
            int sub_n = n / radix;
            fftw_rader_codelet *pass;
            fftw_complex *tw;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
            if (recurse_kind != FFTW_NORMAL_RECURSE)
                executor_many_vector(sub_n, in, out,
                                     p->nodeu.rader.recurse,
                                     istride * radix, ostride,
                                     radix, istride, sub_n * ostride);
            else
#endif
                executor_many(sub_n, in, out,
                              p->nodeu.rader.recurse,
                              istride * radix, ostride,
                              radix, istride, sub_n * ostride,
                              FFTW_NORMAL_RECURSE);

            pass = p->nodeu.rader.codelet;
            tw = p->nodeu.rader.tw->twarray;
            pass(out, tw, sub_n, radix, ostride, p->nodeu.rader.rader_data);
            break;
        }

    default:
        fftw_die("BUG in executor: invalid plan\n");
        break;
    }
}
/*
 * Multi-threaded counterpart of rfftwnd_complex_to_real(): execute a
 * complex-to-real multi-dimensional plan on howmany data sets using
 * nthreads threads.
 *
 * Aborts via fftw_die() if the plan's direction is not
 * FFTW_COMPLEX_TO_REAL.
 *
 * In-place plans ignore the caller's out/ostride/odist: the output aliases
 * the input, ostride becomes istride, and odist becomes 1 when
 * idist == 1 < istride and 2 * idist otherwise.  This is the same rule
 * rfftwnd_complex_to_real() applies; testing idist == 1 alone would also
 * select odist = 1 when istride is 1 as well, which disagrees with the
 * serial routine.
 *
 * A scratch buffer is always allocated here and released before returning.
 * Its size starts at p->nwork * nthreads.  For an in-place call with
 * howmany > 1, istride > idist and rank > 0 the per-transform requirement
 * is raised to at least p->n[rank - 1] * howmany; the total is then that
 * requirement itself for rank 1, and at least requirement * nthreads for
 * other ranks.
 *
 * Rank 0 does nothing, rank 1 goes through one of the
 * rfftw_c2real*_threads_aux helpers, every other rank goes through the
 * rfftwnd_c2real*_threads helpers.
 */
void rfftwnd_threads_complex_to_real(int nthreads, fftwnd_plan p,
                                     int howmany,
                                     fftw_complex *in,
                                     int istride, int idist,
                                     fftw_real *out,
                                     int ostride, int odist)
{
    fftw_complex *work = 0;
    int rank = p->rank;
    int nwork = p->nwork, size_work = nwork * nthreads;

    if (p->dir != FFTW_COMPLEX_TO_REAL)
        fftw_die("rfftwnd_complex_to_real with real-to-complex plan");

    if (p->is_in_place) {
        ostride = istride;
        /* ugh: distance 1 is kept only for the idist == 1 < istride case */
        odist = (idist == 1 && idist < istride) ? 1 : (idist * 2);
        out = (fftw_real *) in;

        if (howmany > 1 && istride > idist && rank > 0) {
            int new_nwork = p->n[rank - 1] * howmany;

            if (new_nwork > nwork)
                nwork = new_nwork;

            if (rank != 1) {
                if (nwork * nthreads > size_work)
                    size_work = nwork * nthreads;
            } else
                size_work = nwork;
        }
    }

    work = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * size_work);

    switch (rank) {
    case 0:
        break;

    case 1:
        if (p->is_in_place && howmany > 1 && istride > idist)
            rfftw_c2real_overlap_threads_aux(p->plans[0], howmany,
                                             in, istride, idist,
                                             out, ostride, odist,
                                             (fftw_real *) work,
                                             nthreads);
        else
            rfftw_c2real_threads_aux(p->plans[0], howmany,
                                     in, istride, idist,
                                     out, ostride, odist,
                                     (fftw_real *) work,
                                     nthreads);
        break;

    default:    /* rank >= 2 */
        {
            if (howmany > 1 && ostride > odist)
                rfftwnd_c2real_aux_howmany_threads(p, 0, howmany,
                                                   in, istride, idist,
                                                   out, ostride, odist,
                                                   work, nwork,
                                                   nthreads);
            else {
                int i;

                for (i = 0; i < howmany; ++i)
                    rfftwnd_c2real_threads_aux(p, 0,
                                               in + i * idist, istride,
                                               out + i * odist, ostride,
                                               work, nthreads);
            }
        }
    }

    fftw_free(work);
}