static void executor_simple_inplace(int n, fftw_complex *in, fftw_complex *out, fftw_plan_node *p, int istride, fftw_recurse_kind recurse_kind) { switch (p->type) { case FFTW_NOTW: HACK_ALIGN_STACK_ODD; (p->nodeu.notw.codelet)(in, in, istride, istride); break; default: { fftw_complex *tmp; if (out) tmp = out; else tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); fftw_executor_simple(n, in, tmp, p, istride, 1, recurse_kind); fftw_strided_copy(n, tmp, istride, in); if (!out) fftw_free(tmp); } } }
static void executor_many(int n, const fftw_complex *in, fftw_complex *out, fftw_plan_node *p, int istride, int ostride, int howmany, int idist, int odist, fftw_recurse_kind recurse_kind) { int s; switch (p->type) { case FFTW_NOTW: { fftw_notw_codelet *codelet = p->nodeu.notw.codelet; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, out + s * odist, istride, ostride); break; } default: for (s = 0; s < howmany; ++s) fftw_executor_simple(n, in + s * idist, out + s * odist, p, istride, ostride, recurse_kind); } }
void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out) { int n = plan->n; if (plan->flags & FFTW_IN_PLACE) executor_simple_inplace(n, in, out, plan->root, 1, plan->recurse_kind); else fftw_executor_simple(n, in, out, plan->root, 1, 1, plan->recurse_kind); }
/* user interface */ void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride, int idist, fftw_complex *out, int ostride, int odist) { int n = plan->n; if (plan->flags & FFTW_IN_PLACE) { if (howmany == 1) { executor_simple_inplace(n, in, out, plan->root, istride, plan->recurse_kind); } else { executor_many_inplace(n, in, out, plan->root, istride, howmany, idist, plan->recurse_kind); } } else { if (howmany == 1) { fftw_executor_simple(n, in, out, plan->root, istride, ostride, plan->recurse_kind); } else { #ifdef FFTW_ENABLE_VECTOR_RECURSE int vector_size = plan->vector_size; if (vector_size <= 1) #endif executor_many(n, in, out, plan->root, istride, ostride, howmany, idist, odist, plan->recurse_kind); #ifdef FFTW_ENABLE_VECTOR_RECURSE else { int s; int num_vects = howmany / vector_size; fftw_plan_node *root = plan->root; for (s = 0; s < num_vects; ++s) executor_many_vector(n, in + s * (vector_size * idist), out + s * (vector_size * odist), root, istride, ostride, vector_size, idist, odist); s = howmany % vector_size; if (s > 0) executor_many(n, in + num_vects * (vector_size * idist), out + num_vects * (vector_size * odist), root, istride, ostride, s, idist, odist, FFTW_NORMAL_RECURSE); } #endif } } }
static void executor_many_inplace(int n, fftw_complex *in, fftw_complex *out, fftw_plan_node *p, int istride, int howmany, int idist, fftw_recurse_kind recurse_kind) { switch (p->type) { case FFTW_NOTW: { fftw_notw_codelet *codelet = p->nodeu.notw.codelet; int s; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + s * idist, istride, istride); break; } default: { int s; fftw_complex *tmp; if (out) tmp = out; else tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); for (s = 0; s < howmany; ++s) { fftw_executor_simple(n, in + s * idist, tmp, p, istride, 1, recurse_kind); fftw_strided_copy(n, tmp, istride, in + s * idist); } if (!out) fftw_free(tmp); } } }
static fftw_rader_data *create_rader_aux(int p, int flags) { fftw_complex *omega, *work; int g, ginv, gpower; int i; FFTW_TRIG_REAL twoPiOverN; fftw_real scale = 1.0 / (p - 1); /* for convolution */ fftw_plan plan; fftw_rader_data *d; if (p < 2) fftw_die("non-prime order in Rader\n"); flags &= ~FFTW_IN_PLACE; d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data)); g = find_generator(p); ginv = power_mod(g, p - 2, p); omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); plan = fftw_create_plan(p - 1, FFTW_FORWARD, flags & ~FFTW_NO_VECTOR_RECURSE); work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p; gpower = 1; for (i = 0; i < p - 1; ++i) { c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower); c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN * gpower); gpower = MULMOD(gpower, ginv, p); } /* fft permuted roots of unity */ fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1, plan->recurse_kind); fftw_free(work); d->plan = plan; d->omega = omega; d->g = g; d->ginv = ginv; d->p = p; d->flags = flags; d->refcount = 1; d->next = NULL; d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc)); d->cdesc->name = NULL; d->cdesc->codelet = NULL; d->cdesc->size = p; d->cdesc->dir = FFTW_FORWARD; d->cdesc->type = FFTW_RADER; d->cdesc->signature = g; d->cdesc->ntwiddle = 0; d->cdesc->twiddle_order = NULL; return d; }
void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W, int m, int r, int stride, fftw_rader_data * d) { fftw_complex *tmp = (fftw_complex *) fftw_malloc((r - 1) * sizeof(fftw_complex)); int i, k, gpower = 1, g = d->g, ginv = d->ginv; fftw_real a0r, a0i; fftw_complex *omega = d->omega; for (i = 0; i < m; ++i, A += stride, W += r - 1) { /* * Here, we fft W[k-1] * A[k*(m*stride)], using Rader. * (Actually, W is pre-permuted to match the permutation that we * will do on A.) */ /* First, permute the input and multiply by W, storing in tmp: */ /* gpower == g^k mod r in the following loop */ for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) { fftw_real rA, iA, rW, iW; rW = c_re(W[k]); iW = c_im(W[k]); rA = c_re(A[gpower * (m * stride)]); iA = c_im(A[gpower * (m * stride)]); c_re(tmp[k]) = rW * rA - iW * iA; c_im(tmp[k]) = rW * iA + iW * rA; } WHEN_DEBUG( { if (gpower != 1) fftw_die("incorrect generator in Rader\n"); } ); /* FFT tmp to A: */ fftw_executor_simple(r - 1, tmp, A + (m * stride), d->plan->root, 1, m * stride, d->plan->recurse_kind); /* set output DC component: */ a0r = c_re(A[0]); a0i = c_im(A[0]); c_re(A[0]) += c_re(A[(m * stride)]); c_im(A[0]) += c_im(A[(m * stride)]); /* now, multiply by omega: */ for (k = 0; k < r - 1; ++k) { fftw_real rA, iA, rW, iW; rW = c_re(omega[k]); iW = c_im(omega[k]); rA = c_re(A[(k + 1) * (m * stride)]); iA = c_im(A[(k + 1) * (m * stride)]); c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA; c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA); } /* this will add A[0] to all of the outputs after the ifft */ c_re(A[(m * stride)]) += a0r; c_im(A[(m * stride)]) -= a0i; /* inverse FFT: */ fftw_executor_simple(r - 1, A + (m * stride), tmp, d->plan->root, m * stride, 1, d->plan->recurse_kind); /* finally, do inverse permutation to unshuffle the output: */ for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) { c_re(A[gpower * (m * stride)]) = c_re(tmp[k]); c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]); } WHEN_DEBUG( { if (gpower != 1) fftw_die("incorrect generator in Rader\n"); } );