static void rexecutor_many_inplace(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int howmany, int idist, fftw_recurse_kind recurse_kind) { switch (p->type) { case FFTW_REAL2HC: { fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; int s; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + s * idist, in + n * istride + s * idist, istride, istride, -istride); break; } case FFTW_HC2REAL: { fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; int s; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + n * istride + s * idist, in + s * idist, istride, -istride, istride); break; } default: { int s; fftw_real *tmp; if (out) tmp = out; else tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); for (s = 0; s < howmany; ++s) { rfftw_executor_simple(n, in + s * idist, tmp, p, istride, 1, recurse_kind); rfftw_strided_copy(n, tmp, istride, in + s * idist); } if (!out) fftw_free(tmp); } } }
static void executor_many(int n, const fftw_complex *in, fftw_complex *out, fftw_plan_node *p, int istride, int ostride, int howmany, int idist, int odist, fftw_recurse_kind recurse_kind) { int s; switch (p->type) { case FFTW_NOTW: { fftw_notw_codelet *codelet = p->nodeu.notw.codelet; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, out + s * odist, istride, ostride); break; } default: for (s = 0; s < howmany; ++s) fftw_executor_simple(n, in + s * idist, out + s * odist, p, istride, ostride, recurse_kind); } }
/* * in: array of n/2 + 1 complex numbers (* howmany). * out: array of n real numbers (* howmany). * work: array of n real numbers (stride 1) * * We must have out != in if dist < stride. */ void rfftw_c2real_aux(fftw_plan plan, int howmany, fftw_complex *in, int istride, int idist, fftw_real *out, int ostride, int odist, fftw_real *work) { fftw_plan_node *p = plan->root; switch (p->type) { case FFTW_HC2REAL: { fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; int j; HACK_ALIGN_STACK_ODD(); for (j = 0; j < howmany; ++j) codelet(&c_re(*(in + j * idist)), &c_im(*(in + j * idist)), out + j * odist, istride * 2, istride * 2, ostride); break; } default: { int j, n = plan->n; for (j = 0; j < howmany; ++j, in += idist, out += odist) { rfftw_c2hc(n, in, istride, work); rfftw_executor_simple(n, work, out, p, 1, ostride); } break; } } }
static void rexecutor_many(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int ostride, int howmany, int idist, int odist, fftw_recurse_kind recurse_kind) { int s; switch (p->type) { case FFTW_REAL2HC: { fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, out + s * odist, out + n * ostride + s * odist, istride, ostride, -ostride); break; } case FFTW_HC2REAL: { fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + n * istride + s * idist, out + s * odist, istride, -istride, ostride); break; } default: for (s = 0; s < howmany; ++s) rfftw_executor_simple(n, in + s * idist, out + s * odist, p, istride, ostride, recurse_kind); } }
static void executor_many_inplace(int n, fftw_complex *in, fftw_complex *out, fftw_plan_node *p, int istride, int howmany, int idist, fftw_recurse_kind recurse_kind) { switch (p->type) { case FFTW_NOTW: { fftw_notw_codelet *codelet = p->nodeu.notw.codelet; int s; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + s * idist, istride, istride); break; } default: { int s; fftw_complex *tmp; if (out) tmp = out; else tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); for (s = 0; s < howmany; ++s) { fftw_executor_simple(n, in + s * idist, tmp, p, istride, 1, recurse_kind); fftw_strided_copy(n, tmp, istride, in + s * idist); } if (!out) fftw_free(tmp); } } }
/* * in: array of n real numbers (* howmany). * out: array of n/2 + 1 complex numbers (* howmany). * work: array of n real numbers (stride 1) * * We must have out != in if dist < stride. */ void rfftw_real2c_aux(fftw_plan plan, int howmany, fftw_real *in, int istride, int idist, fftw_complex *out, int ostride, int odist, fftw_real *work) { fftw_plan_node *p = plan->root; int j; switch (p->type) { case FFTW_REAL2HC: { fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; int n = plan->n; int n2 = (n & 1) ? 0 : (n + 1) / 2; HACK_ALIGN_STACK_ODD; for (j = 0; j < howmany; ++j, out += odist) { codelet(in + j * idist, &c_re(*out), &c_im(*out), istride, ostride * 2, ostride * 2); c_im(out[0]) = 0.0; c_im(out[n2 * ostride]) = 0.0; } break; } default: { int n = plan->n; fftw_recurse_kind recurse_kind = plan->recurse_kind; for (j = 0; j < howmany; ++j, in += idist, out += odist) { rfftw_executor_simple(n, in, work, p, istride, 1, recurse_kind); rfftw_hc2c(n, work, out, ostride); } break; } } }
/* * Do *not* declare simple executor static--we need to call it * from other files...also, preface its name with "fftw_" * to avoid any possible name collisions. */ void fftw_executor_simple(int n, const fftw_complex *in, fftw_complex *out, fftw_plan_node *p, int istride, int ostride, fftw_recurse_kind recurse_kind) { switch (p->type) { case FFTW_NOTW: HACK_ALIGN_STACK_ODD; (p->nodeu.notw.codelet)(in, out, istride, ostride); break; case FFTW_TWIDDLE: { int r = p->nodeu.twiddle.size; int m = n / r; fftw_twiddle_codelet *codelet; fftw_complex *W; #ifdef FFTW_ENABLE_VECTOR_RECURSE if (recurse_kind == FFTW_NORMAL_RECURSE) #endif executor_many(m, in, out, p->nodeu.twiddle.recurse, istride * r, ostride, r, istride, m * ostride, FFTW_NORMAL_RECURSE); #ifdef FFTW_ENABLE_VECTOR_RECURSE else executor_many_vector(m, in, out, p->nodeu.twiddle.recurse, istride * r, ostride, r, istride, m * ostride); #endif codelet = p->nodeu.twiddle.codelet; W = p->nodeu.twiddle.tw->twarray; HACK_ALIGN_STACK_EVEN; codelet(out, W, m * ostride, m, ostride); break; } case FFTW_GENERIC: { int r = p->nodeu.generic.size; int m = n / r; fftw_generic_codelet *codelet; fftw_complex *W; #ifdef FFTW_ENABLE_VECTOR_RECURSE if (recurse_kind == FFTW_NORMAL_RECURSE) #endif executor_many(m, in, out, p->nodeu.generic.recurse, istride * r, ostride, r, istride, m * ostride, FFTW_NORMAL_RECURSE); #ifdef FFTW_ENABLE_VECTOR_RECURSE else executor_many_vector(m, in, out, p->nodeu.generic.recurse, istride * r, ostride, r, istride, m * ostride); #endif codelet = p->nodeu.generic.codelet; W = p->nodeu.generic.tw->twarray; codelet(out, W, m, r, n, ostride); break; } case FFTW_RADER: { int r = p->nodeu.rader.size; int m = n / r; fftw_rader_codelet *codelet; fftw_complex *W; #ifdef FFTW_ENABLE_VECTOR_RECURSE if (recurse_kind == FFTW_NORMAL_RECURSE) #endif executor_many(m, in, out, p->nodeu.rader.recurse, istride * r, ostride, r, istride, m * ostride, FFTW_NORMAL_RECURSE); #ifdef FFTW_ENABLE_VECTOR_RECURSE else executor_many_vector(m, in, out, p->nodeu.rader.recurse, istride * r, ostride, r, istride, m * ostride); #endif codelet = p->nodeu.rader.codelet; W = p->nodeu.rader.tw->twarray; codelet(out, W, m, r, ostride, p->nodeu.rader.rader_data); break; } default: fftw_die("BUG in executor: invalid plan\n"); break; } }
/* executor_many_vector is like executor_many, but it pushes the howmany loop down to the leaves of the transform: */ static void executor_many_vector(int n, const fftw_complex *in, fftw_complex *out, fftw_plan_node *p, int istride, int ostride, int howmany, int idist, int odist) { int s; switch (p->type) { case FFTW_NOTW: { fftw_notw_codelet *codelet = p->nodeu.notw.codelet; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, out + s * odist, istride, ostride); break; } case FFTW_TWIDDLE: { int r = p->nodeu.twiddle.size; int m = n / r; fftw_twiddle_codelet *codelet; fftw_complex *W; for (s = 0; s < r; ++s) executor_many_vector(m, in + s * istride, out + s * (m * ostride), p->nodeu.twiddle.recurse, istride * r, ostride, howmany, idist, odist); codelet = p->nodeu.twiddle.codelet; W = p->nodeu.twiddle.tw->twarray; /* This may not be the right thing. We maybe should have the howmany loop for the twiddle codelets at the topmost level of the recursion, since odist is big; i.e. separate recursions for twiddle and notwiddle. */ HACK_ALIGN_STACK_EVEN; for (s = 0; s < howmany; ++s) codelet(out + s * odist, W, m * ostride, m, ostride); break; } case FFTW_GENERIC: { int r = p->nodeu.generic.size; int m = n / r; fftw_generic_codelet *codelet; fftw_complex *W; for (s = 0; s < r; ++s) executor_many_vector(m, in + s * istride, out + s * (m * ostride), p->nodeu.generic.recurse, istride * r, ostride, howmany, idist, odist); codelet = p->nodeu.generic.codelet; W = p->nodeu.generic.tw->twarray; for (s = 0; s < howmany; ++s) codelet(out + s * odist, W, m, r, n, ostride); break; } case FFTW_RADER: { int r = p->nodeu.rader.size; int m = n / r; fftw_rader_codelet *codelet; fftw_complex *W; for (s = 0; s < r; ++s) executor_many_vector(m, in + s * istride, out + s * (m * ostride), p->nodeu.rader.recurse, istride * r, ostride, howmany, idist, odist); codelet = p->nodeu.rader.codelet; W = p->nodeu.rader.tw->twarray; for (s = 0; s < howmany; ++s) codelet(out + s * odist, W, m, r, ostride, p->nodeu.rader.rader_data); break; } default: fftw_die("BUG in executor: invalid plan\n"); break; } }
void rfftw_executor_simple(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int ostride, fftw_recurse_kind recurse_kind) { switch (p->type) { case FFTW_REAL2HC: HACK_ALIGN_STACK_ODD; (p->nodeu.real2hc.codelet) (in, out, out + n * ostride, istride, ostride, -ostride); break; case FFTW_HC2REAL: HACK_ALIGN_STACK_ODD; (p->nodeu.hc2real.codelet) (in, in + n * istride, out, istride, -istride, ostride); break; case FFTW_HC2HC: { int r = p->nodeu.hc2hc.size; int m = n / r; /* * please do resist the temptation of initializing * these variables here. Doing so forces the * compiler to keep a live variable across the * recursive call. */ fftw_hc2hc_codelet *codelet; fftw_complex *W; switch (p->nodeu.hc2hc.dir) { case FFTW_REAL_TO_COMPLEX: #ifdef FFTW_ENABLE_VECTOR_RECURSE if (recurse_kind == FFTW_NORMAL_RECURSE) #endif rexecutor_many(m, in, out, p->nodeu.hc2hc.recurse, istride * r, ostride, r, istride, m * ostride, FFTW_NORMAL_RECURSE); #ifdef FFTW_ENABLE_VECTOR_RECURSE else rexecutor_many_vector(m, in, out, p->nodeu.hc2hc.recurse, istride * r, ostride, r, istride, m * ostride); #endif W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; codelet(out, W, m * ostride, m, ostride); break; case FFTW_COMPLEX_TO_REAL: W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; codelet(in, W, m * istride, m, istride); #ifdef FFTW_ENABLE_VECTOR_RECURSE if (recurse_kind == FFTW_NORMAL_RECURSE) #endif rexecutor_many(m, in, out, p->nodeu.hc2hc.recurse, istride, ostride * r, r, m * istride, ostride, FFTW_NORMAL_RECURSE); #ifdef FFTW_ENABLE_VECTOR_RECURSE else rexecutor_many_vector(m, in, out, p->nodeu.hc2hc.recurse, istride, ostride * r, r, m * istride, ostride); #endif break; default: goto bug; } break; } case FFTW_RGENERIC: { int r = p->nodeu.rgeneric.size; int m = n / r; fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet; fftw_complex *W = p->nodeu.rgeneric.tw->twarray; switch (p->nodeu.rgeneric.dir) { case FFTW_REAL_TO_COMPLEX: #ifdef FFTW_ENABLE_VECTOR_RECURSE if (recurse_kind == FFTW_NORMAL_RECURSE) #endif rexecutor_many(m, in, out, p->nodeu.rgeneric.recurse, istride * r, ostride, r, istride, m * ostride, FFTW_NORMAL_RECURSE); #ifdef FFTW_ENABLE_VECTOR_RECURSE else rexecutor_many_vector(m, in, out, p->nodeu.rgeneric.recurse, istride * r, ostride, r, istride, m * ostride); #endif codelet(out, W, m, r, n, ostride); break; case FFTW_COMPLEX_TO_REAL: codelet(in, W, m, r, n, istride); #ifdef FFTW_ENABLE_VECTOR_RECURSE if (recurse_kind == FFTW_NORMAL_RECURSE) #endif rexecutor_many(m, in, out, p->nodeu.rgeneric.recurse, istride, ostride * r, r, m * istride, ostride, FFTW_NORMAL_RECURSE); #ifdef FFTW_ENABLE_VECTOR_RECURSE else rexecutor_many_vector(m, in, out, p->nodeu.rgeneric.recurse, istride, ostride * r, r, m * istride, ostride); #endif break; default: goto bug; } break; } default: bug: fftw_die("BUG in rexecutor: invalid plan\n"); break; } }
/* rexecutor_many_vector is like rexecutor_many, but it pushes the howmany loop down to the leaves of the transform: */ static void rexecutor_many_vector(int n, fftw_real *in, fftw_real *out, fftw_plan_node *p, int istride, int ostride, int howmany, int idist, int odist) { switch (p->type) { case FFTW_REAL2HC: { fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet; int s; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, out + s * odist, out + n * ostride + s * odist, istride, ostride, -ostride); break; } case FFTW_HC2REAL: { fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet; int s; HACK_ALIGN_STACK_ODD; for (s = 0; s < howmany; ++s) codelet(in + s * idist, in + n * istride + s * idist, out + s * odist, istride, -istride, ostride); break; } case FFTW_HC2HC: { int r = p->nodeu.hc2hc.size; int m = n / r; int i; fftw_hc2hc_codelet *codelet; fftw_complex *W; switch (p->nodeu.hc2hc.dir) { case FFTW_REAL_TO_COMPLEX: for (i = 0; i < r; ++i) rexecutor_many_vector(m, in + i * istride, out + i * (m*ostride), p->nodeu.hc2hc.recurse, istride * r, ostride, howmany, idist, odist); W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; for (i = 0; i < howmany; ++i) codelet(out + i * odist, W, m * ostride, m, ostride); break; case FFTW_COMPLEX_TO_REAL: W = p->nodeu.hc2hc.tw->twarray; codelet = p->nodeu.hc2hc.codelet; HACK_ALIGN_STACK_EVEN; for (i = 0; i < howmany; ++i) codelet(in + i * idist, W, m * istride, m, istride); for (i = 0; i < r; ++i) rexecutor_many_vector(m, in + i * (m*istride), out + i * ostride, p->nodeu.hc2hc.recurse, istride, ostride * r, howmany, idist, odist); break; default: goto bug; } break; } case FFTW_RGENERIC: { int r = p->nodeu.rgeneric.size; int m = n / r; int i; fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet; fftw_complex *W = p->nodeu.rgeneric.tw->twarray; switch (p->nodeu.rgeneric.dir) { case FFTW_REAL_TO_COMPLEX: for (i = 0; i < r; ++i) rexecutor_many_vector(m, in + i * istride, out + i * (m * ostride), p->nodeu.rgeneric.recurse, istride * r, ostride, howmany, idist, odist); for (i = 0; i < howmany; ++i) codelet(out + i * odist, W, m, r, n, ostride); break; case FFTW_COMPLEX_TO_REAL: for (i = 0; i < howmany; ++i) codelet(in + i * idist, W, m, r, n, istride); for (i = 0; i < r; ++i) rexecutor_many_vector(m, in + i * m * istride, out + i * ostride, p->nodeu.rgeneric.recurse, istride, ostride * r, howmany, idist, odist); break; default: goto bug; } break; } default: bug: fftw_die("BUG in rexecutor: invalid plan\n"); break; } }