static fftw_rader_data *create_rader_aux(int p, int flags) { fftw_complex *omega, *work; int g, ginv, gpower; int i; FFTW_TRIG_REAL twoPiOverN; fftw_real scale = 1.0 / (p - 1); /* for convolution */ fftw_plan plan; fftw_rader_data *d; if (p < 2) fftw_die("non-prime order in Rader\n"); flags &= ~FFTW_IN_PLACE; d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data)); g = find_generator(p); ginv = power_mod(g, p - 2, p); omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); plan = fftw_create_plan(p - 1, FFTW_FORWARD, flags & ~FFTW_NO_VECTOR_RECURSE); work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p; gpower = 1; for (i = 0; i < p - 1; ++i) { c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower); c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN * gpower); gpower = MULMOD(gpower, ginv, p); } /* fft permuted roots of unity */ fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1, plan->recurse_kind); fftw_free(work); d->plan = plan; d->omega = omega; d->g = g; d->ginv = ginv; d->p = p; d->flags = flags; d->refcount = 1; d->next = NULL; d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc)); d->cdesc->name = NULL; d->cdesc->codelet = NULL; d->cdesc->size = p; d->cdesc->dir = FFTW_FORWARD; d->cdesc->type = FFTW_RADER; d->cdesc->signature = g; d->cdesc->ntwiddle = 0; d->cdesc->twiddle_order = NULL; return d; }
/* * Implementation of the FFT tester described in * * Funda Ergün. Testing multivariate linear functions: Overcoming the * generator bottleneck. In Proceedings of the Twenty-Seventh Annual * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, * Nevada, 29 May--1 June 1995. */ void test_ergun(int n, fftw_direction dir, fftw_plan plan) { fftw_complex *inA, *inB, *inC, *outA, *outB, *outC; fftw_complex *tmp; fftw_complex impulse; int i; int rounds = 20; FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n; inA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); inB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); inC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); outA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); outB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); outC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex)); WHEN_VERBOSE(2, printf("Validating plan, n = %d, dir = %s\n", n, dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD")); /* test 1: check linearity */ for (i = 0; i < rounds; ++i) { fftw_complex alpha, beta; c_re(alpha) = DRAND(); c_im(alpha) = DRAND(); c_re(beta) = DRAND(); c_im(beta) = DRAND(); fill_random(inA, n); fill_random(inB, n); fftw_out_of_place(plan, n, inA, outA); fftw_out_of_place(plan, n, inB, outB); array_scale(outA, alpha, n); array_scale(outB, beta, n); array_add(tmp, outA, outB, n); array_scale(inA, alpha, n); array_scale(inB, beta, n); array_add(inC, inA, inB, n); fftw_out_of_place(plan, n, inC, outC); array_compare(outC, tmp, n); } /* test 2: check that the unit impulse is transformed properly */ c_re(impulse) = 1.0; c_im(impulse) = 0.0; for (i = 0; i < n; ++i) { /* impulse */ c_re(inA[i]) = 0.0; c_im(inA[i]) = 0.0; /* transform of the impulse */ outA[i] = impulse; } inA[0] = impulse; for (i = 0; i < rounds; ++i) { fill_random(inB, n); array_sub(inC, inA, inB, n); fftw_out_of_place(plan, n, inB, outB); fftw_out_of_place(plan, n, inC, outC); array_add(tmp, outB, outC, n); array_compare(tmp, outA, n); } /* test 3: check the time-shift property */ /* the paper performs more tests, but this code should be fine too */ for (i = 0; i < rounds; ++i) { int j; fill_random(inA, n); array_rol(inB, inA, n, 1, 1); fftw_out_of_place(plan, n, inA, outA); fftw_out_of_place(plan, n, inB, outB); for (j = 0; j < n; ++j) { FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin); FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin); c_re(tmp[j]) = c_re(outB[j]) * c - c_im(outB[j]) * s; c_im(tmp[j]) = c_re(outB[j]) * s + c_im(outB[j]) * c; } array_compare(tmp, outA, n); } WHEN_VERBOSE(2, printf("Validation done\n")); fftw_free(tmp); fftw_free(outC); fftw_free(outB); fftw_free(outA); fftw_free(inC); fftw_free(inB); fftw_free(inA); }
/* Same as test_ergun, but for multi-dimensional transforms: */ void testnd_ergun(int rank, int *n, fftw_direction dir, fftwnd_plan plan) { fftw_complex *inA, *inB, *inC, *outA, *outB, *outC; fftw_complex *tmp; fftw_complex impulse; int N, n_before, n_after, dim; int i, which_impulse; int rounds = 20; FFTW_TRIG_REAL twopin; N = 1; for (dim = 0; dim < rank; ++dim) N *= n[dim]; inA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); inB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); inC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); outA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); outB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); outC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); tmp = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex)); WHEN_VERBOSE(2, printf("Validating plan, N = %d, dir = %s\n", N, dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD")); /* test 1: check linearity */ for (i = 0; i < rounds; ++i) { fftw_complex alpha, beta; c_re(alpha) = DRAND(); c_im(alpha) = DRAND(); c_re(beta) = DRAND(); c_im(beta) = DRAND(); fill_random(inA, N); fill_random(inB, N); fftwnd(plan, 1, inA, 1, N, outA, 1, N); fftwnd(plan, 1, inB, 1, N, outB, 1, N); array_scale(outA, alpha, N); array_scale(outB, beta, N); array_add(tmp, outA, outB, N); array_scale(inA, alpha, N); array_scale(inB, beta, N); array_add(inC, inA, inB, N); fftwnd(plan, 1, inC, 1, N, outC, 1, N); array_compare(outC, tmp, N); } /* * test 2: check that the unit impulse is transformed properly -- we * need to test both the real and imaginary impulses */ for (which_impulse = 0; which_impulse < 2; ++which_impulse) { if (which_impulse == 0) { /* real impulse */ c_re(impulse) = 1.0; c_im(impulse) = 0.0; } else { /* imaginary impulse */ c_re(impulse) = 0.0; c_im(impulse) = 1.0; } for (i = 0; i < N; ++i) { /* impulse */ c_re(inA[i]) = 0.0; c_im(inA[i]) = 0.0; /* transform of the impulse */ outA[i] = impulse; } inA[0] = impulse; for (i = 0; i < rounds; ++i) { fill_random(inB, N); array_sub(inC, inA, inB, N); fftwnd(plan, 1, inB, 1, N, outB, 1, N); fftwnd(plan, 1, inC, 1, N, outC, 1, N); array_add(tmp, outB, outC, N); array_compare(tmp, outA, N); } } /* test 3: check the time-shift property */ /* the paper performs more tests, but this code should be fine too */ /* -- we have to check shifts in each dimension */ n_before = 1; n_after = N; for (dim = 0; dim < rank; ++dim) { int n_cur = n[dim]; n_after /= n_cur; twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n_cur; for (i = 0; i < rounds; ++i) { int j, jb, ja; fill_random(inA, N); array_rol(inB, inA, n_cur, n_before, n_after); fftwnd(plan, 1, inA, 1, N, outA, 1, N); fftwnd(plan, 1, inB, 1, N, outB, 1, N); for (jb = 0; jb < n_before; ++jb) for (j = 0; j < n_cur; ++j) { FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin); FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin); for (ja = 0; ja < n_after; ++ja) { c_re(tmp[(jb * n_cur + j) * n_after + ja]) = c_re(outB[(jb * n_cur + j) * n_after + ja]) * c - c_im(outB[(jb * n_cur + j) * n_after + ja]) * s; c_im(tmp[(jb * n_cur + j) * n_after + ja]) = c_re(outB[(jb * n_cur + j) * n_after + ja]) * s + c_im(outB[(jb * n_cur + j) * n_after + ja]) * c; } } array_compare(tmp, outA, N); } n_before *= n_cur; } WHEN_VERBOSE(2, printf("Validation done\n")); fftw_free(tmp); fftw_free(outC); fftw_free(outB); fftw_free(outA); fftw_free(inC); fftw_free(inB); fftw_free(inA); }
/* * This is a real (as opposed to complex) variation of the FFT tester * described in * * Funda Ergün. Testing multivariate linear functions: Overcoming the * generator bottleneck. In Proceedings of the Twenty-Seventh Annual * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas, * Nevada, 29 May--1 June 1995. */ void test_ergun(int n, fftw_direction dir, fftw_plan plan) { fftw_real *inA, *inB, *inC, *outA, *outB, *outC; fftw_real *inA1, *inB1; fftw_real *tmp; int i; int rounds = 20; FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n; inA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); inB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); inA1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); inB1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); inC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); outA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); outB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); outC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real)); WHEN_VERBOSE(2, printf("Validating plan, n = %d, dir = %s\n", n, dir == FFTW_REAL_TO_COMPLEX ? "REAL_TO_COMPLEX" : "COMPLEX_TO_REAL")); /* test 1: check linearity */ for (i = 0; i < rounds; ++i) { fftw_real alpha, beta; alpha = DRAND(); beta = DRAND(); rfill_random(inA, n); rfill_random(inB, n); rarray_scale(inA1, inA, alpha, n); rarray_scale(inB1, inB, beta, n); rarray_add(inC, inA1, inB1, n); rfftw_out_of_place(plan, n, inA, outA); rfftw_out_of_place(plan, n, inB, outB); rarray_scale(outA, outA, alpha, n); rarray_scale(outB, outB, beta, n); rarray_add(tmp, outA, outB, n); rfftw_out_of_place(plan, n, inC, outC); rarray_compare(outC, tmp, n); } /* test 2: check that the unit impulse is transformed properly */ for (i = 0; i < n; ++i) { /* impulse */ inA[i] = 0.0; /* transform of the impulse */ if (2 * i <= n) outA[i] = 1.0; else outA[i] = 0.0; } inA[0] = 1.0; if (dir == FFTW_REAL_TO_COMPLEX) { for (i = 0; i < rounds; ++i) { rfill_random(inB, n); rarray_sub(inC, inA, inB, n); rfftw_out_of_place(plan, n, inB, outB); rfftw_out_of_place(plan, n, inC, outC); rarray_add(tmp, outB, outC, n); rarray_compare(tmp, outA, n); } } else { for (i = 0; i < rounds; ++i) { rfill_random(outB, n); rarray_sub(outC, outA, outB, n); rfftw_out_of_place(plan, n, outB, inB); rfftw_out_of_place(plan, n, outC, inC); rarray_add(tmp, inB, inC, n); rarray_scale(tmp, tmp, 1.0 / ((double) n), n); rarray_compare(tmp, inA, n); } } /* test 3: check the time-shift property */ /* the paper performs more tests, but this code should be fine too */ if (dir == FFTW_REAL_TO_COMPLEX) { for (i = 0; i < rounds; ++i) { int j; rfill_random(inA, n); rarray_rol(inB, inA, n, 1, 1); rfftw_out_of_place(plan, n, inA, outA); rfftw_out_of_place(plan, n, inB, outB); tmp[0] = outB[0]; for (j = 1; 2 * j < n; ++j) { FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin); FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin); tmp[j] = outB[j] * c - outB[n - j] * s; tmp[n - j] = outB[j] * s + outB[n - j] * c; } if (2 * j == n) tmp[j] = -outB[j]; rarray_compare(tmp, outA, n); } } else { for (i = 0; i < rounds; ++i) { int j; rfill_random(inA, n); inB[0] = inA[0]; for (j = 1; 2 * j < n; ++j) { FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin); FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin); inB[j] = inA[j] * c - inA[n - j] * s; inB[n - j] = inA[j] * s + inA[n - j] * c; } if (2 * j == n) inB[j] = -inA[j]; rfftw_out_of_place(plan, n, inA, outA); rfftw_out_of_place(plan, n, inB, outB); rarray_rol(tmp, outA, n, 1, 1); rarray_compare(tmp, outB, n); } } WHEN_VERBOSE(2, printf("Validation done\n")); fftw_free(tmp); fftw_free(outC); fftw_free(outB); fftw_free(outA); fftw_free(inC); fftw_free(inB1); fftw_free(inA1); fftw_free(inB); fftw_free(inA); }