void test_correctness(int n) { int howmany; fftw_plan validated_plan_forward, validated_plan_backward; WHEN_VERBOSE(1, my_printf("Testing correctness for n = %d...", n); my_fflush(stdout)); /* produce a good plan */ validated_plan_forward = fftw_create_plan(n, FFTW_FORWARD, measure_flag | wisdom_flag); validated_plan_backward = fftw_create_plan(n, FFTW_BACKWARD, measure_flag | wisdom_flag); for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) test_in_place_both(n, howmany, howmany, validated_plan_forward, validated_plan_backward); fftw_destroy_plan(validated_plan_forward); fftw_destroy_plan(validated_plan_backward); if (!(wisdom_flag & FFTW_USE_WISDOM) && chk_mem_leak) fftw_check_memory_leaks(); WHEN_VERBOSE(1, my_printf("OK\n")); }
/*
 * Time an in-place multi-dimensional transform of the shape given by sz.
 *
 * sz       - rank and per-dimension sizes (sz.rank, sz.narray)
 * dir      - transform direction
 * flags    - extra planner flags; FFTW_IN_PLACE is always OR-ed in
 * specific - non-zero: plan with fftwnd_create_plan_specific on the
 *            benchmark buffer; zero: plan with fftwnd_create_plan
 *
 * The planner time, the plan itself (verbosity 2) and the measured
 * transform time and "mflops" figure (verbosity 1) are printed.
 */
void test_speed_nd_aux(struct size sz, fftw_direction dir, int flags, int specific)
{
    fftw_complex *in;
    fftwnd_plan plan;
    fftw_time t_start, t_stop;
    double t;
    int plan_flags;
    int dim;
    int N = 1;

    /* total number of points in one transform */
    for (dim = 0; dim < sz.rank; ++dim) {
        N *= (sz.narray[dim]);
    }

    in = (fftw_complex *) fftw_malloc(N * howmany_fields * sizeof(fftw_complex));

    /* only bench in-place multi-dim transforms */
    flags |= FFTW_IN_PLACE;
    plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

    t_start = fftw_get_time();
    if (specific) {
        plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir, plan_flags,
                                           in, howmany_fields, 0, 1);
    } else {
        plan = fftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
    }
    t_stop = fftw_get_time();

    CHECK(plan != NULL, "can't create plan");

    t = fftw_time_to_sec(fftw_time_diff(t_stop, t_start));
    WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

    WHEN_VERBOSE(2, printf("\n"));
    WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
    WHEN_VERBOSE(2, printf("\n"));

    /* t is overwritten with the measured time of one transform call */
    FFTW_TIME_FFT(fftwnd(plan, howmany_fields, in, howmany_fields, 1, 0, 0, 0),
                  in, N * howmany_fields, t);

    fftwnd_destroy_plan(plan);

    WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
    WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
    WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
                           " = %f\n", howmany_fields * mflops(t, N)));

    fftw_free(in);

    WHEN_VERBOSE(1, printf("\n"));
}
/*
 * Placeholder for the one-dimensional out-of-place correctness test.
 * None of the arguments are used: the function only reports "N/A"
 * (at verbosity 2) and returns.
 */
void test_out_of_place(int n, int istride, int ostride, int howmany,
                       fftw_direction dir, fftw_plan validated_plan,
                       int specific)
{
    /* one-dim. out-of-place transforms will never be supported in MPI */
    WHEN_VERBOSE(2, my_printf("N/A\n"));
}
/*
 * Time a one-dimensional out-of-place transform of size n, once through
 * fftw() and once through fftw_threads() with nthreads threads, and
 * report both times plus their ratio.
 *
 * n        - transform size
 * dir      - transform direction
 * flags    - extra planner flags
 * specific - non-zero: plan with fftw_create_plan_specific on the
 *            benchmark buffers; zero: plan with fftw_create_plan
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
    fftw_complex *in, *out;
    fftw_plan plan;
    fftw_time t_start, t_stop;
    double t, t0;
    int plan_flags;

    in = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex));
    out = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex));

    plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

    /* the planner call is bracketed by the two timestamps */
    t_start = fftw_get_time();
    if (specific) {
        plan = fftw_create_plan_specific(n, dir, plan_flags,
                                         in, howmany_fields,
                                         out, howmany_fields);
    } else {
        plan = fftw_create_plan(n, dir, plan_flags);
    }
    t_stop = fftw_get_time();

    CHECK(plan != NULL, "can't create plan");

    t = fftw_time_to_sec(fftw_time_diff(t_stop, t_start));
    WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

    WHEN_VERBOSE(2, fftw_print_plan(plan));

    /* t0: plain fftw(); t: fftw_threads() with nthreads threads */
    FFTW_TIME_FFT(fftw(plan, howmany_fields,
                       in, howmany_fields, 1, out, howmany_fields, 1),
                  in, n * howmany_fields, t0);

    FFTW_TIME_FFT(fftw_threads(nthreads, plan, howmany_fields,
                               in, howmany_fields, 1, out, howmany_fields, 1),
                  in, n * howmany_fields, t);

    fftw_destroy_plan(plan);

    WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
                           smart_sprint_time(t0)));
    WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s",
                           nthreads, smart_sprint_time(t)));
    WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
    WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                           " = %f\n", howmany_fields * mflops(t, n)));
    WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));

    fftw_free(in);
    fftw_free(out);

    WHEN_VERBOSE(1, printf("\n"));
}
/*
 * Run the in-place correctness test in both directions: first
 * FFTW_FORWARD against validated_plan_forward, then FFTW_BACKWARD
 * against validated_plan_backward.  The "specific" argument of
 * test_in_place is always passed as 0 by this variant.
 */
void test_in_place_both(int n, int istride, int howmany,
                        fftw_plan validated_plan_forward,
                        fftw_plan validated_plan_backward)
{
    const int specific = 0;

    /* forward direction */
    WHEN_VERBOSE(2,
                 my_printf("TEST CORRECTNESS (in place, FFTW_FORWARD, %s) "
                           "n = %d  istride = %d  howmany = %d\n",
                           SPECIFICP(specific), n, istride, howmany));
    test_in_place(n, istride, howmany, FFTW_FORWARD,
                  validated_plan_forward, specific);

    /* backward direction */
    WHEN_VERBOSE(2,
                 my_printf("TEST CORRECTNESS (in place, FFTW_BACKWARD, %s) "
                           "n = %d  istride = %d  howmany = %d\n",
                           SPECIFICP(specific), n, istride, howmany));
    test_in_place(n, istride, howmany, FFTW_BACKWARD,
                  validated_plan_backward, specific);
}
/*
 * Run the in-place correctness test in both directions, first with
 * specific == 0 and then with specific == 1.  For each value the
 * forward test (against validated_plan_forward) runs before the
 * backward test (against validated_plan_backward).
 */
void test_in_place_both(int n, int istride, int howmany,
                        fftw_plan validated_plan_forward,
                        fftw_plan validated_plan_backward)
{
    int specific = 0;

    while (specific <= 1) {
        WHEN_VERBOSE(2,
                     printf("TEST CORRECTNESS (in place, FFTW_FORWARD, %s) "
                            "n = %d  istride = %d  howmany = %d\n",
                            SPECIFICP(specific), n, istride, howmany));
        test_in_place(n, istride, howmany, FFTW_FORWARD,
                      validated_plan_forward, specific);

        WHEN_VERBOSE(2,
                     printf("TEST CORRECTNESS (in place, FFTW_BACKWARD, %s) "
                            "n = %d  istride = %d  howmany = %d\n",
                            SPECIFICP(specific), n, istride, howmany));
        test_in_place(n, istride, howmany, FFTW_BACKWARD,
                      validated_plan_backward, specific);

        ++specific;
    }
}
void test_correctness(int n) { int istride, ostride, howmany; fftw_plan validated_plan_forward, validated_plan_backward; WHEN_VERBOSE(1, printf("Testing correctness for n = %d...", n); fflush(stdout)); /* produce a *good* plan (validated by Ergun's test procedure) */ validated_plan_forward = fftw_create_plan(n, FFTW_FORWARD, measure_flag | wisdom_flag); test_ergun(n, FFTW_FORWARD, validated_plan_forward); validated_plan_backward = fftw_create_plan(n, FFTW_BACKWARD, measure_flag | wisdom_flag); test_ergun(n, FFTW_BACKWARD, validated_plan_backward); for (istride = 1; istride <= MAX_STRIDE; ++istride) for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) test_out_of_place_both(n, istride, ostride, howmany, validated_plan_forward, validated_plan_backward); for (istride = 1; istride <= MAX_STRIDE; ++istride) for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) test_in_place_both(n, istride, howmany, validated_plan_forward, validated_plan_backward); fftw_destroy_plan(validated_plan_forward); fftw_destroy_plan(validated_plan_backward); if (!(wisdom_flag & FFTW_USE_WISDOM) && chk_mem_leak) fftw_check_memory_leaks(); WHEN_VERBOSE(1, printf("OK\n")); }
/*
 * Same as test_ergun, but for multi-dimensional transforms.
 *
 * rank, n - number of dimensions and the size of each dimension
 * dir     - direction of the transform computed by plan (its value is
 *           used as the sign of the sine term in the shift test)
 * plan    - the plan to validate
 *
 * Three randomized checks are run, 20 rounds each: linearity, the
 * transform of a unit impulse (real and imaginary), and the time-shift
 * property along every dimension.  Mismatches are detected by
 * array_compare.
 */
void testnd_ergun(int rank, int *n, fftw_direction dir, fftwnd_plan plan)
{
    fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
    fftw_complex *tmp;
    fftw_complex impulse;
    FFTW_TRIG_REAL twopin;
    int rounds = 20;
    int N = 1;
    int n_before, n_after;
    int dim, trial, pos, which_impulse;

    for (dim = 0; dim < rank; ++dim) {
        N *= n[dim];
    }

    inA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    inB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    inC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    outA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    outB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    outC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    tmp = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

    WHEN_VERBOSE(2,
                 printf("Validating plan, N = %d, dir = %s\n", N,
                        dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD"));

    /* test 1: check linearity */
    for (trial = 0; trial < rounds; ++trial) {
        fftw_complex alpha, beta;

        c_re(alpha) = DRAND();
        c_im(alpha) = DRAND();
        c_re(beta) = DRAND();
        c_im(beta) = DRAND();

        fill_random(inA, N);
        fill_random(inB, N);

        /* tmp = alpha * F(A) + beta * F(B) */
        fftwnd(plan, 1, inA, 1, N, outA, 1, N);
        fftwnd(plan, 1, inB, 1, N, outB, 1, N);
        array_scale(outA, alpha, N);
        array_scale(outB, beta, N);
        array_add(tmp, outA, outB, N);

        /* outC = F(alpha * A + beta * B) */
        array_scale(inA, alpha, N);
        array_scale(inB, beta, N);
        array_add(inC, inA, inB, N);
        fftwnd(plan, 1, inC, 1, N, outC, 1, N);

        array_compare(outC, tmp, N);
    }

    /*
     * test 2: check that the unit impulse is transformed properly -- we
     * need to test both the real and imaginary impulses
     */
    for (which_impulse = 0; which_impulse < 2; ++which_impulse) {
        /* pass 0: real impulse (1, 0); pass 1: imaginary impulse (0, 1) */
        c_re(impulse) = (which_impulse == 0) ? 1.0 : 0.0;
        c_im(impulse) = (which_impulse == 0) ? 0.0 : 1.0;

        for (pos = 0; pos < N; ++pos) {
            /* impulse */
            c_re(inA[pos]) = 0.0;
            c_im(inA[pos]) = 0.0;
            /* transform of the impulse */
            outA[pos] = impulse;
        }
        inA[0] = impulse;

        for (trial = 0; trial < rounds; ++trial) {
            /* split the impulse as B + (A - B) and transform both parts */
            fill_random(inB, N);
            array_sub(inC, inA, inB, N);
            fftwnd(plan, 1, inB, 1, N, outB, 1, N);
            fftwnd(plan, 1, inC, 1, N, outC, 1, N);
            array_add(tmp, outB, outC, N);
            array_compare(tmp, outA, N);
        }
    }

    /* test 3: check the time-shift property */
    /* the paper performs more tests, but this code should be fine too */
    /* -- we have to check shifts in each dimension */
    n_before = 1;
    n_after = N;
    for (dim = 0; dim < rank; ++dim) {
        int n_cur = n[dim];

        n_after /= n_cur;
        twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n_cur;

        for (trial = 0; trial < rounds; ++trial) {
            int j, jb, ja;

            fill_random(inA, N);
            array_rol(inB, inA, n_cur, n_before, n_after);
            fftwnd(plan, 1, inA, 1, N, outA, 1, N);
            fftwnd(plan, 1, inB, 1, N, outB, 1, N);

            /* multiply outB by the phase factor for index j along dim */
            for (jb = 0; jb < n_before; ++jb) {
                for (j = 0; j < n_cur; ++j) {
                    FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
                    FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);

                    for (ja = 0; ja < n_after; ++ja) {
                        int idx = (jb * n_cur + j) * n_after + ja;

                        c_re(tmp[idx]) =
                            c_re(outB[idx]) * c - c_im(outB[idx]) * s;
                        c_im(tmp[idx]) =
                            c_re(outB[idx]) * s + c_im(outB[idx]) * c;
                    }
                }
            }

            array_compare(tmp, outA, N);
        }

        n_before *= n_cur;
    }

    WHEN_VERBOSE(2, printf("Validation done\n"));

    fftw_free(tmp);
    fftw_free(outC);
    fftw_free(outB);
    fftw_free(outA);
    fftw_free(inC);
    fftw_free(inB);
    fftw_free(inA);
}
/*
 * In-place correctness test for the one-dimensional MPI transform.
 *
 * n              - transform size
 * istride        - not used by this function body
 * howmany        - number of interleaved transforms per call
 * dir            - transform direction
 * validated_plan - reference plan; its out-of-place result on the
 *                  full input is the expected answer
 * specific       - non-zero: the test is reported as "N/A" and skipped
 *
 * A random full-size input is generated; the slice of it starting at
 * local_start (local_n points, howmany values each) is copied into the
 * local buffer, transformed with fftw_mpi, and compared against the
 * reference output slice starting at local_start_after_transform.
 * A work buffer is passed to fftw_mpi only when coinflip() says so;
 * otherwise work stays NULL.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
                   fftw_plan validated_plan, int specific)
{
    int local_n, local_start, local_n_after_transform,
        local_start_after_transform, total_local_size;
    fftw_complex *in1, *work = NULL, *in2, *out2;
    fftw_mpi_plan plan;
    int i;
    int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

    if (specific) {
        WHEN_VERBOSE(2, my_printf("N/A\n"));
        return;
    }

    if (coinflip())
        flags |= FFTW_THREADSAFE;

    plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, flags);

    /*
     * Fail with a clear message instead of handing a null plan to
     * fftw_mpi_local_sizes / fftw_mpi below (same check and message as
     * the other tests that create their own plan).
     */
    CHECK(plan != NULL, "can't create plan");

    fftw_mpi_local_sizes(plan, &local_n, &local_start,
                         &local_n_after_transform,
                         &local_start_after_transform,
                         &total_local_size);

    in1 = (fftw_complex *) fftw_malloc(total_local_size * sizeof(fftw_complex) * howmany);
    if (coinflip()) {
        WHEN_VERBOSE(2, my_printf("w/work..."));
        work = (fftw_complex *) fftw_malloc(total_local_size * sizeof(fftw_complex) * howmany);
    }
    in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
    out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

    /* generate random inputs */
    for (i = 0; i < n * howmany; ++i) {
        c_re(in2[i]) = DRAND();
        c_im(in2[i]) = DRAND();
    }

    /* copy this process's slice of the full input into the local buffer */
    for (i = 0; i < local_n * howmany; ++i) {
        c_re(in1[i]) = c_re(in2[i + local_start * howmany]);
        c_im(in1[i]) = c_im(in2[i + local_start * howmany]);
    }

    /* fft-ize */
    fftw_mpi(plan, howmany, in1, work);

    fftw_mpi_destroy_plan(plan);

    /* reference result over the full input */
    fftw(validated_plan, howmany, in2, howmany, 1, out2, howmany, 1);

    CHECK(compute_error_complex(in1, 1,
                                out2 + local_start_after_transform * howmany,
                                1,
                                howmany * local_n_after_transform) < TOLERANCE,
          "test_in_place: wrong answer");

    WHEN_VERBOSE(2, my_printf("OK\n"));

    fftw_free(in1);
    fftw_free(work);
    fftw_free(in2);
    fftw_free(out2);
}
/*
 * In-place correctness test for the multi-dimensional real transforms.
 *
 * rank, n        - number of dimensions and the size of each dimension
 * validated_plan - complex reference plan; its output on the same data
 *                  (imaginary parts set to 0) is the expected answer
 * alternate_api  - non-zero: use the rfftw2d / rfftw3d plan creators
 *                  when rank is 2 or 3
 * specific       - non-zero: use the *_create_plan_specific creators
 *
 * For every stride in 1..MAX_STRIDE the real-to-complex result is
 * compared with the reference, then the complex-to-real transform is
 * applied, scaled by 1/N and compared with the original input.  Real
 * rows are stored with a row length of 2 * nhc values so that the
 * half-complex output fits in the same buffer.
 */
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
                     int alternate_api, int specific)
{
    int istride, ostride, howmany;
    int N, dim, i, j, k;
    int nc, nhc, nr;
    int use_alt;
    fftw_real *in1, *out3;
    fftw_complex *in2, *out1, *out2;
    fftwnd_plan p, ip;
    int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

    if (coinflip())
        flags |= FFTW_THREADSAFE;

    /* N: total points; nr: last dimension; nc: rows; nhc: nr / 2 + 1 */
    N = nc = nr = nhc = 1;
    for (dim = 0; dim < rank; ++dim) {
        N *= n[dim];
    }
    if (rank > 0) {
        nr = n[rank - 1];
        nc = N / nr;
        nhc = nr / 2 + 1;
    }

    /* one buffer, viewed as real input, complex output and real result */
    in1 = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE * sizeof(fftw_real));
    out3 = in1;
    out1 = (fftw_complex *) in1;
    in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

    use_alt = alternate_api && (rank == 2 || rank == 3);

    if (specific) {
        if (use_alt) {
            if (rank == 2) {
                p = rfftw2d_create_plan_specific(n[0], n[1],
                                                 FFTW_REAL_TO_COMPLEX, flags,
                                                 in1, MAX_STRIDE, 0, 0);
                ip = rfftw2d_create_plan_specific(n[0], n[1],
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  in1, MAX_STRIDE, 0, 0);
            } else {
                p = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                 FFTW_REAL_TO_COMPLEX, flags,
                                                 in1, MAX_STRIDE, 0, 0);
                ip = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  in1, MAX_STRIDE, 0, 0);
            }
        } else {
            p = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX,
                                             flags,
                                             in1, MAX_STRIDE,
                                             in1, MAX_STRIDE);
            ip = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL,
                                              flags,
                                              in1, MAX_STRIDE,
                                              in1, MAX_STRIDE);
        }
    } else {
        if (use_alt) {
            if (rank == 2) {
                p = rfftw2d_create_plan(n[0], n[1], FFTW_REAL_TO_COMPLEX, flags);
                ip = rfftw2d_create_plan(n[0], n[1], FFTW_COMPLEX_TO_REAL, flags);
            } else {
                p = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_REAL_TO_COMPLEX, flags);
                ip = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_COMPLEX_TO_REAL, flags);
            }
        } else {
            p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
            ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
        }
    }

    CHECK(p != NULL && ip != NULL, "can't create plan");

    for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i) {
        out3[i] = 0;
    }

    for (istride = 1; istride <= MAX_STRIDE; ++istride) {
        /* generate random inputs */
        for (i = 0; i < nc; ++i) {
            for (j = 0; j < nr; ++j) {
                int src = i * nr + j;

                c_re(in2[src]) = DRAND();
                c_im(in2[src]) = 0.0;
                /* replicate the sample into each of the istride slots */
                for (k = 0; k < istride; ++k) {
                    in1[(i * nhc * 2 + j) * istride + k] = c_re(in2[src]);
                }
            }
        }

        fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

        howmany = ostride = istride;

        WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...", istride));

        /* coinflip() is only consulted for the unit-stride single case */
        if (howmany != 1 || istride != 1 || ostride != 1 || coinflip()) {
            rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
                                    out1, ostride, 1);
        } else {
            rfftwnd_one_real_to_complex(p, in1, NULL);
        }

        for (i = 0; i < nc; ++i) {
            for (k = 0; k < howmany; ++k) {
                CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
                                            ostride,
                                            out2 + i * nr, 1,
                                            nhc) < TOLERANCE,
                      "in-place (r2c): wrong answer");
            }
        }

        if (howmany != 1 || istride != 1 || ostride != 1 || coinflip()) {
            rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
                                    out3, istride, 1);
        } else {
            rfftwnd_one_complex_to_real(ip, out1, NULL);
        }

        /* scale the round-trip result by 1/N before comparing */
        for (i = 0; i < nc * nhc * 2 * istride; ++i) {
            out3[i] *= 1.0 / N;
        }

        for (i = 0; i < nc; ++i) {
            for (k = 0; k < howmany; ++k) {
                CHECK(compute_error(out3 + i * nhc * 2 * istride + k,
                                    istride,
                                    (fftw_real *) (in2 + i * nr), 2,
                                    nr) < TOLERANCE,
                      "in-place (c2r): wrong answer (check 2)");
            }
        }
    }

    rfftwnd_destroy_plan(p);
    rfftwnd_destroy_plan(ip);

    fftw_free(out2);
    fftw_free(in2);
    fftw_free(in1);
}
/*
 * Time the in-place multi-dimensional MPI transform of the shape given
 * by sz.  Nothing is done when sz.rank < 2 or when specific is non-zero.
 *
 * When io_okay is set and only_parallel is not, the serial fftwnd
 * transform is timed first (t0) so a speedup can be printed.  The MPI
 * transform is then timed four times: normal and transposed output
 * order, each without and with a work buffer.  Results are printed only
 * when io_okay is set.
 */
void test_speed_nd_aux(struct size sz, fftw_direction dir, int flags, int specific)
{
    int local_nx, local_x_start, local_ny_after_transpose,
        local_y_start_after_transpose, total_local_size;
    fftw_complex *in, *work;
    fftwnd_plan plan = 0;
    fftwnd_mpi_plan mpi_plan;
    double t, t0 = 0.0;
    int i, N;
    int plan_flags;
    int run_serial;

    if (sz.rank < 2)
        return;
    if (specific)
        return;

    /* only bench in-place multi-dim transforms */
    flags |= FFTW_IN_PLACE;
    plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

    N = 1;
    for (i = 0; i < sz.rank; ++i) {
        N *= (sz.narray[i]);
    }

    /* the serial baseline is measured only under this condition */
    run_serial = io_okay && !only_parallel;

    if (run_serial) {
        plan = fftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
    }
    mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray,
                                      dir, plan_flags);
    CHECK(mpi_plan != NULL, "can't create plan");

    fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,
                           &local_ny_after_transpose,
                           &local_y_start_after_transpose,
                           &total_local_size);

    /* the serial run needs room for the whole array, not just a slab */
    if (run_serial) {
        in = (fftw_complex *) fftw_malloc(N * howmany_fields * sizeof(fftw_complex));
    } else {
        in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields * sizeof(fftw_complex));
    }
    work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields * sizeof(fftw_complex));

    if (run_serial) {
        FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
                             in, howmany_fields, 1, 0, 0, 0),
                      in, N * howmany_fields, t0);

        fftwnd_destroy_plan(plan);

        WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
                               smart_sprint_time(t0)));
    }

    /* 1: normal order, no work buffer */
    MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, NULL,
                            FFTW_NORMAL_ORDER),
                 in, total_local_size * howmany_fields, t);

    if (io_okay) {
        WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",
                               ncpus, smart_sprint_time(t)));
        WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
        WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / "
                               "(t in microseconds)"
                               " = %f\n", howmany_fields * mflops(t, N)));
        if (!only_parallel) {
            WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));
        }
    }

    /* 2: transposed order, no work buffer */
    MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, NULL,
                            FFTW_TRANSPOSED_ORDER),
                 in, total_local_size * howmany_fields, t);

    if (io_okay) {
        WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",
                               ncpus, smart_sprint_time(t)));
        WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
        WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / "
                               "(t in microseconds)"
                               " = %f\n", howmany_fields * mflops(t, N)));
        if (!only_parallel) {
            WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));
        }
    }

    /* 3: normal order, with work buffer */
    MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, work,
                            FFTW_NORMAL_ORDER),
                 in, total_local_size * howmany_fields, t);

    if (io_okay) {
        WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",
                               ncpus, smart_sprint_time(t)));
        WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
        WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / "
                               "(t in microseconds)"
                               " = %f\n", howmany_fields * mflops(t, N)));
        if (!only_parallel) {
            WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));
        }
    }

    /* 4: transposed order, with work buffer */
    MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, work,
                            FFTW_TRANSPOSED_ORDER),
                 in, total_local_size * howmany_fields, t);

    if (io_okay) {
        WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s",
                               ncpus, smart_sprint_time(t)));
        WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
        WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / "
                               "(t in microseconds)"
                               " = %f\n", howmany_fields * mflops(t, N)));
        if (!only_parallel) {
            WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));
        }
    }

    fftwnd_mpi_destroy_plan(mpi_plan);

    fftw_free(in);
    fftw_free(work);

    WHEN_VERBOSE(1, my_printf("\n"));
}
void test_planner(int rank) { /* * create and destroy many plans, at random. Check the * garbage-collecting allocator of twiddle factors */ int i, dim; int r, s; fftw_plan p[PLANNER_TEST_SIZE]; fftwnd_plan pnd[PLANNER_TEST_SIZE]; int *narr, maxdim; chk_mem_leak = 0; verbose--; please_wait(); if (rank < 1) rank = 1; narr = (int *) fftw_malloc(rank * sizeof(int)); maxdim = (int) pow(8192.0, 1.0/rank); for (i = 0; i < PLANNER_TEST_SIZE; ++i) { p[i] = (fftw_plan) 0; pnd[i] = (fftwnd_plan) 0; } for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) { r = rand(); if (r < 0) r = -r; r = r % PLANNER_TEST_SIZE; for (dim = 0; dim < rank; ++dim) { do { s = rand(); if (s < 0) s = -s; s = s % maxdim + 1; } while (s == 0); narr[dim] = s; } if (rank == 1) { if (p[r]) fftw_destroy_plan(p[r]); p[r] = fftw_create_plan(narr[0], random_dir(), measure_flag | wisdom_flag); if (paranoid && narr[0] < 200) test_correctness(narr[0]); } if (pnd[r]) fftwnd_destroy_plan(pnd[r]); pnd[r] = fftwnd_create_plan(rank, narr, random_dir(), measure_flag | wisdom_flag); if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) { WHEN_VERBOSE(0, printf("test planner: so far so good\n")); WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n", i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE)); } } for (i = 0; i < PLANNER_TEST_SIZE; ++i) { if (p[i]) fftw_destroy_plan(p[i]); if (pnd[i]) fftwnd_destroy_plan(pnd[i]); } fftw_free(narr); verbose++; chk_mem_leak = 1; }
/*
 * Out-of-place correctness test for the multi-dimensional real
 * transforms.
 *
 * rank, n        - number of dimensions and the size of each dimension
 * validated_plan - complex reference plan; its output on the same data
 *                  (imaginary parts set to 0) is the expected answer
 *
 * For every (istride, ostride) pair in 1..MAX_STRIDE, with
 * howmany = min(istride, ostride), the real-to-complex result is
 * compared with the reference, then the complex-to-real transform is
 * applied, scaled by 1/N and compared with the original input.
 */
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
    int istride, ostride;
    int N, dim, i, j, k;
    int nc, nhc, nr;
    fftw_real *in1, *out3;
    fftw_complex *in2, *out1, *out2;
    fftwnd_plan p, ip;
    int flags = measure_flag | wisdom_flag;

    if (coinflip())
        flags |= FFTW_THREADSAFE;

    /* N: total points; nr: last dimension; nc: rows; nhc: nr / 2 + 1 */
    N = nc = nr = nhc = 1;
    for (dim = 0; dim < rank; ++dim) {
        N *= n[dim];
    }
    if (rank > 0) {
        nr = n[rank - 1];
        nc = N / nr;
        nhc = nr / 2 + 1;
    }

    in1 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
    out3 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
    out1 = (fftw_complex *) fftw_malloc(nhc * nc * MAX_STRIDE * sizeof(fftw_complex));
    in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
    out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

    p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
    ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
    CHECK(p != NULL && ip != NULL, "can't create plan");

    for (istride = 1; istride <= MAX_STRIDE; ++istride) {
        /* generate random inputs */
        for (i = 0; i < nc; ++i) {
            for (j = 0; j < nr; ++j) {
                int src = i * nr + j;

                c_re(in2[src]) = DRAND();
                c_im(in2[src]) = 0.0;
                /* replicate the sample into each of the istride slots */
                for (k = 0; k < istride; ++k) {
                    in1[src * istride + k] = c_re(in2[src]);
                }
            }
        }

        for (i = 0; i < N * istride; ++i) {
            out3[i] = 0.0;
        }

        fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

        for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
            int howmany;

            if (istride < ostride)
                howmany = istride;
            else
                howmany = ostride;

            WHEN_VERBOSE(2, printf("\n    testing stride %d/%d...",
                                   istride, ostride));

            /* coinflip() is only consulted for the unit-stride single case */
            if (howmany != 1 || istride != 1 || ostride != 1 || coinflip()) {
                rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
                                        out1, ostride, 1);
            } else {
                rfftwnd_one_real_to_complex(p, in1, out1);
            }

            for (i = 0; i < nc; ++i) {
                for (k = 0; k < howmany; ++k) {
                    CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
                                                ostride,
                                                out2 + i * nr, 1,
                                                nhc) < TOLERANCE,
                          "out-of-place (r2c): wrong answer");
                }
            }

            if (howmany != 1 || istride != 1 || ostride != 1 || coinflip()) {
                rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
                                        out3, istride, 1);
            } else {
                rfftwnd_one_complex_to_real(ip, out1, out3);
            }

            /* scale the round-trip result by 1/N before comparing */
            for (i = 0; i < N * istride; ++i) {
                out3[i] *= 1.0 / N;
            }

            /* whole-buffer comparison is done only when istride == howmany */
            if (istride == howmany) {
                CHECK(compute_error(out3, 1, in1, 1, N * istride) < TOLERANCE,
                      "out-of-place (c2r): wrong answer");
            }

            for (i = 0; i < nc; ++i) {
                for (k = 0; k < howmany; ++k) {
                    CHECK(compute_error(out3 + i * nr * istride + k,
                                        istride,
                                        (fftw_real *) (in2 + i * nr), 2,
                                        nr) < TOLERANCE,
                          "out-of-place (c2r): wrong answer (check 2)");
                }
            }
        }
    }

    rfftwnd_destroy_plan(p);
    rfftwnd_destroy_plan(ip);

    fftw_free(out3);
    fftw_free(out2);
    fftw_free(in2);
    fftw_free(out1);
    fftw_free(in1);
}
void test_planner(int rank) { /* * create and destroy many plans, at random. Check the * garbage-collecting allocator of twiddle factors */ int i, dim; int r, s; fftw_mpi_plan p[PLANNER_TEST_SIZE]; fftwnd_mpi_plan pnd[PLANNER_TEST_SIZE]; int *narr, maxdim; chk_mem_leak = 0; verbose--; please_wait(); if (rank < 1) rank = 1; narr = (int *) fftw_malloc(rank * sizeof(int)); for (i = 0; i < PLANNER_TEST_SIZE; ++i) { p[i] = (fftw_mpi_plan) 0; pnd[i] = (fftwnd_mpi_plan) 0; } if (PLANNER_TEST_SIZE >= 8) { p[0] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0); p[1] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0); p[2] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0); p[3] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0); p[4] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0); p[5] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0); p[6] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0); p[7] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0); } maxdim = (int) pow(8192, 1.0/rank); for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) { r = rand(); if (r < 0) r = -r; r = r % PLANNER_TEST_SIZE; for (dim = 0; dim < rank; ++dim) { do { s = rand(); if (s < 0) s = -s; s = s % maxdim + 1; } while (s == 0); narr[dim] = s; } if (rank == 1) { if (p[r]) fftw_mpi_destroy_plan(p[r]); p[r] = fftw_mpi_create_plan(MPI_COMM_WORLD, narr[0], random_dir(), measure_flag | wisdom_flag); } if (rank > 1) { if (pnd[r]) fftwnd_mpi_destroy_plan(pnd[r]); pnd[r] = fftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, narr, random_dir(), measure_flag | wisdom_flag); } if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) { WHEN_VERBOSE(0, my_printf("test planner: so far so good\n")); WHEN_VERBOSE(0, my_printf("test planner: iteration %d" " out of %d\n", i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE)); } } for (i = 0; i < PLANNER_TEST_SIZE; ++i) { if (p[i]) fftw_mpi_destroy_plan(p[i]); if (pnd[i]) 
fftwnd_mpi_destroy_plan(pnd[i]); } fftw_free(narr); verbose++; chk_mem_leak = 1; }
/*
 * This is a real (as opposed to complex) variation of the FFT tester
 * described in
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 *
 * n    - transform size
 * dir  - FFTW_REAL_TO_COMPLEX or anything else (treated as the
 *        complex-to-real direction)
 * plan - the plan to validate
 *
 * Three randomized checks are run, 20 rounds each: linearity, the
 * transform of a unit impulse, and the time-shift property.
 * Mismatches are detected by rarray_compare.
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
    fftw_real *inA, *inB, *inC, *outA, *outB, *outC;
    fftw_real *inA1, *inB1;
    fftw_real *tmp;
    int trial, k;
    int rounds = 20;
    int real_to_complex = (dir == FFTW_REAL_TO_COMPLEX);
    FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;

    inA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    inB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    inA1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    inB1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    inC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    outA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    outB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    outC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
    tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));

    WHEN_VERBOSE(2,
                 printf("Validating plan, n = %d, dir = %s\n", n,
                        real_to_complex ?
                        "REAL_TO_COMPLEX" : "COMPLEX_TO_REAL"));

    /* test 1: check linearity */
    for (trial = 0; trial < rounds; ++trial) {
        fftw_real alpha, beta;

        alpha = DRAND();
        beta = DRAND();
        rfill_random(inA, n);
        rfill_random(inB, n);

        /* inC = alpha * A + beta * B, built from scaled copies */
        rarray_scale(inA1, inA, alpha, n);
        rarray_scale(inB1, inB, beta, n);
        rarray_add(inC, inA1, inB1, n);

        /* tmp = alpha * F(A) + beta * F(B) */
        rfftw_out_of_place(plan, n, inA, outA);
        rfftw_out_of_place(plan, n, inB, outB);
        rarray_scale(outA, outA, alpha, n);
        rarray_scale(outB, outB, beta, n);
        rarray_add(tmp, outA, outB, n);

        rfftw_out_of_place(plan, n, inC, outC);
        rarray_compare(outC, tmp, n);
    }

    /* test 2: check that the unit impulse is transformed properly */
    for (k = 0; k < n; ++k) {
        /* impulse */
        inA[k] = 0.0;

        /* transform of the impulse */
        outA[k] = (2 * k <= n) ? 1.0 : 0.0;
    }
    inA[0] = 1.0;

    if (real_to_complex) {
        for (trial = 0; trial < rounds; ++trial) {
            /* split the impulse as B + (A - B) and transform both parts */
            rfill_random(inB, n);
            rarray_sub(inC, inA, inB, n);
            rfftw_out_of_place(plan, n, inB, outB);
            rfftw_out_of_place(plan, n, inC, outC);
            rarray_add(tmp, outB, outC, n);
            rarray_compare(tmp, outA, n);
        }
    } else {
        for (trial = 0; trial < rounds; ++trial) {
            /* same split, applied to the impulse's transform */
            rfill_random(outB, n);
            rarray_sub(outC, outA, outB, n);
            rfftw_out_of_place(plan, n, outB, inB);
            rfftw_out_of_place(plan, n, outC, inC);
            rarray_add(tmp, inB, inC, n);
            rarray_scale(tmp, tmp, 1.0 / ((double) n), n);
            rarray_compare(tmp, inA, n);
        }
    }

    /* test 3: check the time-shift property */
    /* the paper performs more tests, but this code should be fine too */
    if (real_to_complex) {
        for (trial = 0; trial < rounds; ++trial) {
            int j;

            rfill_random(inA, n);
            rarray_rol(inB, inA, n, 1, 1);
            rfftw_out_of_place(plan, n, inA, outA);
            rfftw_out_of_place(plan, n, inB, outB);

            /* rotate each (j, n - j) pair of outB by the phase for j */
            tmp[0] = outB[0];
            for (j = 1; 2 * j < n; ++j) {
                FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
                FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
                int mirror = n - j;

                tmp[j] = outB[j] * c - outB[mirror] * s;
                tmp[mirror] = outB[j] * s + outB[mirror] * c;
            }
            /* even n: the middle element only changes sign */
            if (2 * j == n)
                tmp[j] = -outB[j];

            rarray_compare(tmp, outA, n);
        }
    } else {
        for (trial = 0; trial < rounds; ++trial) {
            int j;

            rfill_random(inA, n);

            /* rotate each (j, n - j) pair of inA by the phase for j */
            inB[0] = inA[0];
            for (j = 1; 2 * j < n; ++j) {
                FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
                FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
                int mirror = n - j;

                inB[j] = inA[j] * c - inA[mirror] * s;
                inB[mirror] = inA[j] * s + inA[mirror] * c;
            }
            /* even n: the middle element only changes sign */
            if (2 * j == n)
                inB[j] = -inA[j];

            rfftw_out_of_place(plan, n, inA, outA);
            rfftw_out_of_place(plan, n, inB, outB);
            rarray_rol(tmp, outA, n, 1, 1);
            rarray_compare(tmp, outB, n);
        }
    }

    WHEN_VERBOSE(2, printf("Validation done\n"));

    fftw_free(tmp);
    fftw_free(outC);
    fftw_free(outB);
    fftw_free(outA);
    fftw_free(inC);
    fftw_free(inB1);
    fftw_free(inA1);
    fftw_free(inB);
    fftw_free(inA);
}
/*
 * Validate the plan p with test_ergun, using the size and direction
 * stored in the plan itself (p->n, p->dir).  At verbosity 3 a notice
 * and the plan are printed first.
 * NOTE(review): by its name this is meant to be installed as a planner
 * hook; the registration is not visible here -- confirm.
 */
static void fftw_plan_hook_function(fftw_plan p)
{
    WHEN_VERBOSE(3, printf("Validating tentative plan\n"));
    WHEN_VERBOSE(3, fftw_print_plan(p));

    test_ergun(p->n, p->dir, p);
}
/*
 * Implementation of the FFT tester described in
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 *
 * n    - transform size
 * dir  - direction of the transform computed by plan (its value is
 *        used as the sign of the sine term in the shift test)
 * plan - the plan to validate
 *
 * Three randomized checks are run, 20 rounds each: linearity, the
 * transform of a unit impulse, and the time-shift property.
 * Mismatches are detected by array_compare.
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
    fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
    fftw_complex *tmp;
    fftw_complex impulse;
    int trial, k;
    int rounds = 20;
    FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;

    inA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    inB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    inC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    outA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    outB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    outC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));

    WHEN_VERBOSE(2,
                 printf("Validating plan, n = %d, dir = %s\n", n,
                        dir == FFTW_FORWARD ?
                        "FORWARD" : "BACKWARD"));

    /* test 1: check linearity */
    for (trial = 0; trial < rounds; ++trial) {
        fftw_complex alpha, beta;

        c_re(alpha) = DRAND();
        c_im(alpha) = DRAND();
        c_re(beta) = DRAND();
        c_im(beta) = DRAND();

        fill_random(inA, n);
        fill_random(inB, n);

        /* tmp = alpha * F(A) + beta * F(B) */
        fftw_out_of_place(plan, n, inA, outA);
        fftw_out_of_place(plan, n, inB, outB);
        array_scale(outA, alpha, n);
        array_scale(outB, beta, n);
        array_add(tmp, outA, outB, n);

        /* outC = F(alpha * A + beta * B) */
        array_scale(inA, alpha, n);
        array_scale(inB, beta, n);
        array_add(inC, inA, inB, n);
        fftw_out_of_place(plan, n, inC, outC);

        array_compare(outC, tmp, n);
    }

    /* test 2: check that the unit impulse is transformed properly */
    c_re(impulse) = 1.0;
    c_im(impulse) = 0.0;

    for (k = 0; k < n; ++k) {
        /* impulse */
        c_re(inA[k]) = 0.0;
        c_im(inA[k]) = 0.0;

        /* transform of the impulse */
        outA[k] = impulse;
    }
    inA[0] = impulse;

    for (trial = 0; trial < rounds; ++trial) {
        /* split the impulse as B + (A - B) and transform both parts */
        fill_random(inB, n);
        array_sub(inC, inA, inB, n);
        fftw_out_of_place(plan, n, inB, outB);
        fftw_out_of_place(plan, n, inC, outC);
        array_add(tmp, outB, outC, n);
        array_compare(tmp, outA, n);
    }

    /* test 3: check the time-shift property */
    /* the paper performs more tests, but this code should be fine too */
    for (trial = 0; trial < rounds; ++trial) {
        int j;

        fill_random(inA, n);
        array_rol(inB, inA, n, 1, 1);
        fftw_out_of_place(plan, n, inA, outA);
        fftw_out_of_place(plan, n, inB, outB);

        /* multiply outB[j] by the phase factor for index j */
        j = 0;
        while (j < n) {
            FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
            FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);

            c_re(tmp[j]) = c_re(outB[j]) * c - c_im(outB[j]) * s;
            c_im(tmp[j]) = c_re(outB[j]) * s + c_im(outB[j]) * c;
            ++j;
        }

        array_compare(tmp, outA, n);
    }

    WHEN_VERBOSE(2, printf("Validation done\n"));

    fftw_free(tmp);
    fftw_free(outC);
    fftw_free(outB);
    fftw_free(outA);
    fftw_free(inC);
    fftw_free(inB);
    fftw_free(inA);
}
/*
 * Check an in-place MPI multi-dimensional transform against a validated
 * plan.
 *
 *   rank            - number of dimensions; nothing is tested for rank < 2
 *   n               - array of `rank` dimension sizes
 *   dir             - transform direction handed to the plan creator
 *   validated_plan  - reference plan, run in place on the full array
 *   alternate_api   - when non-zero and rank is 2 or 3, create the plan
 *                     with fftw2d_mpi_create_plan/fftw3d_mpi_create_plan
 *                     instead of fftwnd_mpi_create_plan
 *   specific        - when non-zero the test is skipped entirely
 *   force_buffered  - when non-zero, adds FFTWND_FORCE_BUFFERED to flags
 *
 * Every process generates the same full-size random array in2 (this
 * presumably relies on DRAND() being seeded identically on all
 * processes -- TODO confirm), copies its own slab of it into in1,
 * transforms in1 with the MPI plan and in2 with the validated plan, and
 * compares its slab of the two results.
 */
void testnd_in_place(int rank, int *n, fftw_direction dir,
                     fftwnd_plan validated_plan,
                     int alternate_api, int specific, int force_buffered)
{
    int local_nx, local_x_start, local_ny_after_transpose,
        local_y_start_after_transpose, total_local_size;
    int istride;
    int N, dim, i;
    int slab;       /* elements per unit of the first dimension */
    int local_len;  /* elements in this process's slab */
    fftw_complex *in1, *work = 0, *in2;
    fftw_complex *ref;  /* start of this process's slab inside in2 */
    fftwnd_mpi_plan p = 0;
    int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

    if (specific || rank < 2)
        return;

    if (coinflip())
        flags |= FFTW_THREADSAFE;

    if (force_buffered)
        flags |= FFTWND_FORCE_BUFFERED;

    N = 1;
    for (dim = 0; dim < rank; ++dim)
        N *= n[dim];

    if (alternate_api && (rank == 2 || rank == 3)) {
        if (rank == 2)
            p = fftw2d_mpi_create_plan(MPI_COMM_WORLD,
                                       n[0], n[1], dir, flags);
        else
            p = fftw3d_mpi_create_plan(MPI_COMM_WORLD,
                                       n[0], n[1], n[2], dir, flags);
    } else {
        /* standard api */
        p = fftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, n, dir, flags);
    }

    /* do not hand a null plan to fftwnd_mpi_local_sizes() below */
    CHECK(p != NULL, "can't create plan");

    fftwnd_mpi_local_sizes(p, &local_nx, &local_x_start,
                           &local_ny_after_transpose,
                           &local_y_start_after_transpose,
                           &total_local_size);

    in1 = (fftw_complex *) fftw_malloc(total_local_size * MAX_STRIDE
                                       * sizeof(fftw_complex));
    /* randomly exercise the transform with and without a work array */
    if (coinflip()) {
        WHEN_VERBOSE(1, my_printf("w/work..."));
        work = (fftw_complex *) fftw_malloc(total_local_size * MAX_STRIDE
                                            * sizeof(fftw_complex));
    }
    in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

    slab = N / n[0];
    local_len = local_nx * slab;
    ref = in2 + local_x_start * slab;

    for (istride = 1; istride <= MAX_STRIDE; ++istride) {
        /* generate random inputs */
        for (i = 0; i < N; ++i) {
            c_re(in2[i]) = DRAND();
            c_im(in2[i]) = DRAND();
        }

        /* replicate the local slab into all istride interleaved slots */
        for (i = 0; i < local_len; ++i) {
            int j;

            for (j = 0; j < istride; ++j) {
                c_re(in1[i * istride + j]) = c_re(ref[i]);
                c_im(in1[i * istride + j]) = c_im(ref[i]);
            }
        }

        fftwnd_mpi(p, istride, in1, work, FFTW_NORMAL_ORDER);

        /* reference result, computed in place on the full array */
        fftwnd(validated_plan, 1, in2, 1, 1, NULL, 0, 0);

        /* each interleaved slot must match the reference slab */
        for (i = 0; i < istride; ++i) {
            CHECK(compute_error_complex(in1 + i, istride,
                                        ref, 1, local_len) < TOLERANCE,
                  "testnd_in_place: wrong answer");
        }
    }

    fftwnd_mpi_destroy_plan(p);

    fftw_free(in2);
    fftw_free(work);    /* still null when no work array was allocated */
    fftw_free(in1);
}
/*
 * Check an in-place complex transform against a validated plan.
 *
 *   n               - transform length
 *   istride         - spacing between consecutive elements in the tested
 *                     array; the slots in between act as guard values
 *   howmany         - number of back-to-back transforms
 *   dir             - transform direction
 *   validated_plan  - reference plan, run out of place on a packed copy
 *   specific        - non-zero selects fftw_create_plan_specific()
 *
 * The strided array is transformed in place, the guard slots are
 * checked for exact preservation, and the result is compared with the
 * reference output via compute_error_complex().
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
                   fftw_plan validated_plan, int specific)
{
    fftw_complex *data, *ref_in, *ref_out;
    fftw_plan plan;
    int idx, gap, batch;
    int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

    if (coinflip())
        flags |= FFTW_THREADSAFE;

    data = (fftw_complex *)
        fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
    ref_in = (fftw_complex *)
        fftw_malloc(n * sizeof(fftw_complex) * howmany);
    ref_out = (fftw_complex *)
        fftw_malloc(n * sizeof(fftw_complex) * howmany);

    if (specific)
        plan = fftw_create_plan_specific(n, dir, flags,
                                         data, istride,
                                         (fftw_complex *) NULL, 0);
    else
        plan = fftw_create_plan(n, dir, flags);

    /* generate random inputs, mirrored into the packed reference copy */
    for (idx = 0; idx < n * howmany; ++idx) {
        fftw_complex *packed = ref_in + idx;
        fftw_complex *strided = data + idx * istride;

        c_re(*packed) = DRAND();
        c_re(*strided) = c_re(*packed);
        c_im(*packed) = DRAND();
        c_im(*strided) = c_im(*packed);
    }

    /*
     * fill in other positions of the array, to make sure that
     * fftw doesn't overwrite them
     */
    for (idx = 0; idx < n * howmany; ++idx) {
        for (gap = 1; gap < istride; ++gap) {
            c_re(data[idx * istride + gap]) = idx * istride + gap;
            c_im(data[idx * istride + gap]) = idx * istride - gap;
        }
    }

    CHECK(plan != NULL, "can't create plan");
    WHEN_VERBOSE(2, fftw_print_plan(plan));

    /*
     * fft-ize: the single-transform entry point is only eligible for
     * one contiguous transform, and even then is picked at random
     */
    if (howmany == 1 && istride == 1 && !coinflip())
        fftw_one(plan, data, NULL);
    else
        fftw(plan, howmany, data, istride, n * istride,
             (fftw_complex *) NULL, 0, 0);

    fftw_destroy_plan(plan);

    /* check for overwriting */
    for (idx = 0; idx < n * howmany; ++idx) {
        for (gap = 1; gap < istride; ++gap) {
            CHECK(c_re(data[idx * istride + gap]) == idx * istride + gap
                  && c_im(data[idx * istride + gap]) == idx * istride - gap,
                  "input has been overwritten");
        }
    }

    /* reference transforms, one per batch entry */
    for (batch = 0; batch < howmany; ++batch)
        fftw(validated_plan, 1, ref_in + n * batch, 1, n,
             ref_out + n * batch, 1, n);

    CHECK(compute_error_complex(data, istride, ref_out, 1, n * howmany)
          < TOLERANCE,
          "test_in_place: wrong answer");
    WHEN_VERBOSE(2, printf("OK\n"));

    fftw_free(data);
    fftw_free(ref_in);
    fftw_free(ref_out);
}
/*
 * Time the one-dimensional MPI transform of size n.
 *
 *   n        - transform length
 *   dir      - transform direction
 *   flags    - planner flags; the benchmark only runs when FFTW_IN_PLACE
 *              is set
 *   specific - when non-zero the benchmark is skipped
 *
 * When io_okay is set and only_parallel is not, a uniprocessor fftw
 * plan is timed first so a parallel speedup can be printed.  The MPI
 * plan is then timed twice: once without and once with a work array.
 * Timings are printed only when io_okay is set.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
    int local_n, local_start, local_n_after_transform,
        local_start_after_transform, total_local_size, nalloc;
    fftw_complex *in, *work;
    fftw_plan plan = 0;
    fftw_mpi_plan mpi_plan;
    double t, t0 = 0.0;
    int plan_flags;
    int run_serial;     /* also time the uniprocessor transform? */

    if (specific || !(flags & FFTW_IN_PLACE))
        return;

    plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;
    run_serial = io_okay && !only_parallel;

    if (run_serial) {
        plan = fftw_create_plan(n, dir, plan_flags);
        /* fail cleanly instead of passing a null plan to fftw() below */
        CHECK(plan != NULL, "can't create plan");
    }

    mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, plan_flags);
    CHECK(mpi_plan, "failed to create plan!");

    fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,
                         &local_n_after_transform,
                         &local_start_after_transform,
                         &total_local_size);

    /* the uniprocessor run needs room for the whole length-n array */
    if (run_serial)
        nalloc = n;
    else
        nalloc = total_local_size;

    in = (fftw_complex *)
        fftw_malloc(nalloc * howmany_fields * sizeof(fftw_complex));
    work = (fftw_complex *)
        fftw_malloc(nalloc * howmany_fields * sizeof(fftw_complex));

    if (io_okay) {
        WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));
    }

    if (run_serial) {
        FFTW_TIME_FFT(fftw(plan, howmany_fields,
                           in, howmany_fields, 1, work, 1, 0),
                      in, n * howmany_fields, t0);

        fftw_destroy_plan(plan);

        WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
                               smart_sprint_time(t0)));
    }

    /* first pass: no work array */
    MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),
                 in, total_local_size * howmany_fields, t);

    if (io_okay) {
        WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus,
                               smart_sprint_time(t)));
        WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
        WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                               " = %f\n", howmany_fields * mflops(t, n)));
        if (!only_parallel) {
            WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));
        }
    }

    /* second pass: same transform, this time with a work array */
    MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),
                 in, total_local_size * howmany_fields, t);

    if (io_okay) {
        WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus,
                               smart_sprint_time(t)));
        WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
        WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
                               " = %f\n", howmany_fields * mflops(t, n)));
        if (!only_parallel) {
            WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n", t0 / t));
        }
    }

    fftw_free(in);
    fftw_free(work);
    fftw_mpi_destroy_plan(mpi_plan);

    WHEN_VERBOSE(1, my_printf("\n"));
}
/*
 * Check an in-place real transform (rfftw) against a validated complex
 * plan.
 *
 *   n               - transform length
 *   istride         - spacing between consecutive elements in the tested
 *                     array (the output stride is the same); the slots in
 *                     between act as guard values
 *   howmany         - number of back-to-back transforms; every one of
 *                     them receives the same input data
 *   dir             - FFTW_REAL_TO_COMPLEX or the opposite direction
 *   validated_plan  - complex reference plan of length n
 *   specific        - non-zero selects rfftw_create_plan_specific()
 *
 * The real input is expanded to a full complex array, transformed with
 * the reference plan, packed back into the real layout, and compared
 * with every transform of the tested array via compute_error().
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
                   fftw_plan validated_plan, int specific)
{
    fftw_complex *cplx_in, *cplx_out;
    fftw_real *data, *result, *expected;
    fftw_plan plan;
    int k, gap, batch;
    int ostride = istride;
    int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

    if (coinflip())
        flags |= FFTW_THREADSAFE;

    data = (fftw_real *)
        fftw_malloc(istride * n * sizeof(fftw_real) * howmany);
    result = data;      /* in place: output aliases the input */
    cplx_in = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    cplx_out = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
    expected = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));

    if (specific)
        plan = rfftw_create_plan_specific(n, dir, flags,
                                          data, istride, result, ostride);
    else
        plan = rfftw_create_plan(n, dir, flags);
    CHECK(plan != NULL, "can't create plan");

    /* generate random inputs and replicate them for every transform */
    fill_random(data, n, istride);
    for (batch = 1; batch < howmany; ++batch) {
        for (k = 0; k < n; ++k)
            data[(batch * n + k) * istride] = data[k * istride];
    }

    /* copy random inputs to complex array for comparison with fftw: */
    if (dir == FFTW_REAL_TO_COMPLEX) {
        for (k = 0; k < n; ++k) {
            c_re(cplx_in[k]) = data[k * istride];
            c_im(cplx_in[k]) = 0.0;
        }
    } else {
        /*
         * The input holds real parts at index k and imaginary parts at
         * index n - k; rebuild the full conjugate-symmetric array.
         */
        int half = (n + 1) / 2;

        c_re(cplx_in[0]) = data[0];
        c_im(cplx_in[0]) = 0.0;

        k = 1;
        while (k < half) {
            c_re(cplx_in[k]) = data[k * istride];
            c_im(cplx_in[k]) = data[(n - k) * istride];
            ++k;
        }
        if (half * 2 == n) {
            /* even n: the middle element has no imaginary part */
            c_re(cplx_in[half]) = data[half * istride];
            c_im(cplx_in[half]) = 0.0;
            ++k;
        }
        /* upper half is the complex conjugate of the lower half */
        while (k < n) {
            c_re(cplx_in[k]) = c_re(cplx_in[n - k]);
            c_im(cplx_in[k]) = -c_im(cplx_in[n - k]);
            ++k;
        }
    }

    /*
     * fill in other positions of the array, to make sure that
     * rfftw doesn't overwrite them
     */
    for (k = 0; k < n * howmany; ++k) {
        for (gap = 1; gap < istride; ++gap)
            data[k * istride + gap] = k * istride + gap;
    }

    WHEN_VERBOSE(2, rfftw_print_plan(plan));

    /*
     * fft-ize: the single-transform entry point is only eligible for
     * one contiguous transform, and even then is picked at random
     */
    if (howmany == 1 && istride == 1 && !coinflip())
        rfftw_one(plan, data, NULL);
    else
        rfftw(plan, howmany, data, istride, n * istride, 0, 0, 0);

    rfftw_destroy_plan(plan);

    /* check for overwriting */
    for (k = 0; k < n * howmany; ++k) {
        for (gap = 1; gap < ostride; ++gap) {
            CHECK(result[k * ostride + gap] == k * ostride + gap,
                  "output has been overwritten");
        }
    }

    /* reference transform, then pack it into the real output layout */
    fftw(validated_plan, 1, cplx_in, 1, n, cplx_out, 1, n);

    if (dir == FFTW_REAL_TO_COMPLEX) {
        int half = (n + 1) / 2;

        expected[0] = c_re(cplx_out[0]);
        for (k = 1; k < half; ++k) {
            expected[k] = c_re(cplx_out[k]);
            expected[n - k] = c_im(cplx_out[k]);
        }
        if (half * 2 == n)
            expected[half] = c_re(cplx_out[half]);
    } else {
        for (k = 0; k < n; ++k)
            expected[k] = c_re(cplx_out[k]);
    }

    /* all transforms had the same input, so all share one expectation */
    for (batch = 0; batch < howmany; ++batch) {
        CHECK(compute_error(result + batch * n * ostride, ostride,
                            expected, 1, n) < TOLERANCE,
              "test_in_place: wrong answer");
    }

    WHEN_VERBOSE(2, printf("OK\n"));

    fftw_free(data);
    fftw_free(cplx_in);
    fftw_free(cplx_out);
    fftw_free(expected);
}