static void fftwnd_threads_aux(int nthreads, fftwnd_plan p, int cur_dim, fftw_complex *in, int istride, fftw_complex *out, int ostride) { int n_after = p->n_after[cur_dim], n = p->n[cur_dim]; if (cur_dim == p->rank - 2) { /* just do the last dimension directly: */ if (p->is_in_place) fftw_threads(nthreads, p->plans[p->rank - 1], n, in, istride, n_after * istride, (fftw_complex*)NULL, 0, 0); else fftw_threads(nthreads, p->plans[p->rank - 1], n, in, istride, n_after * istride, out, ostride, n_after * ostride); } else { /* we have at least two dimensions to go */ /* process the subsequent dimensions recursively, in hyperslabs, to get maximum locality: */ fftwnd_aux_many_threads(nthreads, n, n_after, p, cur_dim + 1, in, istride, out, ostride); } /* do the current dimension (in-place): */ fftw_threads(nthreads, p->plans[cur_dim], n_after, out, n_after * ostride, ostride, (fftw_complex*)NULL, 0, 0); }
void test_speed_aux(int n, fftw_direction dir, int flags, int specific) { fftw_complex *in, *out; fftw_plan plan; double t, t0; fftw_time begin, end; in = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex)); out = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex)); if (specific) { begin = fftw_get_time(); plan = fftw_create_plan_specific(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag, in, howmany_fields, out, howmany_fields); end = fftw_get_time(); } else { begin = fftw_get_time(); plan = fftw_create_plan(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag); end = fftw_get_time(); } CHECK(plan != NULL, "can't create plan"); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); WHEN_VERBOSE(2, fftw_print_plan(plan)); FFTW_TIME_FFT(fftw(plan, howmany_fields, in, howmany_fields, 1, out, howmany_fields, 1), in, n * howmany_fields, t0); FFTW_TIME_FFT(fftw_threads(nthreads, plan, howmany_fields, in, howmany_fields, 1, out, howmany_fields, 1), in, n * howmany_fields, t); fftw_destroy_plan(plan); WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0))); WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t))); WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n))); WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)" " = %f\n", howmany_fields * mflops(t, n))); WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t)); fftw_free(in); fftw_free(out); WHEN_VERBOSE(1, printf("\n")); }
void fftwnd_threads(int nthreads, fftwnd_plan p, int howmany, fftw_complex *in, int istride, int idist, fftw_complex *out, int ostride, int odist) { switch (p->rank) { case 0: break; case 1: if (p->is_in_place) /* fft is in-place */ fftw_threads(nthreads, p->plans[0], howmany, in, istride, idist, (fftw_complex*) NULL, 0, 0); else fftw_threads(nthreads, p->plans[0], howmany, in, istride, idist, out, ostride, odist); break; default: /* rank >= 2 */ { int i; if (p->is_in_place) { out = in; ostride = istride; odist = idist; } if (nthreads <= 1) fftwnd(p, howmany, in, istride, idist, out, ostride, odist); else for (i = 0; i < howmany; ++i) fftwnd_threads_aux(nthreads, p, 0, in + i*idist, istride, out + i*odist, ostride); } } }
void test_in_place(int n, int istride, int howmany, fftw_direction dir, fftw_plan validated_plan, int specific) { fftw_complex *in1, *in2, *out2; fftw_plan plan; int i, j; int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE; if (coinflip()) flags |= FFTW_THREADSAFE; in1 = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany); in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany); out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany); if (!specific) plan = fftw_create_plan(n, dir, flags); else plan = fftw_create_plan_specific(n, dir, flags, in1, istride, (fftw_complex *) NULL, 0); /* generate random inputs */ for (i = 0; i < n * howmany; ++i) { c_re(in1[i * istride]) = c_re(in2[i]) = DRAND(); c_im(in1[i * istride]) = c_im(in2[i]) = DRAND(); } /* * fill in other positions of the array, to make sure that * fftw doesn't overwrite them */ for (j = 1; j < istride; ++j) for (i = 0; i < n * howmany; ++i) { c_re(in1[i * istride + j]) = i * istride + j; c_im(in1[i * istride + j]) = i * istride - j; } CHECK(plan != NULL, "can't create plan"); WHEN_VERBOSE(2, fftw_print_plan(plan)); /* fft-ize */ if (howmany != 1 || istride != 1 || coinflip()) fftw_threads(nthreads, plan, howmany, in1, istride, n * istride, (fftw_complex *) NULL, 0, 0); else fftw_threads_one(nthreads, plan, in1, NULL); fftw_destroy_plan(plan); /* check for overwriting */ for (j = 1; j < istride; ++j) for (i = 0; i < n * howmany; ++i) CHECK(c_re(in1[i * istride + j]) == i * istride + j && c_im(in1[i * istride + j]) == i * istride - j, "input has been overwritten"); for (i = 0; i < howmany; ++i) { fftw(validated_plan, 1, in2 + n * i, 1, n, out2 + n * i, 1, n); } CHECK(compute_error_complex(in1, istride, out2, 1, n * howmany) < TOLERANCE, "test_in_place: wrong answer"); WHEN_VERBOSE(2, printf("OK\n")); fftw_free(in1); fftw_free(in2); fftw_free(out2); }