void test_speed_aux(int n, fftw_direction dir, int flags, int specific) { fftw_complex *in, *out; fftw_plan plan; double t; fftw_time begin, end; in = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex)); out = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex)); if (specific) { begin = fftw_get_time(); plan = fftw_create_plan_specific(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag, in, howmany_fields, out, howmany_fields); end = fftw_get_time(); } else { begin = fftw_get_time(); plan = fftw_create_plan(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag); end = fftw_get_time(); } CHECK(plan != NULL, "can't create plan"); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); WHEN_VERBOSE(2, fftw_print_plan(plan)); if (paranoid && !(flags & FFTW_IN_PLACE)) { begin = fftw_get_time(); test_ergun(n, dir, plan); end = fftw_get_time(); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for validation: %f s\n", t)); } FFTW_TIME_FFT(fftw(plan, howmany_fields, in, howmany_fields, 1, out, howmany_fields, 1), in, n * howmany_fields, t); fftw_destroy_plan(plan); WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t))); WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n))); WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)" " = %f\n", howmany_fields * mflops(t, n))); fftw_free(in); fftw_free(out); WHEN_VERBOSE(1, printf("\n")); }
/* * Same as fftw_measure_runtime, except for fftwnd plan. */ double fftwnd_measure_runtime(fftwnd_plan plan, fftw_complex *in, int istride, fftw_complex *out, int ostride) { fftw_time begin, end, start; double t, tmax, tmin; int i, iter; int n; int repeat; if (plan->rank == 0) return 0.0; n = 1; for (i = 0; i < plan->rank; ++i) n *= plan->n[i]; iter = 1; for (;;) { tmin = 1.0E10; tmax = -1.0E10; init_test_array(in, istride, n); start = fftw_get_time(); /* repeat the measurement FFTW_TIME_REPEAT times */ for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) { begin = fftw_get_time(); for (i = 0; i < iter; ++i) { fftwnd(plan, 1, in, istride, 0, out, ostride, 0); } end = fftw_get_time(); t = fftw_time_to_sec(fftw_time_diff(end, begin)); if (t < tmin) tmin = t; if (t > tmax) tmax = t; /* do not run for too long */ t = fftw_time_to_sec(fftw_time_diff(end, start)); if (t > FFTW_TIME_LIMIT) break; } if (tmin >= FFTW_TIME_MIN) break; iter *= 2; } tmin /= (double) iter; tmax /= (double) iter; return tmin; }
void test_speed_nd_aux(struct size sz, fftw_direction dir, int flags, int specific) { fftw_complex *in; fftwnd_plan plan; double t; fftw_time begin, end; int i, N; /* only bench in-place multi-dim transforms */ flags |= FFTW_IN_PLACE; N = 1; for (i = 0; i < sz.rank; ++i) N *= (sz.narray[i]); in = (fftw_complex *) fftw_malloc(N * howmany_fields * sizeof(fftw_complex)); if (specific) { begin = fftw_get_time(); plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir, speed_flag | flags | wisdom_flag | no_vector_flag, in, howmany_fields, 0, 1); } else { begin = fftw_get_time(); plan = fftwnd_create_plan(sz.rank, sz.narray, dir, speed_flag | flags | wisdom_flag | no_vector_flag); } end = fftw_get_time(); CHECK(plan != NULL, "can't create plan"); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); WHEN_VERBOSE(2, printf("\n")); WHEN_VERBOSE(2, (fftwnd_print_plan(plan))); WHEN_VERBOSE(2, printf("\n")); FFTW_TIME_FFT(fftwnd(plan, howmany_fields, in, howmany_fields, 1, 0, 0, 0), in, N * howmany_fields, t); fftwnd_destroy_plan(plan); WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t))); WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N))); WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)" " = %f\n", howmany_fields * mflops(t, N))); fftw_free(in); WHEN_VERBOSE(1, printf("\n")); }
/* * The timer keeps doubling the number of iterations * until the program runs for more than FFTW_TIME_MIN */ double fftw_measure_runtime(fftw_plan plan) { FFTW_COMPLEX *in, *out; fftw_time begin, end; double t; int i, iter; int n; n = plan->n; iter = 1; retry: in = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX)); out = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX)); begin = fftw_get_time(); for (i = 0; i < iter; ++i) { int j; /* generate random inputs */ for (j = 0; j < n; ++j) { c_re(in[j]) = 1.0; c_im(in[j]) = 32.432; } fftw(plan, 1, in, 1, 0, out, 1, 0); } end = fftw_get_time(); t = fftw_time_to_sec(fftw_time_diff(end,begin)); fftw_free(in); fftw_free(out); if (t < FFTW_TIME_MIN) { iter *= 2; /* * See D. E. Knuth, Structured Programming with GOTO Statements, * Computing Surveys (6), December 1974, for a justification * of this `goto' in the `n + 1/2' loop. */ goto retry; } return t / (double)iter; }