Beispiel #1
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int i, N;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     in = (fftw_complex *) fftw_malloc(N * howmany_fields *
				       sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					     speed_flag | flags
					     | wisdom_flag | no_vector_flag,
					     in, howmany_fields, 0, 1);
     } else {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan(sz.rank, sz.narray,
				    dir, speed_flag | flags 
				    | wisdom_flag | no_vector_flag);
     }
     end = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			  in, howmany_fields, 1, 0, 0, 0),
		   in, N * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
Beispiel #2
0
/*
 * Same as fftw_measure_runtime, except for fftwnd plan.
 */
double fftwnd_measure_runtime(fftwnd_plan plan,
			      fftw_complex *in, int istride,
			      fftw_complex *out, int ostride)
{
     fftw_time begin, end, start;
     double t, tmax, tmin;
     int i, iter;
     int n;
     int repeat;

     if (plan->rank == 0)
	  return 0.0;

     n = 1;
     for (i = 0; i < plan->rank; ++i)
	  n *= plan->n[i];

     iter = 1;

     for (;;) {
	  tmin = 1.0E10;
	  tmax = -1.0E10;
	  init_test_array(in, istride, n);

	  start = fftw_get_time();
	  /* repeat the measurement FFTW_TIME_REPEAT times */
	  for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) {
	       begin = fftw_get_time();
	       for (i = 0; i < iter; ++i) {
		    fftwnd(plan, 1, in, istride, 0, out, ostride, 0);
	       }
	       end = fftw_get_time();

	       t = fftw_time_to_sec(fftw_time_diff(end, begin));
	       if (t < tmin)
		    tmin = t;
	       if (t > tmax)
		    tmax = t;

	       /* do not run for too long */
	       t = fftw_time_to_sec(fftw_time_diff(end, start));
	       if (t > FFTW_TIME_LIMIT)
		    break;
	  }

	  if (tmin >= FFTW_TIME_MIN)
	       break;

	  iter *= 2;
     }

     tmin /= (double) iter;
     tmax /= (double) iter;

     return tmin;
}
Beispiel #3
0
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     fftw_complex *in, *out;
     fftw_plan plan;
     double t, t0;
     fftw_time begin, end;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
				       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
					* sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftw_create_plan_specific(n, dir,
					   speed_flag | flags
					   | wisdom_flag | no_vector_flag,
					   in, howmany_fields,
					   out, howmany_fields);
	  end = fftw_get_time();
     } else {
	  begin = fftw_get_time();
	  plan = fftw_create_plan(n, dir, speed_flag | flags
				  | wisdom_flag | no_vector_flag);
	  end = fftw_get_time();
     }
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     FFTW_TIME_FFT(fftw(plan, howmany_fields,
			in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t0);
     
     FFTW_TIME_FFT(fftw_threads(nthreads, plan, howmany_fields,
				in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, n)));
     WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
Beispiel #4
0
/*
 * The timer keeps doubling the number of iterations
 * until the program runs for more than FFTW_TIME_MIN
 */
double fftw_measure_runtime(fftw_plan plan)
{
     FFTW_COMPLEX *in, *out;
     fftw_time begin, end;
     double t;
     int i, iter;
     int n;

     n = plan->n;

     iter = 1;

retry:
     in = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));
     out = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));

     begin = fftw_get_time();
     for (i = 0; i < iter; ++i) {
	  int j;

	  /* generate random inputs */
	  for (j = 0; j < n; ++j) {
	       c_re(in[j]) = 1.0;
	       c_im(in[j]) = 32.432;
	  }

	  fftw(plan, 1, in, 1, 0, out, 1, 0);
     }
     end = fftw_get_time();

     t = fftw_time_to_sec(fftw_time_diff(end,begin));

     fftw_free(in);
     fftw_free(out);

     if (t < FFTW_TIME_MIN) {
	  iter *= 2;
	  /* 
	   * See D. E. Knuth, Structured Programming with GOTO Statements,
	   * Computing Surveys (6), December 1974, for a justification
	   * of this `goto' in the `n + 1/2' loop.
	   */
	  goto retry;
     }

     return t / (double)iter;
}