Example #1
0
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     fftw_complex *in, *out;
     fftw_plan plan;
     double t, t0;
     fftw_time begin, end;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
				       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
					* sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftw_create_plan_specific(n, dir,
					   speed_flag | flags
					   | wisdom_flag | no_vector_flag,
					   in, howmany_fields,
					   out, howmany_fields);
	  end = fftw_get_time();
     } else {
	  begin = fftw_get_time();
	  plan = fftw_create_plan(n, dir, speed_flag | flags
				  | wisdom_flag | no_vector_flag);
	  end = fftw_get_time();
     }
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     FFTW_TIME_FFT(fftw(plan, howmany_fields,
			in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t0);
     
     FFTW_TIME_FFT(fftw_threads(nthreads, plan, howmany_fields,
				in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, n)));
     WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
Example #2
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int i, N;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     in = (fftw_complex *) fftw_malloc(N * howmany_fields *
				       sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					     speed_flag | flags
					     | wisdom_flag | no_vector_flag,
					     in, howmany_fields, 0, 1);
     } else {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan(sz.rank, sz.narray,
				    dir, speed_flag | flags 
				    | wisdom_flag | no_vector_flag);
     }
     end = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			  in, howmany_fields, 1, 0, 0, 0),
		   in, N * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
Example #3
0
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     int local_n, local_start, local_n_after_transform,
	  local_start_after_transform, total_local_size, nalloc;
     fftw_complex *in, *work;
     fftw_plan plan = 0;
     fftw_mpi_plan mpi_plan;
     double t, t0 = 0.0;

     if (specific || !(flags & FFTW_IN_PLACE))
	  return;

     if (io_okay && !only_parallel)
	  plan = fftw_create_plan(n, dir, speed_flag | flags
				  | wisdom_flag | no_vector_flag);

     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir,
				     speed_flag | flags
				     | wisdom_flag | no_vector_flag);

     CHECK(mpi_plan, "failed to create plan!");

     fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,
			  &local_n_after_transform,
			  &local_start_after_transform,
			  &total_local_size);

     if (io_okay && !only_parallel)
	  nalloc = n;
     else
	  nalloc = total_local_size;

     in = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
				       * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
					 * sizeof(fftw_complex));

     if (io_okay) {
	  WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));
     }

     if (io_okay && !only_parallel) {
	  FFTW_TIME_FFT(fftw(plan, howmany_fields,
			     in, howmany_fields, 1, work, 1, 0),
			in, n * howmany_fields, t0);

	  fftw_destroy_plan(plan);

	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     }
     
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),
		  in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
	  WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, n)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),
		  in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
	  WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, n)));
	  if (!only_parallel)
	     WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n", t0 / t));
     }

     fftw_free(in);
     fftw_free(work);
     fftw_mpi_destroy_plan(mpi_plan);

     WHEN_VERBOSE(1, my_printf("\n"));
}
Example #4
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     int local_nx, local_x_start, local_ny_after_transpose,
	  local_y_start_after_transpose, total_local_size;
     fftw_complex *in, *work;
     fftwnd_plan plan = 0;
     fftwnd_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int i, N;
     
     if (sz.rank < 2)
	  return;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     if (specific) {
	  return;
     } else {
	  if (io_okay && !only_parallel)
	       plan = fftwnd_create_plan(sz.rank, sz.narray,
					 dir, speed_flag | flags
					 | wisdom_flag | no_vector_flag);
	  mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray,
					    dir, speed_flag | flags
					    | wisdom_flag | no_vector_flag);
     }
     CHECK(mpi_plan != NULL, "can't create plan");

     fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,
			    &local_ny_after_transpose,
			    &local_y_start_after_transpose,
			    &total_local_size);

     if (io_okay && !only_parallel)
	  in = (fftw_complex *) fftw_malloc(N * howmany_fields *
					    sizeof(fftw_complex));
     else
	  in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
					    sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
					 sizeof(fftw_complex));
     
     if (io_okay && !only_parallel) {
	  FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			      in, howmany_fields, 1, 0, 0, 0),
		       in, N * howmany_fields, t0);

	  fftwnd_destroy_plan(plan);
	  
	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
				 smart_sprint_time(t0)));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, NULL, FFTW_NORMAL_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	     WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, NULL, FFTW_TRANSPOSED_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	    WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, work, FFTW_NORMAL_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, work, FFTW_TRANSPOSED_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     fftwnd_mpi_destroy_plan(mpi_plan);

     fftw_free(in);
     fftw_free(work);

     WHEN_VERBOSE(1, my_printf("\n"));
}