Exemplo n.º 1
0
void test_correctness(int n)
{
     int howmany;
     fftw_plan validated_plan_forward, validated_plan_backward;

     WHEN_VERBOSE(1,
		  my_printf("Testing correctness for n = %d...", n);
		  my_fflush(stdout));

     /* produce a good plan */
     validated_plan_forward =
	 fftw_create_plan(n, FFTW_FORWARD, measure_flag | wisdom_flag);
     validated_plan_backward =
	 fftw_create_plan(n, FFTW_BACKWARD, measure_flag | wisdom_flag);

     for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany)
	  test_in_place_both(n, howmany, howmany,
			     validated_plan_forward,
			     validated_plan_backward);

     fftw_destroy_plan(validated_plan_forward);
     fftw_destroy_plan(validated_plan_backward);

     if (!(wisdom_flag & FFTW_USE_WISDOM) && chk_mem_leak)
	  fftw_check_memory_leaks();

     WHEN_VERBOSE(1, my_printf("OK\n"));
}
Exemplo n.º 2
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int i, N;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     in = (fftw_complex *) fftw_malloc(N * howmany_fields *
				       sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					     speed_flag | flags
					     | wisdom_flag | no_vector_flag,
					     in, howmany_fields, 0, 1);
     } else {
	  begin = fftw_get_time();
	  plan = fftwnd_create_plan(sz.rank, sz.narray,
				    dir, speed_flag | flags 
				    | wisdom_flag | no_vector_flag);
     }
     end = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			  in, howmany_fields, 1, 0, 0, 0),
		   in, N * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
Exemplo n.º 3
0
void test_out_of_place(int n, int istride, int ostride,
		       int howmany, fftw_direction dir,
		       fftw_plan validated_plan,
		       int specific)
{
     /* one-dim. out-of-place transforms will never be supported in MPI */
     WHEN_VERBOSE(2, my_printf("N/A\n"));
}
Exemplo n.º 4
0
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     fftw_complex *in, *out;
     fftw_plan plan;
     double t, t0;
     fftw_time begin, end;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
				       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
					* sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftw_create_plan_specific(n, dir,
					   speed_flag | flags
					   | wisdom_flag | no_vector_flag,
					   in, howmany_fields,
					   out, howmany_fields);
	  end = fftw_get_time();
     } else {
	  begin = fftw_get_time();
	  plan = fftw_create_plan(n, dir, speed_flag | flags
				  | wisdom_flag | no_vector_flag);
	  end = fftw_get_time();
     }
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     FFTW_TIME_FFT(fftw(plan, howmany_fields,
			in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t0);
     
     FFTW_TIME_FFT(fftw_threads(nthreads, plan, howmany_fields,
				in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, n)));
     WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
Exemplo n.º 5
0
void test_in_place_both(int n, int istride, int howmany,
			fftw_plan validated_plan_forward,
			fftw_plan validated_plan_backward)
{
     WHEN_VERBOSE(2,
		  my_printf("TEST CORRECTNESS (in place, FFTW_FORWARD, %s) "
			 "n = %d  istride = %d  howmany = %d\n",
			 SPECIFICP(0),
			 n, istride, howmany));
     test_in_place(n, istride, howmany, FFTW_FORWARD,
		   validated_plan_forward, 0);
     
     WHEN_VERBOSE(2,
		  my_printf("TEST CORRECTNESS (in place, FFTW_BACKWARD, %s) "
			 "n = %d  istride = %d  howmany = %d\n",
			 SPECIFICP(0),
			 n, istride, howmany));
     test_in_place(n, istride, howmany, FFTW_BACKWARD,
		   validated_plan_backward, 0);
}
Exemplo n.º 6
0
void test_in_place_both(int n, int istride, int howmany,
			fftw_plan validated_plan_forward,
			fftw_plan validated_plan_backward)
{
     int specific;

     for (specific = 0; specific <= 1; ++specific) {
	  WHEN_VERBOSE(2,
		  printf("TEST CORRECTNESS (in place, FFTW_FORWARD, %s) "
			 "n = %d  istride = %d  howmany = %d\n",
			 SPECIFICP(specific),
			 n, istride, howmany));
	  test_in_place(n, istride, howmany, FFTW_FORWARD,
			validated_plan_forward, specific);

	  WHEN_VERBOSE(2,
		 printf("TEST CORRECTNESS (in place, FFTW_BACKWARD, %s) "
			"n = %d  istride = %d  howmany = %d\n",
			SPECIFICP(specific),
			n, istride, howmany));
	  test_in_place(n, istride, howmany, FFTW_BACKWARD,
			validated_plan_backward, specific);
     }
}
Exemplo n.º 7
0
void test_correctness(int n)
{
     int istride, ostride, howmany;
     fftw_plan validated_plan_forward, validated_plan_backward;

     WHEN_VERBOSE(1,
		  printf("Testing correctness for n = %d...", n);
		  fflush(stdout));

     /* produce a *good* plan (validated by Ergun's test procedure) */
     validated_plan_forward =
	 fftw_create_plan(n, FFTW_FORWARD, measure_flag | wisdom_flag);
     test_ergun(n, FFTW_FORWARD, validated_plan_forward);
     validated_plan_backward =
	 fftw_create_plan(n, FFTW_BACKWARD, measure_flag | wisdom_flag);
     test_ergun(n, FFTW_BACKWARD, validated_plan_backward);

     for (istride = 1; istride <= MAX_STRIDE; ++istride)
	  for (ostride = 1; ostride <= MAX_STRIDE; ++ostride)
	       for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany)
		    test_out_of_place_both(n, istride, ostride, howmany,
					   validated_plan_forward,
					   validated_plan_backward);

     for (istride = 1; istride <= MAX_STRIDE; ++istride)
	  for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany)
	       test_in_place_both(n, istride, howmany,
				  validated_plan_forward,
				  validated_plan_backward);

     fftw_destroy_plan(validated_plan_forward);
     fftw_destroy_plan(validated_plan_backward);

     if (!(wisdom_flag & FFTW_USE_WISDOM) && chk_mem_leak)
	  fftw_check_memory_leaks();

     WHEN_VERBOSE(1, printf("OK\n"));
}
Exemplo n.º 8
0
/* Same as test_ergun, but for multi-dimensional transforms: */
void testnd_ergun(int rank, int *n, fftw_direction dir, fftwnd_plan plan)
{
     fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_complex *tmp;
     fftw_complex impulse;

     int N, n_before, n_after, dim;
     int i, which_impulse;
     int rounds = 20;
     FFTW_TRIG_REAL twopin;

     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     inA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     inB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     inC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     tmp = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     WHEN_VERBOSE(2,
		  printf("Validating plan, N = %d, dir = %s\n", N,
			 dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD"));

     /* test 1: check linearity */
     for (i = 0; i < rounds; ++i) {
	  fftw_complex alpha, beta;
	  c_re(alpha) = DRAND();
	  c_im(alpha) = DRAND();
	  c_re(beta) = DRAND();
	  c_im(beta) = DRAND();
	  fill_random(inA, N);
	  fill_random(inB, N);
	  fftwnd(plan, 1, inA, 1, N, outA, 1, N);
	  fftwnd(plan, 1, inB, 1, N, outB, 1, N);
	  array_scale(outA, alpha, N);
	  array_scale(outB, beta, N);
	  array_add(tmp, outA, outB, N);
	  array_scale(inA, alpha, N);
	  array_scale(inB, beta, N);
	  array_add(inC, inA, inB, N);
	  fftwnd(plan, 1, inC, 1, N, outC, 1, N);
	  array_compare(outC, tmp, N);
     }

     /*
      * test 2: check that the unit impulse is transformed properly -- we
      * need to test both the real and imaginary impulses 
      */

     for (which_impulse = 0; which_impulse < 2; ++which_impulse) {
	  if (which_impulse == 0) {	/* real impulse */
	       c_re(impulse) = 1.0;
	       c_im(impulse) = 0.0;
	  } else {		/* imaginary impulse */
	       c_re(impulse) = 0.0;
	       c_im(impulse) = 1.0;
	  }

	  for (i = 0; i < N; ++i) {
	       /* impulse */
	       c_re(inA[i]) = 0.0;
	       c_im(inA[i]) = 0.0;

	       /* transform of the impulse */
	       outA[i] = impulse;
	  }
	  inA[0] = impulse;

	  for (i = 0; i < rounds; ++i) {
	       fill_random(inB, N);
	       array_sub(inC, inA, inB, N);
	       fftwnd(plan, 1, inB, 1, N, outB, 1, N);
	       fftwnd(plan, 1, inC, 1, N, outC, 1, N);
	       array_add(tmp, outB, outC, N);
	       array_compare(tmp, outA, N);
	  }
     }

     /* test 3: check the time-shift property */
     /* the paper performs more tests, but this code should be fine too */
     /* -- we have to check shifts in each dimension */

     n_before = 1;
     n_after = N;
     for (dim = 0; dim < rank; ++dim) {
	  int n_cur = n[dim];

	  n_after /= n_cur;
	  twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n_cur;

	  for (i = 0; i < rounds; ++i) {
	       int j, jb, ja;

	       fill_random(inA, N);
	       array_rol(inB, inA, n_cur, n_before, n_after);
	       fftwnd(plan, 1, inA, 1, N, outA, 1, N);
	       fftwnd(plan, 1, inB, 1, N, outB, 1, N);

	       for (jb = 0; jb < n_before; ++jb)
		    for (j = 0; j < n_cur; ++j) {
			 FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
			 FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);

			 for (ja = 0; ja < n_after; ++ja) {
			      c_re(tmp[(jb * n_cur + j) * n_after + ja]) =
				  c_re(outB[(jb * n_cur + j) * n_after + ja]) * c
				  - c_im(outB[(jb * n_cur + j) * n_after + ja]) * s;
			      c_im(tmp[(jb * n_cur + j) * n_after + ja]) =
				  c_re(outB[(jb * n_cur + j) * n_after + ja]) * s
				  + c_im(outB[(jb * n_cur + j) * n_after + ja]) * c;
			 }
		    }

	       array_compare(tmp, outA, N);
	  }

	  n_before *= n_cur;
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB);
     fftw_free(inA);
}
Exemplo n.º 9
0
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     int local_n, local_start, local_n_after_transform,
	  local_start_after_transform, total_local_size;
     fftw_complex *in1, *work = NULL, *in2, *out2;
     fftw_mpi_plan plan;
     int i;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (specific) {
	  WHEN_VERBOSE(2, my_printf("N/A\n"));
	  return;
     }

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, flags);

     fftw_mpi_local_sizes(plan, &local_n, &local_start,
			  &local_n_after_transform,
			  &local_start_after_transform,
			  &total_local_size);

     in1 = (fftw_complex *) fftw_malloc(total_local_size 
					* sizeof(fftw_complex) * howmany);
     if (coinflip()) {
	  WHEN_VERBOSE(2, my_printf("w/work..."));
	  work = (fftw_complex *) fftw_malloc(total_local_size
                                        * sizeof(fftw_complex) * howmany);
     }
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     /* generate random inputs */
     for (i = 0; i < n * howmany; ++i) {
	  c_re(in2[i]) = DRAND();
	  c_im(in2[i]) = DRAND();
     }
     for (i = 0; i < local_n * howmany; ++i) {
	  c_re(in1[i]) = c_re(in2[i + local_start*howmany]);
	  c_im(in1[i]) = c_im(in2[i + local_start*howmany]);
     }	  

     /* fft-ize */
     fftw_mpi(plan, howmany, in1, work);

     fftw_mpi_destroy_plan(plan);

     fftw(validated_plan, howmany, in2, howmany, 1, out2, howmany, 1);

     CHECK(compute_error_complex(in1, 1,
				 out2 + local_start_after_transform*howmany, 1,
				 howmany*local_n_after_transform) < TOLERANCE,
	   "test_in_place: wrong answer");

     WHEN_VERBOSE(2, my_printf("OK\n"));

     fftw_free(in1);
     fftw_free(work);
     fftw_free(in2);
     fftw_free(out2);
}
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
		     int alternate_api, int specific)
{
     int istride, ostride, howmany;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     in1 = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE * sizeof(fftw_real));
     out3 = in1;
     out1 = (fftw_complex *) in1;
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     if (alternate_api && specific && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  } else {
	       p = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  }
     } else if (specific) {
	  p = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX,
					   flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
	  ip = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL,
					    flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
     } else if (alternate_api && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan(n[0], n[1], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw2d_create_plan(n[0], n[1], FFTW_COMPLEX_TO_REAL,
					flags);
	  } else {
	       p = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_COMPLEX_TO_REAL,
					flags);
	  }
     } else {
	  p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
	  ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     }

     CHECK(p != NULL && ip != NULL, "can't create plan");

     for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i)
	  out3[i] = 0;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    for (k = 0; k < istride; ++k)
			 in1[(i * nhc * 2 + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  howmany = ostride = istride;

	  WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...",
				 istride));

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
				       out1, ostride, 1);
	  else
	       rfftwnd_one_real_to_complex(p, in1, NULL);

	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						ostride,
						out2 + i * nr, 1,
						nhc) < TOLERANCE,
			  "in-place (r2c): wrong answer");

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
				       out3, istride, 1);
	  else
	       rfftwnd_one_complex_to_real(ip, out1, NULL);

	  for (i = 0; i < nc * nhc * 2 * istride; ++i)
	       out3[i] *= 1.0 / N;

	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error(out3 + i * nhc * 2 * istride + k,
					istride,
					(fftw_real *) (in2 + i * nr), 2,
					nr) < TOLERANCE,
			  "in-place (c2r): wrong answer (check 2)");
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
Exemplo n.º 11
0
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     int local_nx, local_x_start, local_ny_after_transpose,
	  local_y_start_after_transpose, total_local_size;
     fftw_complex *in, *work;
     fftwnd_plan plan = 0;
     fftwnd_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int i, N;
     
     if (sz.rank < 2)
	  return;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     if (specific) {
	  return;
     } else {
	  if (io_okay && !only_parallel)
	       plan = fftwnd_create_plan(sz.rank, sz.narray,
					 dir, speed_flag | flags
					 | wisdom_flag | no_vector_flag);
	  mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray,
					    dir, speed_flag | flags
					    | wisdom_flag | no_vector_flag);
     }
     CHECK(mpi_plan != NULL, "can't create plan");

     fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start,
			    &local_ny_after_transpose,
			    &local_y_start_after_transpose,
			    &total_local_size);

     if (io_okay && !only_parallel)
	  in = (fftw_complex *) fftw_malloc(N * howmany_fields *
					    sizeof(fftw_complex));
     else
	  in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
					    sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields *
					 sizeof(fftw_complex));
     
     if (io_okay && !only_parallel) {
	  FFTW_TIME_FFT(fftwnd(plan, howmany_fields,
			      in, howmany_fields, 1, 0, 0, 0),
		       in, N * howmany_fields, t0);

	  fftwnd_destroy_plan(plan);
	  
	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
				 smart_sprint_time(t0)));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, NULL, FFTW_NORMAL_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	     WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, NULL, FFTW_TRANSPOSED_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	    WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, work, FFTW_NORMAL_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields,
			     in, work, FFTW_TRANSPOSED_ORDER),
		   in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s",
				 ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / "
				 "(t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     fftwnd_mpi_destroy_plan(mpi_plan);

     fftw_free(in);
     fftw_free(work);

     WHEN_VERBOSE(1, my_printf("\n"));
}
Exemplo n.º 12
0
void test_planner(int rank)
{
     /* 
      * create and destroy many plans, at random.  Check the
      * garbage-collecting allocator of twiddle factors 
      */
     int i, dim;
     int r, s;
     fftw_plan p[PLANNER_TEST_SIZE];
     fftwnd_plan pnd[PLANNER_TEST_SIZE];
     int *narr, maxdim;

     chk_mem_leak = 0;
     verbose--;

     please_wait();
     if (rank < 1)
	  rank = 1;

     narr = (int *) fftw_malloc(rank * sizeof(int));

     maxdim = (int) pow(8192.0, 1.0/rank);

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
	  p[i] = (fftw_plan) 0;
	  pnd[i] = (fftwnd_plan) 0;
     }

     for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) {
	  r = rand();
	  if (r < 0)
	       r = -r;
	  r = r % PLANNER_TEST_SIZE;

	  for (dim = 0; dim < rank; ++dim) {
	       do {
		    s = rand();
		    if (s < 0)
			 s = -s;
		    s = s % maxdim + 1;
	       } while (s == 0);
	       narr[dim] = s;
	  }

	  if (rank == 1) {
	       if (p[r])
		    fftw_destroy_plan(p[r]);

	       p[r] = fftw_create_plan(narr[0], random_dir(), measure_flag |
				       wisdom_flag);
	       if (paranoid && narr[0] < 200)
		    test_correctness(narr[0]);
	  }

	  if (pnd[r])
	       fftwnd_destroy_plan(pnd[r]);

	  pnd[r] = fftwnd_create_plan(rank, narr,
				      random_dir(), measure_flag |
				      wisdom_flag);

	  if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) {
	       WHEN_VERBOSE(0, printf("test planner: so far so good\n"));
	       WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n",
			      i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE));
	  }
     }

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
	  if (p[i])
	       fftw_destroy_plan(p[i]);
	  if (pnd[i])
	       fftwnd_destroy_plan(pnd[i]);
     }

     fftw_free(narr);
     verbose++;
     chk_mem_leak = 1;
}
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     int istride, ostride;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     in1 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     out3 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     out1 = (fftw_complex *) fftw_malloc(nhc * nc * MAX_STRIDE
					 * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
     ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     CHECK(p != NULL && ip != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    for (k = 0; k < istride; ++k)
			 in1[(i * nr + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }
	  for (i = 0; i < N * istride; ++i)
	       out3[i] = 0.0;

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
	       int howmany = (istride < ostride) ? istride : ostride;

	       WHEN_VERBOSE(2, printf("\n    testing stride %d/%d...",
				      istride, ostride));

	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
					    out1, ostride, 1);
	       else
		    rfftwnd_one_real_to_complex(p, in1, out1);

	       for (i = 0; i < nc; ++i)
		    for (k = 0; k < howmany; ++k)
			 CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						     ostride,
						     out2 + i * nr, 1,
						     nhc) < TOLERANCE,
			       "out-of-place (r2c): wrong answer");

	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
					    out3, istride, 1);
	       else
		    rfftwnd_one_complex_to_real(ip, out1, out3);

	       for (i = 0; i < N * istride; ++i)
		    out3[i] *= 1.0 / N;

	       if (istride == howmany)
		    CHECK(compute_error(out3, 1, in1, 1, N * istride)
			< TOLERANCE, "out-of-place (c2r): wrong answer");
	       for (i = 0; i < nc; ++i)
		    for (k = 0; k < howmany; ++k)
			 CHECK(compute_error(out3 + i * nr * istride + k,
					     istride,
					 (fftw_real *) (in2 + i * nr), 2,
					     nr) < TOLERANCE,
			   "out-of-place (c2r): wrong answer (check 2)");
	  }
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(out3);
     fftw_free(out2);
     fftw_free(in2);
     fftw_free(out1);
     fftw_free(in1);
}
Exemplo n.º 14
0
void test_planner(int rank)
{
     /*
      * create and destroy many plans, at random.  Check the
      * garbage-collecting allocator of twiddle factors
      */
     int i, dim;
     int r, s;
     fftw_mpi_plan p[PLANNER_TEST_SIZE];
     fftwnd_mpi_plan pnd[PLANNER_TEST_SIZE];
     int *narr, maxdim;

     chk_mem_leak = 0;
     verbose--;

     please_wait();
     if (rank < 1)
          rank = 1;

     narr = (int *) fftw_malloc(rank * sizeof(int));

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
          p[i] = (fftw_mpi_plan) 0;
          pnd[i] = (fftwnd_mpi_plan) 0;
     }

     if (PLANNER_TEST_SIZE >= 8) {
	  p[0] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0);
	  p[1] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0);
	  p[2] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0);
	  p[3] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0);
	  p[4] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0);
	  p[5] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_FORWARD, 0);
	  p[6] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0);
	  p[7] = fftw_mpi_create_plan(MPI_COMM_WORLD, 1024, FFTW_BACKWARD, 0);
     }

     maxdim = (int) pow(8192, 1.0/rank);

     for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) {
          r = rand();
          if (r < 0)
               r = -r;
          r = r % PLANNER_TEST_SIZE;

          for (dim = 0; dim < rank; ++dim) {
               do {
                    s = rand();
                    if (s < 0)
                         s = -s;
                    s = s % maxdim + 1;
               } while (s == 0);
               narr[dim] = s;
          }

          if (rank == 1) {
               if (p[r])
                    fftw_mpi_destroy_plan(p[r]);

               p[r] = fftw_mpi_create_plan(MPI_COMM_WORLD,
					   narr[0], random_dir(), 
					   measure_flag | wisdom_flag);
          }

	  if (rank > 1) {
	       if (pnd[r])
		    fftwnd_mpi_destroy_plan(pnd[r]);
	       
	       pnd[r] = fftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, narr,
					       random_dir(), measure_flag |
					       wisdom_flag);
	  }

          if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) {
               WHEN_VERBOSE(0, my_printf("test planner: so far so good\n"));
               WHEN_VERBOSE(0, my_printf("test planner: iteration %d"
					 " out of %d\n",
                              i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE));
          }
     }

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
          if (p[i])
               fftw_mpi_destroy_plan(p[i]);
          if (pnd[i])
               fftwnd_mpi_destroy_plan(pnd[i]);
     }

     fftw_free(narr);
     verbose++;
     chk_mem_leak = 1;
}
/*
 * This is a real (as opposed to complex) variation of the FFT tester
 * described in
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
     fftw_real *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_real *inA1, *inB1;
     fftw_real *tmp;
     int i;
     int rounds = 20;
     FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;

     inA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inA1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inB1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));

     WHEN_VERBOSE(2,
		  printf("Validating plan, n = %d, dir = %s\n", n,
			 dir == FFTW_REAL_TO_COMPLEX ? 
			 "REAL_TO_COMPLEX" : "COMPLEX_TO_REAL"));

     /* test 1: check linearity */
     for (i = 0; i < rounds; ++i) {
	  fftw_real alpha, beta;
	  alpha = DRAND();
	  beta = DRAND();
	  rfill_random(inA, n);
	  rfill_random(inB, n);
	  rarray_scale(inA1, inA, alpha, n);
	  rarray_scale(inB1, inB, beta, n);
	  rarray_add(inC, inA1, inB1, n);
	  rfftw_out_of_place(plan, n, inA, outA);
	  rfftw_out_of_place(plan, n, inB, outB);
	  rarray_scale(outA, outA, alpha, n);
	  rarray_scale(outB, outB, beta, n);
	  rarray_add(tmp, outA, outB, n);
	  rfftw_out_of_place(plan, n, inC, outC);
	  rarray_compare(outC, tmp, n);
     }

     /* test 2: check that the unit impulse is transformed properly */
     for (i = 0; i < n; ++i) {
	  /* impulse */
	  inA[i] = 0.0;
	  
	  /* transform of the impulse */
	  if (2 * i <= n)
	       outA[i] = 1.0;
	  else
	       outA[i] = 0.0;
     }
     inA[0] = 1.0;

     if (dir == FFTW_REAL_TO_COMPLEX) {
	  for (i = 0; i < rounds; ++i) {
	       rfill_random(inB, n);
	       rarray_sub(inC, inA, inB, n);
	       rfftw_out_of_place(plan, n, inB, outB);
	       rfftw_out_of_place(plan, n, inC, outC);
	       rarray_add(tmp, outB, outC, n);
	       rarray_compare(tmp, outA, n);
	  }
     } else {
	  for (i = 0; i < rounds; ++i) {
	       rfill_random(outB, n);
	       rarray_sub(outC, outA, outB, n);
	       rfftw_out_of_place(plan, n, outB, inB);
	       rfftw_out_of_place(plan, n, outC, inC);
	       rarray_add(tmp, inB, inC, n);
	       rarray_scale(tmp, tmp, 1.0 / ((double) n), n);
	       rarray_compare(tmp, inA, n);
	  }
     }

     /* test 3: check the time-shift property */
     /* the paper performs more tests, but this code should be fine too */
     if (dir == FFTW_REAL_TO_COMPLEX) {
	  for (i = 0; i < rounds; ++i) {
	       int j;

	       rfill_random(inA, n);
	       rarray_rol(inB, inA, n, 1, 1);
	       rfftw_out_of_place(plan, n, inA, outA);
	       rfftw_out_of_place(plan, n, inB, outB);
	       tmp[0] = outB[0];
	       for (j = 1; 2 * j < n; ++j) {
		    FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
		    FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
		    tmp[j] = outB[j] * c - outB[n - j] * s;
		    tmp[n - j] = outB[j] * s + outB[n - j] * c;
	       }
	       if (2 * j == n)
		    tmp[j] = -outB[j];

	       rarray_compare(tmp, outA, n);
	  }
     } else {
	  for (i = 0; i < rounds; ++i) {
	       int j;

	       rfill_random(inA, n);
	       inB[0] = inA[0];
	       for (j = 1; 2 * j < n; ++j) {
		    FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
		    FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
		    inB[j] = inA[j] * c - inA[n - j] * s;
		    inB[n - j] = inA[j] * s + inA[n - j] * c;
	       }
	       if (2 * j == n)
		    inB[j] = -inA[j];

	       rfftw_out_of_place(plan, n, inA, outA);
	       rfftw_out_of_place(plan, n, inB, outB);	       
	       rarray_rol(tmp, outA, n, 1, 1);
	       rarray_compare(tmp, outB, n);
	  }
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB1);
     fftw_free(inA1);
     fftw_free(inB);
     fftw_free(inA);
}
Exemplo n.º 16
0
static void fftw_plan_hook_function(fftw_plan p)
{
     WHEN_VERBOSE(3, printf("Validating tentative plan\n"));
     WHEN_VERBOSE(3, fftw_print_plan(p));
     test_ergun(p->n, p->dir, p);
}
Exemplo n.º 17
0
/*
 * Implementation of the FFT tester described in
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
     fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_complex *tmp;
     fftw_complex impulse;
     int i;
     int rounds = 20;
     FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;

     inA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     inB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     inC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));

     WHEN_VERBOSE(2,
		  printf("Validating plan, n = %d, dir = %s\n", n,
			 dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD"));

     /* test 1: check linearity */
     for (i = 0; i < rounds; ++i) {
	  fftw_complex alpha, beta;
	  c_re(alpha) = DRAND();
	  c_im(alpha) = DRAND();
	  c_re(beta) = DRAND();
	  c_im(beta) = DRAND();
	  fill_random(inA, n);
	  fill_random(inB, n);
	  fftw_out_of_place(plan, n, inA, outA);
	  fftw_out_of_place(plan, n, inB, outB);
	  array_scale(outA, alpha, n);
	  array_scale(outB, beta, n);
	  array_add(tmp, outA, outB, n);
	  array_scale(inA, alpha, n);
	  array_scale(inB, beta, n);
	  array_add(inC, inA, inB, n);
	  fftw_out_of_place(plan, n, inC, outC);
	  array_compare(outC, tmp, n);
     }

     /* test 2: check that the unit impulse is transformed properly */

     c_re(impulse) = 1.0;
     c_im(impulse) = 0.0;
     
     for (i = 0; i < n; ++i) {
	  /* impulse */
	  c_re(inA[i]) = 0.0;
	  c_im(inA[i]) = 0.0;
	  
	  /* transform of the impulse */
	  outA[i] = impulse;
     }
     inA[0] = impulse;
     
     for (i = 0; i < rounds; ++i) {
	  fill_random(inB, n);
	  array_sub(inC, inA, inB, n);
	  fftw_out_of_place(plan, n, inB, outB);
	  fftw_out_of_place(plan, n, inC, outC);
	  array_add(tmp, outB, outC, n);
	  array_compare(tmp, outA, n);
     }

     /* test 3: check the time-shift property */
     /* the paper performs more tests, but this code should be fine too */
     for (i = 0; i < rounds; ++i) {
	  int j;

	  fill_random(inA, n);
	  array_rol(inB, inA, n, 1, 1);
	  fftw_out_of_place(plan, n, inA, outA);
	  fftw_out_of_place(plan, n, inB, outB);
	  for (j = 0; j < n; ++j) {
	       FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
	       FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
	       c_re(tmp[j]) = c_re(outB[j]) * c - c_im(outB[j]) * s;
	       c_im(tmp[j]) = c_re(outB[j]) * s + c_im(outB[j]) * c;
	  }

	  array_compare(tmp, outA, n);
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB);
     fftw_free(inA);
}
Exemplo n.º 18
0
void testnd_in_place(int rank, int *n, fftw_direction dir,
		     fftwnd_plan validated_plan,
		     int alternate_api, int specific, int force_buffered)
{
     int local_nx, local_x_start, local_ny_after_transpose,
          local_y_start_after_transpose, total_local_size;
     int istride;
     int N, dim, i;
     fftw_complex *in1, *work = 0, *in2;
     fftwnd_mpi_plan p = 0;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (specific || rank < 2)
	  return;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     if (force_buffered)
	  flags |= FFTWND_FORCE_BUFFERED;

     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     if (alternate_api && (rank == 2 || rank == 3)) {
	  if (rank == 2)
	       p = fftw2d_mpi_create_plan(MPI_COMM_WORLD,
					  n[0], n[1], dir, flags);
	  else
	       p = fftw3d_mpi_create_plan(MPI_COMM_WORLD,
					  n[0], n[1], n[2], dir, flags);
     }
     else		/* standard api */
	  p = fftwnd_mpi_create_plan(MPI_COMM_WORLD, rank, n, dir, flags);

     fftwnd_mpi_local_sizes(p, &local_nx, &local_x_start,
                            &local_ny_after_transpose,
                            &local_y_start_after_transpose,
                            &total_local_size);

     in1 = (fftw_complex *) fftw_malloc(total_local_size * MAX_STRIDE
					* sizeof(fftw_complex));
     if (coinflip()) {
	  WHEN_VERBOSE(1, my_printf("w/work..."));
	  work = (fftw_complex *) fftw_malloc(total_local_size * MAX_STRIDE
					      * sizeof(fftw_complex));
     }
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < N; ++i) {
	       c_re(in2[i]) = DRAND();
	       c_im(in2[i]) = DRAND();
	  }

	  for (i = 0; i < local_nx * (N/n[0]); ++i) {
	       int j;
	       for (j = 0; j < istride; ++j) {
		    c_re(in1[i * istride + j]) = c_re((in2 + local_x_start 
						       * (N/n[0])) [i]);
		    c_im(in1[i * istride + j]) = c_im((in2 + local_x_start
                                                       * (N/n[0])) [i]);
	       }
	  }

	  fftwnd_mpi(p, istride, in1, work, FFTW_NORMAL_ORDER);

	  fftwnd(validated_plan, 1, in2, 1, 1, NULL, 0, 0);

	  for (i = 0; i < istride; ++i)
	       CHECK(compute_error_complex(in1 + i, istride,
					   in2 + local_x_start * (N/n[0]),
					   1, local_nx * (N/n[0])) < TOLERANCE,
		     "testnd_in_place: wrong answer");
     }

     fftwnd_mpi_destroy_plan(p);

     fftw_free(in2);
     fftw_free(work);
     fftw_free(in1);
}
Exemplo n.º 19
0
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     fftw_complex *in1, *in2, *out2;
     fftw_plan plan;
     int i, j;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     in1 = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     if (!specific)
	  plan = fftw_create_plan(n, dir, flags);
     else
	  plan = fftw_create_plan_specific(n, dir, flags,
					   in1, istride,
					   (fftw_complex *) NULL, 0);

     /* generate random inputs */
     for (i = 0; i < n * howmany; ++i) {
	  c_re(in1[i * istride]) = c_re(in2[i]) = DRAND();
	  c_im(in1[i * istride]) = c_im(in2[i]) = DRAND();
     }

     /* 
      * fill in other positions of the array, to make sure that
      * fftw doesn't overwrite them 
      */
     for (j = 1; j < istride; ++j)
	  for (i = 0; i < n * howmany; ++i) {
	       c_re(in1[i * istride + j]) = i * istride + j;
	       c_im(in1[i * istride + j]) = i * istride - j;
	  }
     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* fft-ize */
     if (howmany != 1 || istride != 1 || coinflip())
	  fftw(plan, howmany, in1, istride, n * istride,
	       (fftw_complex *) NULL, 0, 0);
     else
	  fftw_one(plan, in1, NULL);

     fftw_destroy_plan(plan);

     /* check for overwriting */
     for (j = 1; j < istride; ++j)
	  for (i = 0; i < n * howmany; ++i)
	       CHECK(c_re(in1[i * istride + j]) == i * istride + j &&
		     c_im(in1[i * istride + j]) == i * istride - j,
		     "input has been overwritten");

     for (i = 0; i < howmany; ++i) {
	  fftw(validated_plan, 1, in2 + n * i, 1, n, out2 + n * i, 1, n);
     }

     CHECK(compute_error_complex(in1, istride, out2, 1, n * howmany) < TOLERANCE,
	   "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(in1);
     fftw_free(in2);
     fftw_free(out2);
}
Exemplo n.º 20
0
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     int local_n, local_start, local_n_after_transform,
	  local_start_after_transform, total_local_size, nalloc;
     fftw_complex *in, *work;
     fftw_plan plan = 0;
     fftw_mpi_plan mpi_plan;
     double t, t0 = 0.0;

     if (specific || !(flags & FFTW_IN_PLACE))
	  return;

     if (io_okay && !only_parallel)
	  plan = fftw_create_plan(n, dir, speed_flag | flags
				  | wisdom_flag | no_vector_flag);

     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir,
				     speed_flag | flags
				     | wisdom_flag | no_vector_flag);

     CHECK(mpi_plan, "failed to create plan!");

     fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,
			  &local_n_after_transform,
			  &local_start_after_transform,
			  &total_local_size);

     if (io_okay && !only_parallel)
	  nalloc = n;
     else
	  nalloc = total_local_size;

     in = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
				       * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
					 * sizeof(fftw_complex));

     if (io_okay) {
	  WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));
     }

     if (io_okay && !only_parallel) {
	  FFTW_TIME_FFT(fftw(plan, howmany_fields,
			     in, howmany_fields, 1, work, 1, 0),
			in, n * howmany_fields, t0);

	  fftw_destroy_plan(plan);

	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     }
     
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),
		  in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
	  WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, n)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));
     }

     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),
		  in, total_local_size * howmany_fields, t);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
	  WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t, n)));
	  if (!only_parallel)
	     WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n", t0 / t));
     }

     fftw_free(in);
     fftw_free(work);
     fftw_mpi_destroy_plan(mpi_plan);

     WHEN_VERBOSE(1, my_printf("\n"));
}
void test_in_place(int n, int istride,
		   int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     fftw_complex *in2, *out2;
     fftw_real *in1, *out1, *out3;
     fftw_plan plan;
     int i, j;
     int ostride = istride;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     in1 = (fftw_real *) fftw_malloc(istride * n * sizeof(fftw_real) * howmany);
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     out1 = in1;
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     out3 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));

     if (!specific)
	  plan = rfftw_create_plan(n, dir, flags);
     else
	  plan = rfftw_create_plan_specific(n, dir, flags,
					    in1, istride, out1, ostride);
     CHECK(plan != NULL, "can't create plan");

     /* generate random inputs */
     fill_random(in1, n, istride);
     for (j = 1; j < howmany; ++j)
	  for (i = 0; i < n; ++i)
	       in1[(j * n + i) * istride] = in1[i * istride];

     /* copy random inputs to complex array for comparison with fftw: */
     if (dir == FFTW_REAL_TO_COMPLEX)
	  for (i = 0; i < n; ++i) {
	       c_re(in2[i]) = in1[i * istride];
	       c_im(in2[i]) = 0.0;
     } else {
	  int n2 = (n + 1) / 2;
	  c_re(in2[0]) = in1[0];
	  c_im(in2[0]) = 0.0;
	  for (i = 1; i < n2; ++i) {
	       c_re(in2[i]) = in1[i * istride];
	       c_im(in2[i]) = in1[(n - i) * istride];
	  }
	  if (n2 * 2 == n) {
	       c_re(in2[n2]) = in1[n2 * istride];
	       c_im(in2[n2]) = 0.0;
	       ++i;
	  }
	  for (; i < n; ++i) {
	       c_re(in2[i]) = c_re(in2[n - i]);
	       c_im(in2[i]) = -c_im(in2[n - i]);
	  }
     }

     /* 
      * fill in other positions of the array, to make sure that
      * rfftw doesn't overwrite them 
      */
     for (j = 1; j < istride; ++j)
	  for (i = 0; i < n * howmany; ++i)
	       in1[i * istride + j] = i * istride + j;

     WHEN_VERBOSE(2, rfftw_print_plan(plan));

     /* fft-ize */
     if (howmany != 1 || istride != 1 || coinflip())
	  rfftw(plan, howmany, in1, istride, n * istride, 0, 0, 0);
     else
	  rfftw_one(plan, in1, NULL);

     rfftw_destroy_plan(plan);

     /* check for overwriting */
     for (j = 1; j < ostride; ++j)
	  for (i = 0; i < n * howmany; ++i)
	       CHECK(out1[i * ostride + j] == i * ostride + j,
		     "output has been overwritten");

     fftw(validated_plan, 1, in2, 1, n, out2, 1, n);

     if (dir == FFTW_REAL_TO_COMPLEX) {
	  int n2 = (n + 1) / 2;
	  out3[0] = c_re(out2[0]);
	  for (i = 1; i < n2; ++i) {
	       out3[i] = c_re(out2[i]);
	       out3[n - i] = c_im(out2[i]);
	  }
	  if (n2 * 2 == n)
	       out3[n2] = c_re(out2[n2]);
     } else {
	  for (i = 0; i < n; ++i)
	       out3[i] = c_re(out2[i]);
     }

     for (j = 0; j < howmany; ++j)
	  CHECK(compute_error(out1 + j * n * ostride, ostride, out3, 1, n)
		< TOLERANCE,
		"test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(in1);
     fftw_free(in2);
     fftw_free(out2);
     fftw_free(out3);
}