Beispiel #1
0
static void fftwnd_threads_aux(int nthreads, fftwnd_plan p, int cur_dim, 
			       fftw_complex *in, int istride,
			       fftw_complex *out, int ostride)
{
     int n_after = p->n_after[cur_dim], n = p->n[cur_dim];

     if (cur_dim == p->rank - 2) {
	  /* just do the last dimension directly: */
	  if (p->is_in_place)
	       fftw_threads(nthreads, p->plans[p->rank - 1], n,
			    in, istride, n_after * istride,
			    (fftw_complex*)NULL, 0, 0);
	  else
	       fftw_threads(nthreads, p->plans[p->rank - 1], n,
			    in,  istride, n_after * istride,
			    out, ostride, n_after * ostride);
     }
     else { /* we have at least two dimensions to go */
	  /* process the subsequent dimensions recursively, in hyperslabs,
	     to get maximum locality: */
	  fftwnd_aux_many_threads(nthreads, n, n_after,
				  p, cur_dim + 1,
				  in, istride, out, ostride);
     }

     /* do the current dimension (in-place): */
     fftw_threads(nthreads, p->plans[cur_dim], n_after,
		  out, n_after * ostride, ostride,
		  (fftw_complex*)NULL, 0, 0);
}
Beispiel #2
0
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     fftw_complex *in, *out;
     fftw_plan plan;
     double t, t0;
     fftw_time begin, end;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
				       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
					* sizeof(fftw_complex));

     if (specific) {
	  begin = fftw_get_time();
	  plan = fftw_create_plan_specific(n, dir,
					   speed_flag | flags
					   | wisdom_flag | no_vector_flag,
					   in, howmany_fields,
					   out, howmany_fields);
	  end = fftw_get_time();
     } else {
	  begin = fftw_get_time();
	  plan = fftw_create_plan(n, dir, speed_flag | flags
				  | wisdom_flag | no_vector_flag);
	  end = fftw_get_time();
     }
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     FFTW_TIME_FFT(fftw(plan, howmany_fields,
			in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t0);
     
     FFTW_TIME_FFT(fftw_threads(nthreads, plan, howmany_fields,
				in, howmany_fields, 1, out, howmany_fields, 1),
		   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     WHEN_VERBOSE(1, printf("time for one fft (%d threads): %s", nthreads, smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
			    " = %f\n", howmany_fields * mflops(t, n)));
     WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
Beispiel #3
0
void fftwnd_threads(int nthreads, fftwnd_plan p, int howmany,
		    fftw_complex *in, int istride, int idist,
		    fftw_complex *out, int ostride, int odist)
{
     switch (p->rank) {
	 case 0:
	      break;
	 case 1:
	      if (p->is_in_place)	/* fft is in-place */
		   fftw_threads(nthreads, p->plans[0], howmany,
				in, istride, idist,
				(fftw_complex*) NULL, 0, 0);
	      else
		   fftw_threads(nthreads, p->plans[0], howmany,
				in, istride, idist,
				out, ostride, odist);
	      break;
	 default: /* rank >= 2 */
	 {
	      int i;
	      
	      if (p->is_in_place) {
		   out = in;
		   ostride = istride;
		   odist = idist;
	      }

	      if (nthreads <= 1)
		   fftwnd(p, howmany, in, istride, idist, out, ostride, odist);
	      else
		   for (i = 0; i < howmany; ++i)
			fftwnd_threads_aux(nthreads, p, 0,
					   in + i*idist, istride,
					   out + i*odist, ostride);
	 }
     }
}
Beispiel #4
0
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     fftw_complex *in1, *in2, *out2;
     fftw_plan plan;
     int i, j;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     in1 = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     if (!specific)
	  plan = fftw_create_plan(n, dir, flags);
     else
	  plan = fftw_create_plan_specific(n, dir, flags,
					   in1, istride,
					   (fftw_complex *) NULL, 0);

     /* generate random inputs */
     for (i = 0; i < n * howmany; ++i) {
	  c_re(in1[i * istride]) = c_re(in2[i]) = DRAND();
	  c_im(in1[i * istride]) = c_im(in2[i]) = DRAND();
     }

     /* 
      * fill in other positions of the array, to make sure that
      * fftw doesn't overwrite them 
      */
     for (j = 1; j < istride; ++j)
	  for (i = 0; i < n * howmany; ++i) {
	       c_re(in1[i * istride + j]) = i * istride + j;
	       c_im(in1[i * istride + j]) = i * istride - j;
	  }
     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* fft-ize */
     if (howmany != 1 || istride != 1 || coinflip())
	  fftw_threads(nthreads, plan, howmany, in1, istride, n * istride,
		       (fftw_complex *) NULL, 0, 0);
     else
	  fftw_threads_one(nthreads, plan, in1, NULL);

     fftw_destroy_plan(plan);

     /* check for overwriting */
     for (j = 1; j < istride; ++j)
	  for (i = 0; i < n * howmany; ++i)
	       CHECK(c_re(in1[i * istride + j]) == i * istride + j &&
		     c_im(in1[i * istride + j]) == i * istride - j,
		     "input has been overwritten");

     for (i = 0; i < howmany; ++i) {
	  fftw(validated_plan, 1, in2 + n * i, 1, n, out2 + n * i, 1, n);
     }

     CHECK(compute_error_complex(in1, istride, out2, 1, n * howmany) < TOLERANCE,
	   "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(in1);
     fftw_free(in2);
     fftw_free(out2);
}