예제 #1
0
/*
 * Run fftwnd_aux_many_thread over n loop iterations with up to nthreads
 * threads, sharing a single scratch allocation.
 *
 * The thread count is capped at n.  A scratch buffer holding
 * (capped thread count) * plan->nwork complex elements is allocated,
 * handed to the workers through the job descriptor, and released once
 * fftw_thread_spawn_loop has returned.  How the workers carve up that
 * buffer is decided inside fftwnd_aux_many_thread, which is not visible
 * from this function.
 */
static void fftwnd_aux_many_threads(int nthreads, int n, int n_after,
                                    fftwnd_plan plan, int cur_dim,
                                    fftw_complex *in, int istride,
                                    fftw_complex *out, int ostride)
{
     fftwnd_aux_many_data job;
     fftw_complex *scratch;
     int nworkers = (nthreads > n) ? n : nthreads;

     scratch = (fftw_complex *) fftw_malloc(nworkers * plan->nwork
                                            * sizeof(fftw_complex));

     /* which plan and which dimension the workers operate on */
     job.plan = plan;
     job.cur_dim = cur_dim;
     job.distance = n_after;

     /* input and output arrays with their strides */
     job.in = in;
     job.istride = istride;
     job.out = out;
     job.ostride = ostride;

     /* shared scratch space */
     job.work = scratch;

     fftw_thread_spawn_loop(n, nworkers, fftwnd_aux_many_thread, &job);

     fftw_free(scratch);
}
예제 #2
0
/*
 * Threaded complex-to-real driver for the "overlap" case.
 *
 * Packs the plan, the input/output arrays with their stride and distance
 * values, and the caller-supplied workspace into one descriptor, then
 * issues two spawn loops of howmany iterations each: first
 * c2real_overlap_aux_thread1, then c2real_overlap_aux_thread2.  The
 * second loop is started only after the first spawn call has returned,
 * and both loops receive the same descriptor.
 */
void rfftw_c2real_overlap_threads_aux(fftw_plan plan, int howmany,
                              fftw_complex *in, int istride, int idist,
                              fftw_real *out, int ostride, int odist,
                              fftw_real *work,
                              int nthreads)
{
     rexec2_thread_data job;

     job.plan = plan;
     job.work = work;

     /* input layout */
     job.in = in;
     job.istride = istride;
     job.idist = idist;

     /* output layout */
     job.out = out;
     job.ostride = ostride;
     job.odist = odist;

     /* pass 1, then pass 2, over the same howmany iterations */
     fftw_thread_spawn_loop(howmany, nthreads, c2real_overlap_aux_thread1, &job);
     fftw_thread_spawn_loop(howmany, nthreads, c2real_overlap_aux_thread2, &job);
}
예제 #3
0
/*
 * Threaded real-to-complex step of a multi-dimensional plan, starting at
 * dimension cur_dim.
 *
 * Two phases:
 *   1. Handle the dimensions after cur_dim.  When cur_dim is the
 *      second-to-last dimension, the last dimension is transformed
 *      directly via rfftw_real2c_threads_aux; otherwise
 *      real2c_aux_thread is spawned over the n entries of this
 *      dimension with cur_dim + 1 recorded in the descriptor.
 *   2. Transform the current dimension in place on `out` through
 *      fftw_executor_many_inplace_threads, passing `work` as workspace.
 *
 * The in-place and out-of-place cases differ only in the distances and
 * strides computed below.
 */
void rfftwnd_real2c_threads_aux(fftwnd_plan p, int cur_dim,
                                fftw_real *in, int istride,
                                fftw_complex *out, int ostride,
                                fftw_complex *work,
                                int nthreads)
{
     int n = p->n[cur_dim];
     int n_after = p->n_after[cur_dim];
     int last = p->rank - 1;

     if (cur_dim != p->rank - 2) {
          /* at least two dimensions still to go */
          int nr = p->plans[last]->n;
          int in_rows;
          aux_data job;

          /* input distance between consecutive entries, in units of
             istride */
          if (p->is_in_place)
               in_rows = n_after * 2;
          else
               in_rows = nr * (n_after / (nr/2 + 1));

          job.p = p;
          job.cur_dim = cur_dim + 1;
          job.work = work;

          job.in = in;
          job.istride = istride;
          job.idist = istride * in_rows;

          job.out = out;
          job.ostride = ostride;
          job.odist = ostride * n_after;

          fftw_thread_spawn_loop(n, nthreads, real2c_aux_thread, &job);
     }
     else {
          /* only the last dimension remains: transform it directly */
          int row_idist, row_ostride;

          if (p->is_in_place) {
               /* in place: the output is laid out with the input stride */
               row_idist = (n_after * istride) * 2;
               row_ostride = istride;
          }
          else {
               row_idist = p->plans[last]->n * istride;
               row_ostride = ostride;
          }

          rfftw_real2c_threads_aux(p->plans[last], n,
                                   in, istride, row_idist,
                                   out, row_ostride, n_after * row_ostride,
                                   (fftw_real *) work, nthreads);
     }

     /* Now the current dimension, in place.  The internal executor is
        called (rather than a public entry point) so that our own
        workspace array can be handed over. */
     fftw_executor_many_inplace_threads(p->plans[cur_dim]->n,
                                        out, work, p->plans[cur_dim]->root,
                                        n_after * ostride, n_after, ostride,
                                        nthreads);
}
예제 #4
0
/*
 * Threaded complex-to-real step of a multi-dimensional plan for a batch
 * of `howmany` transforms, starting at dimension cur_dim.
 *
 * Two phases:
 *   1. Transform the current dimension in place on `in` by spawning
 *      fftw_howmany_thread over n_after iterations.
 *   2. Handle the dimensions after cur_dim.  When cur_dim is the
 *      second-to-last dimension, the last dimension is processed by
 *      c2r_overlap_howmany_thread (in-place plans) or c2r_howmany_thread
 *      (out-of-place plans); otherwise c2r_hyperslab_howmany_thread is
 *      spawned with cur_dim + 1 recorded in its descriptor.  Each of
 *      these loops runs over the n entries of this dimension.
 *
 * `work` and `nwork` are forwarded to the workers as the workspace
 * pointer and workspace distance.
 */
void rfftwnd_c2real_aux_howmany_threads(fftwnd_plan p, int cur_dim,
                                        int howmany,
                                        fftw_complex *in,
                                        int istride, int idist,
                                        fftw_real *out,
                                        int ostride, int odist,
                                        fftw_complex *work, int nwork,
                                        int nthreads)
{
     int n = p->n[cur_dim];
     int n_after = p->n_after[cur_dim];

     /* phase 1: the current dimension, in place on the input array */
     {
          fftw_howmany_data cur;

          cur.p = p->plans[cur_dim];
          cur.howmany = howmany;
          cur.work = work;
          cur.wdist = nwork;

          cur.io_data = in;
          cur.iostride = n_after * istride;
          cur.iodist = idist;
          cur.iodist0 = istride;

          fftw_thread_spawn_loop(n_after, nthreads, fftw_howmany_thread, &cur);
     }

     /* phase 2: the dimensions that follow */
     if (cur_dim != p->rank - 2) {
          /*
           * At least two dimensions still to go: recurse over them in
           * hyperslabs, for maximum locality.
           */
          int nr = p->plans[p->rank - 1]->n;
          int n_after_r;
          howmany_hyperslab_aux_data slab;

          if (p->is_in_place)
               n_after_r = n_after * 2;
          else
               n_after_r = nr * (n_after / (nr/2 + 1));

          slab.p = p;
          slab.cur_dim = cur_dim + 1;
          slab.howmany = howmany;
          slab.work = work;
          slab.wdist = nwork;

          slab.in = in;
          slab.istride = istride;
          slab.idist = idist;
          slab.idist0 = n_after * istride;

          slab.out = out;
          slab.ostride = ostride;
          slab.odist = odist;
          slab.odist0 = n_after_r * ostride;

          fftw_thread_spawn_loop(n, nthreads,
                                 c2r_hyperslab_howmany_thread, &slab);
     }
     else {
          /* only the last dimension remains: do it directly */
          howmany_aux_data row;

          row.p = p->plans[p->rank - 1];
          row.howmany = howmany;

          /* The workspace is handed over as fftw_real and its distance
             is doubled -- presumably because one fftw_complex spans two
             fftw_real values; confirm against the type definitions. */
          row.work = (fftw_real *) work;
          row.wdist = nwork * 2;

          row.in = in;
          row.istride = istride;
          row.idist = idist;
          row.idist0 = n_after * istride;   /* same for both cases below */

          row.out = out;
          row.ostride = ostride;
          row.odist = odist;

          if (p->is_in_place) {
               row.odist0 = n_after * ostride * 2;
               fftw_thread_spawn_loop(n, nthreads,
                                      c2r_overlap_howmany_thread, &row);
          }
          else {
               row.odist0 = p->plans[p->rank - 1]->n * ostride;
               fftw_thread_spawn_loop(n, nthreads,
                                      c2r_howmany_thread, &row);
          }
     }
}