/* * Create an fftwnd_plan specialized for specific arrays. (These * arrays are ignored, however, if they are NULL or if the flags do * not include FFTW_MEASURE.) The main advantage of being provided * arrays like this is that we can do runtime timing measurements of * our options, without worrying about allocating excessive scratch * space. */ fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n, fftw_direction dir, int flags, fftw_complex *in, int istride, fftw_complex *out, int ostride) { fftwnd_plan p; if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags))) return 0; if (!(flags & FFTW_MEASURE) || in == 0 || (!p->is_in_place && out == 0)) { /**** use default plan ****/ p->plans = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank), rank, n, dir, flags); if (!p->plans) { fftwnd_destroy_plan(p); return 0; } if (flags & FFTWND_FORCE_BUFFERED) p->nbuffers = FFTWND_NBUFFERS; else p->nbuffers = FFTWND_DEFAULT_NBUFFERS; p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); if (p->nwork && !(flags & FFTW_THREADSAFE)) { p->work = (fftw_complex*) fftw_malloc(p->nwork * sizeof(fftw_complex)); if (!p->work) { fftwnd_destroy_plan(p); return 0; } } } else { /**** use runtime measurements to pick plan ****/ fftw_plan *plans_buf, *plans_nobuf; double t_buf, t_nobuf; p->nwork = fftwnd_work_size(rank, n, flags, FFTWND_NBUFFERS + 1); if (p->nwork && !(flags & FFTW_THREADSAFE)) { p->work = (fftw_complex*) fftw_malloc(p->nwork * sizeof(fftw_complex)); if (!p->work) { fftwnd_destroy_plan(p); return 0; } } else p->work = (fftw_complex*) NULL; /* two possible sets of 1D plans: */ plans_buf = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank), rank, n, dir, flags); plans_nobuf = fftwnd_create_plans_specific(fftwnd_new_plan_array(rank), rank, n, p->n_after, dir, flags, in, istride, out, ostride); if (!plans_buf || !plans_nobuf) { destroy_plan_array(rank, plans_nobuf); destroy_plan_array(rank, plans_buf); fftwnd_destroy_plan(p); return 0; } /* time the two possible plans */ p->plans = plans_nobuf; p->nbuffers = 0; p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); t_nobuf = fftwnd_measure_runtime(p, in, istride, out, ostride); p->plans = plans_buf; p->nbuffers = FFTWND_NBUFFERS; p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); t_buf = fftwnd_measure_runtime(p, in, istride, out, ostride); /* pick the better one: */ if (t_nobuf < t_buf) { /* use unbuffered transform */ p->plans = plans_nobuf; p->nbuffers = 0; /* work array is unnecessarily large */ if (p->work) fftw_free(p->work); p->work = 0; destroy_plan_array(rank, plans_buf); /* allocate a work array of the correct size: */ p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); if (p->nwork && !(flags & FFTW_THREADSAFE)) { p->work = (fftw_complex*) fftw_malloc(p->nwork * sizeof(fftw_complex)); if (!p->work) { fftwnd_destroy_plan(p); return 0; } } } else { /* use buffered transform */ destroy_plan_array(rank, plans_nobuf); } } return p; }
/* * Create an fftwnd_plan specialized for specific arrays. (These * arrays are ignored, however, if they are NULL or if the flags * do not include FFTW_MEASURE.) The main advantage of being * provided arrays like this is that we can do runtime timing * measurements of our options, without worrying about allocating * excessive scratch space. */ fftwnd_plan rfftwnd_create_plan_specific(int rank, const int *n, fftw_direction dir, int flags, fftw_real *in, int istride, fftw_real *out, int ostride) { fftwnd_plan p; int i; int rflags = flags & ~FFTW_IN_PLACE; /* note that we always do rfftw transforms out-of-place in rexec2.c */ if (flags & FFTW_IN_PLACE) { out = NULL; ostride = istride; } istride = ostride = 1; /* * strides don't work yet, since it is not * clear whether they apply to real * or complex data */ if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags))) return 0; for (i = 0; i < rank - 1; ++i) p->n_after[i] = (n[rank - 1]/2 + 1) * (p->n_after[i] / n[rank - 1]); if (rank > 0) p->n[rank - 1] = n[rank - 1] / 2 + 1; p->plans = fftwnd_new_plan_array(rank); if (rank > 0 && !p->plans) { rfftwnd_destroy_plan(p); return 0; } if (rank > 0) { p->plans[rank - 1] = rfftw_create_plan(n[rank - 1], dir, rflags); if (!p->plans[rank - 1]) { rfftwnd_destroy_plan(p); return 0; } } if (rank > 1) { if (!(flags & FFTW_MEASURE) || in == 0 || (!p->is_in_place && out == 0)) { if (!fftwnd_create_plans_generic(p->plans, rank - 1, n, dir, flags | FFTW_IN_PLACE)) { rfftwnd_destroy_plan(p); return 0; } } else if (dir == FFTW_COMPLEX_TO_REAL || (flags & FFTW_IN_PLACE)) { if (!fftwnd_create_plans_specific(p->plans, rank - 1, n, p->n_after, dir, flags | FFTW_IN_PLACE, (fftw_complex *) in, istride, 0, 0)) { rfftwnd_destroy_plan(p); return 0; } } else { if (!fftwnd_create_plans_specific(p->plans, rank - 1, n, p->n_after, dir, flags | FFTW_IN_PLACE, (fftw_complex *) out, ostride, 0, 0)) { rfftwnd_destroy_plan(p); return 0; } } } p->nbuffers = 0; p->nwork = fftwnd_work_size(rank, p->n, flags | FFTW_IN_PLACE, p->nbuffers + 1); if (p->nwork && !(flags & FFTW_THREADSAFE)) { p->work = (fftw_complex *) fftw_malloc(p->nwork * sizeof(fftw_complex)); if (!p->work) { rfftwnd_destroy_plan(p); return 0; } } return p; }