/*
 * Build a plan that applies the solver's codelet to the problem p_.
 *
 * Returns a null plan pointer when applicable() rejects the problem.
 * Otherwise the plan records the codelet (k), stride objects for the first
 * size dimension (built with the codelet descriptor's size), and the vector
 * loop parameters filled in by tensor_tornk1().  The operation count is the
 * descriptor's count scaled by vl / genus->vl.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), X(null_awake), print, destroy
     };
     const S *slv = (const S *) ego_;
     const kdft_desc *desc = slv->desc;
     const problem_dft *prob;
     const iodim *dim0;
     P *self;

     UNUSED(plnr);

     if (!applicable(ego_, p_, plnr))
          return (plan *) 0;

     prob = (const problem_dft *) p_;
     self = MKPLAN_DFT(P, &padt, apply);
     dim0 = prob->sz->dims;

     /* codelet plus input/output strides of the first size dimension */
     self->k = slv->k;
     self->is = X(mkstride)(desc->sz, dim0->is);
     self->os = X(mkstride)(desc->sz, dim0->os);

     /* vector length and vector strides come from the vecsz tensor */
     X(tensor_tornk1)(prob->vecsz, &self->vl, &self->ivs, &self->ovs);
     self->slv = slv;

     /* cost = (vl / genus->vl) * codelet cost */
     X(ops_zero)(&self->super.super.ops);
     X(ops_madd2)(self->vl / desc->genus->vl, &desc->ops,
                  &self->super.super.ops);

     return &(self->super.super);
}
/*
 * Build a plan whose apply routine is taken from the solver's own adt.
 *
 * Returns a null plan pointer when applicable() rejects the problem.  The
 * plan stores the vector loop parameters produced by tensor_tornk1() and a
 * back-pointer to the solver; it needs no custom destructor.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), X(null_awake), print, X(plan_null_destroy)
     };
     const S *slv = (const S *) ego_;

     UNUSED(plnr);

     if (applicable(ego_, p_)) {
          const problem_dft *prob = (const problem_dft *) p_;
          P *self = MKPLAN_DFT(P, &padt, slv->adt->apply);

          X(tensor_tornk1)(prob->vecsz, &self->vl, &self->ivs, &self->ovs);
          self->slv = slv;

          /* cost model: 2*vl loads and 2*vl stores */
          X(ops_other)(4 * self->vl, &self->super.super.ops);

          return &(self->super.super);
     }

     return (plan *) 0;
}
/*
 * Build a plan operating on the first two vector dimensions of p_.
 *
 * Returns a null plan pointer when applicable() rejects the problem.  One of
 * three apply routines is selected: apply when transpose_simplep() holds,
 * otherwise apply_slow when transpose_slowp() holds, otherwise
 * apply_general.  The solver argument itself is not consulted.
 */
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), X(null_awake), print, X(plan_null_destroy)
     };
     const problem_dft *prob;
     const iodim *d0, *d1;
     P *self;

     UNUSED(ego);

     if (!applicable(p_, plnr))
          return (plan *) 0;

     prob = (const problem_dft *) p_;
     d0 = prob->vecsz->dims;
     d1 = d0 + 1;

     /* choose the apply routine; the second predicate is only evaluated
        when the first one fails */
     if (X(transpose_simplep)(d0, d1, 1, X(imin)(d0->is, d0->os),
                              prob->ri, prob->ii))
          self = MKPLAN_DFT(P, &padt, apply);
     else if (X(transpose_slowp)(d0, d1, 2))
          self = MKPLAN_DFT(P, &padt, apply_slow);
     else
          self = MKPLAN_DFT(P, &padt, apply_general);

     X(transpose_dims)(d0, d1,
                       &self->n, &self->m, &self->d, &self->nd, &self->md);

     /* offset is -1 exactly when ri sits one element after ii */
     if (prob->ri - prob->ii == 1)
          self->offset = -1;
     else
          self->offset = 0;

     self->s0 = d0->is;
     self->s1 = d0->os;

     /* (4 loads + 4 stores) * (n \choose 2)
        (FIXME? underestimate for non-square) */
     X(ops_other)(4 * self->n * (self->m - 1), &self->super.super.ops);

     return &(self->super.super);
}
/*
 * Build a plan for a transform of length n, where n and the strides are read
 * from the first size dimension of p_.
 *
 * Returns a null plan pointer when applicable() rejects the problem.  The
 * cost is set by hand: 5*(n-1) additions, no multiplications and (n-1)^2
 * fused multiply-adds.  The plan's td field starts out null.
 */
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, X(plan_null_destroy)
     };
     const problem_dft *prob;
     INT n, nm1;
     P *self;

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     self = MKPLAN_DFT(P, &padt, apply);
     prob = (const problem_dft *) p_;

     n = prob->sz->dims[0].n;
     nm1 = n - 1;

     self->n = n;
     self->is = prob->sz->dims[0].is;
     self->os = prob->sz->dims[0].os;
     self->td = 0;

     self->super.super.ops.add = nm1 * 5;
     self->super.super.ops.mul = 0;
     self->super.super.ops.fma = nm1 * nm1;
#if 0 /* these are nice pipelined sequential loads and should cost nothing */
     self->super.super.ops.other = nm1 * (4 + 1 + 2 * nm1); /* approximate */
#endif

     return &(self->super.super);
}
/*
 * Build a plan whose contents are filled in by mkP().
 *
 * Returns a null plan pointer when applicable() rejects the problem, or when
 * mkP() fails; in the latter case the freshly allocated plan is released
 * with ifree() before returning.
 */
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const problem_dft *prob = (const problem_dft *) p_;
     INT len, istride, ostride;
     P *self;

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     /* size and strides of the first size dimension */
     len = prob->sz->dims[0].n;
     istride = prob->sz->dims[0].is;
     ostride = prob->sz->dims[0].os;

     self = MKPLAN_DFT(P, &padt, apply);
     if (mkP(self, len, istride, ostride, prob->ro, prob->io, plnr))
          return &(self->super.super);

     /* initialisation failed: give the plan back */
     X(ifree)(self);
     return (plan *) 0;
}
/*
 * Build a plan that delegates to a child plan for an R2HC rdft problem.
 *
 * The child's vector tensor is a length-2 dimension whose strides are the
 * distances ii - ri and io - ro, followed by the original vecsz.  Every
 * dimension of that tensor with a negative input stride has both of its
 * strides negated, and the data pointers handed to the child are shifted to
 * compensate; the shifts are stored in the plan.
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * no child plan can be made.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const problem_dft *prob;
     tensor *ri_vec, *cld_vec;
     INT ishift = 0, oshift = 0;
     INT half;
     plan *child;
     P *self;
     int k;

     UNUSED(ego_);

     if (!applicable(p_, plnr))
          return (plan *) 0;

     prob = (const problem_dft *) p_;

     ri_vec = X(mktensor_1d)(2, prob->ii - prob->ri, prob->io - prob->ro);
     cld_vec = X(tensor_append)(ri_vec, prob->vecsz);

     /* make all istrides > 0 */
     for (k = 0; k < cld_vec->rnk; ++k) {
          INT nm1;

          if (cld_vec->dims[k].is >= 0)
               continue;

          nm1 = cld_vec->dims[k].n - 1;

          cld_vec->dims[k].is *= -1;
          ishift -= nm1 * cld_vec->dims[k].is;

          cld_vec->dims[k].os *= -1;
          oshift -= nm1 * cld_vec->dims[k].os;
     }

     child = X(mkplan_d)(plnr,
                         X(mkproblem_rdft_1)(prob->sz, cld_vec,
                                             prob->ri + ishift,
                                             prob->ro + oshift,
                                             R2HC));
     X(tensor_destroy2)(ri_vec, cld_vec);

     if (!child)
          return (plan *) 0;

     self = MKPLAN_DFT(P, &padt, apply);

     if (prob->sz->rnk != 0) {
          self->n = prob->sz->dims[0].n;
          self->os = prob->sz->dims[0].os;
     } else {
          /* rank-0 size tensor: treated as length 1 with zero stride */
          self->n = 1;
          self->os = 0;
     }

     self->ishift = ishift;
     self->oshift = oshift;
     self->cld = child;

     /* child cost plus per-pair post-processing */
     half = (self->n - 1) / 2;
     self->super.super.ops = child->ops;
     self->super.super.ops.other += 8 * half;
     self->super.super.ops.add += 4 * half;
     self->super.super.ops.other += 1; /* estimator hack for nop plans */

     return &(self->super.super);
}
/*
 * Build a plan that applies the solver's codelet, in one of three flavours.
 *
 * A buffered solver (bufferedp set) uses applicable_buf()/apply_buf.
 * Otherwise applicable() decides, and its extra_iterp output selects between
 * apply_extra_iter and apply.  Returns a null plan pointer when the relevant
 * applicability test fails.
 *
 * The cost is the codelet descriptor's count scaled by vl / genus->vl, plus
 * 4*n*vl "other" operations for buffered plans.  could_prune_now_p is set
 * only for unbuffered plans.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), X(null_awake), print, destroy
     };
     const S *slv = (const S *) ego_;
     const kdft_desc *desc = slv->desc;
     const problem_dft *prob;
     const iodim *dim0;
     P *self;

     UNUSED(plnr);

     if (slv->bufferedp) {
          if (!applicable_buf(ego_, p_, plnr))
               return (plan *) 0;
          self = MKPLAN_DFT(P, &padt, apply_buf);
     } else {
          int extra_iterp = 0;

          if (!applicable(ego_, p_, plnr, &extra_iterp))
               return (plan *) 0;

          if (extra_iterp)
               self = MKPLAN_DFT(P, &padt, apply_extra_iter);
          else
               self = MKPLAN_DFT(P, &padt, apply);
     }

     prob = (const problem_dft *) p_;
     dim0 = prob->sz->dims;

     self->k = slv->k;
     self->n = dim0->n;

     /* stride objects for input, output and the intermediate buffer */
     self->is = X(mkstride)(self->n, dim0->is);
     self->os = X(mkstride)(self->n, dim0->os);
     self->bufstride = X(mkstride)(self->n, 2 * compute_batchsize(self->n));

     X(tensor_tornk1)(prob->vecsz, &self->vl, &self->ivs, &self->ovs);
     self->slv = slv;

     X(ops_zero)(&self->super.super.ops);
     X(ops_madd2)(self->vl / desc->genus->vl, &desc->ops,
                  &self->super.super.ops);

     if (slv->bufferedp)
          self->super.super.ops.other += 4 * self->n * self->vl;

     self->super.super.could_prune_now_p = !slv->bufferedp;

     return &(self->super.super);
}
/*
 * Build a plan made of two child plans obtained by splitting the size tensor
 * at the rank chosen by applicable().
 *
 * The first child transforms the second part of the split (sz2) from input
 * to output, with the first part (sz1) appended to the vector tensor.  The
 * second child transforms sz1 in place on the output arrays, using copies
 * of the tensors made with tensor_copy_inplace(..., INPLACE_OS).
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * either child cannot be planned.  The four temporary tensors are destroyed
 * on both the success and the failure path.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_dft *prob;
     tensor *sz_head, *sz_tail, *vec_inplace, *tail_inplace;
     plan *first = 0, *second = 0;
     int split_rank;
     P *self;

     if (!applicable(ego_, p_, plnr, &split_rank))
          return (plan *) 0;

     prob = (const problem_dft *) p_;

     X(tensor_split)(prob->sz, &sz_head, split_rank, &sz_tail);
     vec_inplace = X(tensor_copy_inplace)(prob->vecsz, INPLACE_OS);
     tail_inplace = X(tensor_copy_inplace)(sz_tail, INPLACE_OS);

     /* first pass: input -> output over the tail dimensions */
     first = X(mkplan_d)(plnr,
                         X(mkproblem_dft_d)(
                              X(tensor_copy)(sz_tail),
                              X(tensor_append)(prob->vecsz, sz_head),
                              prob->ri, prob->ii, prob->ro, prob->io));
     if (!first)
          goto fail;

     /* second pass: in place on the output over the head dimensions */
     second = X(mkplan_d)(plnr,
                          X(mkproblem_dft_d)(
                               X(tensor_copy_inplace)(sz_head, INPLACE_OS),
                               X(tensor_append)(vec_inplace, tail_inplace),
                               prob->ro, prob->io, prob->ro, prob->io));
     if (!second)
          goto fail;

     self = MKPLAN_DFT(P, &padt, apply);
     self->cld1 = first;
     self->cld2 = second;
     self->solver = slv;
     X(ops_add)(&first->ops, &second->ops, &self->super.super.ops);

     X(tensor_destroy4)(sz_head, sz_tail, vec_inplace, tail_inplace);
     return &(self->super.super);

 fail:
     X(plan_destroy_internal)(second);
     X(plan_destroy_internal)(first);
     X(tensor_destroy4)(sz_head, sz_tail, vec_inplace, tail_inplace);
     return (plan *) 0;
}
/*
 * Build a plan for a length-n transform that relies on a child DFT plan of
 * length nb = choose_transform_size(2*n - 1).
 *
 * A scratch buffer of 2*nb reals is allocated only so that the child can be
 * planned against it (interleaved layout, stride 2); it is freed before
 * this function returns, on every path.  The plan's w and W pointers start
 * out null.
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * the child cannot be planned.  The child's cost is counted twice, plus
 * extra add/mul/other terms depending on n and nb.
 */
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const problem_dft *prob = (const problem_dft *) p_;
     R *scratch = (R *) 0;
     plan *child = 0;
     INT n, nb;
     P *self;

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     n = prob->sz->dims[0].n;
     nb = choose_transform_size(2 * n - 1);

     scratch = (R *) MALLOC(2 * nb * sizeof(R), BUFFERS);

     child = X(mkplan_f_d)(plnr,
                           X(mkproblem_dft_d)(X(mktensor_1d)(nb, 2, 2),
                                              X(mktensor_1d)(1, 0, 0),
                                              scratch, scratch + 1,
                                              scratch, scratch + 1),
                           NO_SLOW, 0, 0);
     if (!child) {
          /* same cleanup calls as on any failure: buffer, then child */
          X(ifree0)(scratch);
          X(plan_destroy_internal)(child);
          return (plan *) 0;
     }

     X(ifree)(scratch);

     self = MKPLAN_DFT(P, &padt, apply);

     self->n = n;
     self->nb = nb;
     self->w = 0;
     self->W = 0;
     self->cldf = child;
     self->is = prob->sz->dims[0].is;
     self->os = prob->sz->dims[0].os;

     /* two runs of the child plus the surrounding arithmetic */
     X(ops_add)(&child->ops, &child->ops, &self->super.super.ops);
     self->super.super.ops.add += 4 * n + 2 * nb;
     self->super.super.ops.mul += 8 * n + 4 * nb;
     self->super.super.ops.other += 6 * (n + nb);

     return &(self->super.super);
}
/*
 * Build a decimation-in-time style plan: n is split as r * m with
 * r = first_divisor(n).  A child plan handles r transforms of length m
 * (input stride r*is, output stride os), and the embedded plan is
 * initialised by mkP() for length r with stride os*m on the output arrays.
 *
 * Returns a null plan pointer when applicable_dit() rejects the problem,
 * when the child cannot be planned, or when mkP() fails.  On failure the
 * child plan and the partially built plan are both released.
 */
static plan *mkplan_dit(const solver *ego, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake_dit, print_dit, destroy_dit
     };
     const problem_dft *prob = (const problem_dft *) p_;
     plan *child = (plan *) 0;
     P_dit *self = 0;
     int n, r, m;
     int is, os;

     if (!applicable_dit(ego, p_, plnr))
          goto fail;

     n = prob->sz->dims[0].n;
     is = prob->sz->dims[0].is;
     os = prob->sz->dims[0].os;

     r = X(first_divisor)(n);
     m = n / r;

     child = X(mkplan_d)(plnr,
                         X(mkproblem_dft_d)(X(mktensor_1d)(m, r * is, os),
                                            X(mktensor_1d)(r, is, m * os),
                                            prob->ri, prob->ii,
                                            prob->ro, prob->io));
     if (!child)
          goto fail;

     self = MKPLAN_DFT(P_dit, &padt, apply_dit);
     if (!mkP(&self->super, r, os * m, os * m, prob->ro, prob->io, plnr))
          goto fail;

     self->os = os;
     self->m = m;
     self->cld = child;
     self->W = 0;

     /* extra work per length-r transform, then m of those plus the child */
     self->super.super.super.ops.add += 2 * (r - 1);
     self->super.super.super.ops.mul += 4 * (r - 1);
     X(ops_madd)(m, &self->super.super.super.ops, &child->ops,
                 &self->super.super.super.ops);

     return &(self->super.super.super);

 fail:
     X(plan_destroy_internal)(child);
     X(ifree0)(self);
     return (plan *) 0;
}
/*
 * Build a plan that delegates to a child plan for an R2HC rdft problem.
 *
 * The child's vector tensor is a length-2 dimension whose strides are the
 * distances ii - ri and io - ro, followed by the original vecsz; the child
 * reads from ri and writes to ro.
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * no child plan can be made.  A rank-0 size tensor is only handled when
 * ALLOW_RANK0 is enabled at compile time.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const problem_dft *prob;
     tensor *ri_vec, *cld_vec;
     plan *child;
     INT half;
     P *self;

     UNUSED(ego_);

     if (!applicable(p_, plnr))
          return (plan *) 0;

     prob = (const problem_dft *) p_;

     ri_vec = X(mktensor_1d)(2, prob->ii - prob->ri, prob->io - prob->ro);
     cld_vec = X(tensor_append)(ri_vec, prob->vecsz);
     child = X(mkplan_d)(plnr,
                         X(mkproblem_rdft_1)(prob->sz, cld_vec,
                                             prob->ri, prob->ro, R2HC));
     X(tensor_destroy2)(ri_vec, cld_vec);

     if (!child)
          return (plan *) 0;

     self = MKPLAN_DFT(P, &padt, apply);

#if ALLOW_RANK0
     if (prob->sz->rnk == 0) {
          self->n = 1;
          self->os = 0;
     } else
#endif
     {
          self->n = prob->sz->dims[0].n;
          self->os = prob->sz->dims[0].os;
     }

     self->cld = child;

     /* child cost plus per-pair post-processing */
     half = (self->n - 1) / 2;
     self->super.super.ops = child->ops;
     self->super.super.ops.other += 8 * half;
     self->super.super.ops.add += 4 * half;

     return &(self->super.super);
}
/*
 * Build a plain plan_dft with an all-zero operation count.
 *
 * Returns a null plan pointer when applicable() rejects the problem.  The
 * plan carries no state beyond the generic plan_dft header and uses the
 * null awake/destroy hooks.
 */
static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), X(null_awake), print, X(plan_null_destroy)
     };

     UNUSED(plnr);

     if (applicable(ego, p)) {
          plan_dft *self = MKPLAN_DFT(plan_dft, &padt, apply);

          X(ops_zero)(&self->super.ops);
          return &(self->super);
     }

     return (plan *) 0;
}
/*
 * Build a plan that loops a child plan over one vector dimension.
 *
 * applicable() picks the vector dimension (vdim).  The child is planned for
 * the same size tensor with that dimension removed from vecsz, and the plan
 * stores the dimension's length and strides as its loop parameters.
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * the child cannot be planned.  The cost is vl times the child's cost plus
 * a small constant; pcost is set to vl * child pcost unless the size tensor
 * has rank 1 with first dimension of length at most 64.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_dft *prob;
     iodim *loop_dim;
     plan *child;
     int vdim;
     P *self;

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;

     prob = (const problem_dft *) p_;
     loop_dim = prob->vecsz->dims + vdim;

     A(loop_dim->n > 1);

     child = X(mkplan_d)(plnr,
                         X(mkproblem_dft_d)(
                              X(tensor_copy)(prob->sz),
                              X(tensor_copy_except)(prob->vecsz, vdim),
                              TAINT(prob->ri, loop_dim->is),
                              TAINT(prob->ii, loop_dim->is),
                              TAINT(prob->ro, loop_dim->os),
                              TAINT(prob->io, loop_dim->os)));
     if (!child)
          return (plan *) 0;

     self = MKPLAN_DFT(P, &padt, apply);

     self->cld = child;
     self->vl = loop_dim->n;
     self->ivs = loop_dim->is;
     self->ovs = loop_dim->os;
     self->solver = slv;

     X(ops_zero)(&self->super.super.ops);
     self->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */
     X(ops_madd2)(self->vl, &child->ops, &self->super.super.ops);

     if (prob->sz->rnk != 1 || (prob->sz->dims[0].n > 64))
          self->super.super.pcost = self->vl * child->pcost;

     return &(self->super.super);
}
/*
 * Build a plan out of two children: a rank-0 "copy" plan over all of the
 * problem's dimensions (vecsz followed by sz), and a second child for the
 * problem produced by the solver's mkcld hook, planned with NO_BUFFERING.
 *
 * The copy child is planned first.  Returns a null plan pointer when
 * applicable() rejects the problem or when either child cannot be planned;
 * any child already created is destroyed in that case.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const problem_dft *prob = (const problem_dft *) p_;
     const S *slv = (const S *) ego_;
     plan *child = 0, *copy = 0;
     P *self;

     if (!applicable(ego_, p_, plnr))
          return (plan *) 0;

     copy = X(mkplan_d)(plnr,
                        X(mkproblem_dft_d)(
                             X(mktensor_0d)(),
                             X(tensor_append)(prob->vecsz, prob->sz),
                             prob->ri, prob->ii, prob->ro, prob->io));
     if (!copy)
          goto fail;

     child = X(mkplan_f_d)(plnr, slv->adt->mkcld(prob), NO_BUFFERING, 0, 0);
     if (!child)
          goto fail;

     self = MKPLAN_DFT(P, &padt, slv->adt->apply);
     self->cld = child;
     self->cldcpy = copy;
     self->slv = slv;
     X(ops_add)(&child->ops, &copy->ops, &self->super.super.ops);

     return &(self->super.super);

 fail:
     X(plan_destroy_internal)(child);
     X(plan_destroy_internal)(copy);
     return (plan *) 0;
}
/*
 * Build a buffered plan from three children:
 *   - cld:     transforms nbuf vectors at a time from the input into a
 *              temporary buffer (interleaved, stride 2);
 *   - cldcpy:  rank-0 transform copying the buffer to the output;
 *   - cldrest: handles the vl % nbuf vectors left over after the batches.
 *
 * The buffer allocated here exists only so the children can be planned; it
 * is freed before returning and the plan stores just the sizes needed to
 * allocate it again later.
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * any child cannot be planned; whatever was created is released.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_dft *prob = (const problem_dft *) p_;
     plan *child = (plan *) 0;
     plan *copy = (plan *) 0;
     plan *rest = (plan *) 0;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, roffset, ioffset;
     P *self;

     if (!applicable(slv, p_, plnr))
          goto fail;

     n = X(tensor_sz)(prob->sz);
     X(tensor_tornk1)(prob->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl, maxnbufs[slv->maxnbuf_ndx]);
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* keep the real and imaginary parts in the same relative order as in
        the input, so that the copy plan can be optimized */
     if (prob->ri - prob->ii > 0)
          roffset = (INT)1;
     else
          roffset = (INT)0;
     ioffset = 1 - roffset;

     /* buffer used only while planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);

     /* input -> buffer; the flag is NO_DESTROY_INPUT when ri == ro */
     child = X(mkplan_f_d)(plnr,
                           X(mkproblem_dft_d)(
                                X(mktensor_1d)(n, prob->sz->dims[0].is, 2),
                                X(mktensor_1d)(nbuf, ivs, bufdist * 2),
                                TAINT(prob->ri, ivs * nbuf),
                                TAINT(prob->ii, ivs * nbuf),
                                bufs + roffset,
                                bufs + ioffset),
                           0, 0,
                           (prob->ri == prob->ro) ? NO_DESTROY_INPUT : 0);
     if (!child)
          goto fail;

     /* buffer -> output is a rank-0 transform */
     copy = X(mkplan_d)(plnr,
                        X(mkproblem_dft_d)(
                             X(mktensor_0d)(),
                             X(mktensor_2d)(nbuf, bufdist * 2, ovs,
                                            n, 2, prob->sz->dims[0].os),
                             bufs + roffset,
                             bufs + ioffset,
                             TAINT(prob->ro, ovs * nbuf),
                             TAINT(prob->io, ovs * nbuf)));
     if (!copy)
          goto fail;

     /* planning buffer no longer needed; apply() allocates the real one */
     X(ifree)(bufs);
     bufs = 0;

     /* leftover transforms that do not fill a whole batch */
     {
          INT in_skip = ivs * (nbuf * (vl / nbuf));
          INT out_skip = ovs * (nbuf * (vl / nbuf));

          rest = X(mkplan_d)(plnr,
                             X(mkproblem_dft_d)(
                                  X(tensor_copy)(prob->sz),
                                  X(mktensor_1d)(vl % nbuf, ivs, ovs),
                                  prob->ri + in_skip, prob->ii + in_skip,
                                  prob->ro + out_skip, prob->io + out_skip));
     }
     if (!rest)
          goto fail;

     self = MKPLAN_DFT(P, &padt, apply);

     self->cld = child;
     self->cldcpy = copy;
     self->cldrest = rest;
     self->n = n;
     self->vl = vl;
     self->ivs_by_nbuf = ivs * nbuf;
     self->ovs_by_nbuf = ovs * nbuf;
     self->roffset = roffset;
     self->ioffset = ioffset;
     self->nbuf = nbuf;
     self->bufdist = bufdist;

     /* cost = (vl / nbuf) * (child + copy) + rest */
     {
          opcnt batch;

          X(ops_add)(&child->ops, &copy->ops, &batch);
          X(ops_madd)(vl / nbuf, &batch, &rest->ops,
                      &self->super.super.ops);
     }

     return &(self->super.super);

 fail:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(rest);
     X(plan_destroy_internal)(copy);
     X(plan_destroy_internal)(child);
     return (plan *) 0;
}
/*
 * Build a plan that divides one vector dimension into nthr blocks and plans
 * a separate child for each block.
 *
 * applicable() picks the vector dimension.  block_size is the dimension's
 * length divided by plnr->nthr, rounded up; nthr is the number of blocks
 * that results.  Every block has block_size elements except the last,
 * which takes the remainder.
 *
 * Side effect: plnr->nthr is overwritten with ceil(plnr->nthr / nthr)
 * before the children are planned and is not restored in this function.
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * any child cannot be planned; children planned so far, the child array and
 * the working tensor are released in that case.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_dft *prob;
     plan **children = (plan **) 0;
     tensor *block_vecsz = 0;
     INT its, ots, block_size;
     problem *child_prob;
     iodim *split_dim;
     int k, nthr;
     int vdim;
     P *self;

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;

     prob = (const problem_dft *) p_;
     split_dim = prob->vecsz->dims + vdim;

     block_size = (split_dim->n + plnr->nthr - 1) / plnr->nthr;
     nthr = (int)((split_dim->n + block_size - 1) / block_size);
     plnr->nthr = (plnr->nthr + nthr - 1) / nthr;

     /* pointer distance between consecutive blocks */
     its = split_dim->is * block_size;
     ots = split_dim->os * block_size;

     children = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS);
     for (k = 0; k < nthr; ++k)
          children[k] = (plan *) 0;

     block_vecsz = X(tensor_copy)(prob->vecsz);
     for (k = 0; k < nthr; ++k) {
          if (k == nthr - 1)
               block_vecsz->dims[vdim].n = split_dim->n - k*block_size;
          else
               block_vecsz->dims[vdim].n = block_size;

          child_prob = X(mkproblem_dft)(prob->sz, block_vecsz,
                                        prob->ri + k*its, prob->ii + k*its,
                                        prob->ro + k*ots, prob->io + k*ots);
          children[k] = X(mkplan_d)(plnr, child_prob);
          if (!children[k])
               goto fail;
     }
     X(tensor_destroy)(block_vecsz);

     self = MKPLAN_DFT(P, &padt, apply);

     self->cldrn = children;
     self->its = its;
     self->ots = ots;
     self->nthr = nthr;
     self->solver = slv;

     /* ops and pcost are the sums over all children */
     X(ops_zero)(&self->super.super.ops);
     self->super.super.pcost = 0;
     for (k = 0; k < nthr; ++k) {
          X(ops_add2)(&children[k]->ops, &self->super.super.ops);
          self->super.super.pcost += children[k]->pcost;
     }

     return &(self->super.super);

 fail:
     if (children) {
          for (k = 0; k < nthr; ++k)
               X(plan_destroy_internal)(children[k]);
          X(ifree)(children);
     }
     X(tensor_destroy)(block_vecsz);
     return (plan *) 0;
}
/*
 * Build a plan from three children, using the pair of dimensions
 * (vecsz[pdim0], sz[pdim1]) selected by applicable():
 *   - cldtrans: a rank-0 transform from input to output in which the
 *               strides of those two dimensions are exchanged;
 *   - cld:      an in-place transform on the output arrays with the
 *               exchanged input strides;
 *   - cldrest:  the original problem restricted to what is left of
 *               vecsz[pdim0] after vl blocks of sz[pdim1].n elements.
 *
 * vl is vecsz[pdim0].n / sz[pdim1].n; the first two children are counted vl
 * times in the cost, the third once.
 *
 * Returns a null plan pointer when applicable() rejects the problem or when
 * any child cannot be planned; children created so far are destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const problem_dft *prob = (const problem_dft *) p_;
     plan *child = 0, *trans = 0, *rest = 0;
     R *rit, *iit, *rot, *iot;
     tensor *sz_t, *vec_t;
     INT vl, ivs, ovs;
     int pdim0, pdim1;
     P *self;

     if (!applicable(ego_, p_, plnr, &pdim0, &pdim1))
          return (plan *) 0;

     vl = prob->vecsz->dims[pdim0].n / prob->sz->dims[pdim1].n;
     A(vl >= 1);

     ivs = prob->sz->dims[pdim1].n * prob->vecsz->dims[pdim0].is;
     ovs = prob->sz->dims[pdim1].n * prob->vecsz->dims[pdim0].os;

     /* no taint stride is needed when there is a single block */
     rit = TAINT(prob->ri, vl == 1 ? 0 : ivs);
     iit = TAINT(prob->ii, vl == 1 ? 0 : ivs);
     rot = TAINT(prob->ro, vl == 1 ? 0 : ovs);
     iot = TAINT(prob->io, vl == 1 ? 0 : ovs);

     /* rank-0 transform with the two dimensions' strides exchanged */
     sz_t = X(tensor_copy_inplace)(prob->sz, INPLACE_IS);
     sz_t->dims[pdim1].os = prob->vecsz->dims[pdim0].is;
     vec_t = X(tensor_copy_inplace)(prob->vecsz, INPLACE_IS);
     vec_t->dims[pdim0].os = prob->sz->dims[pdim1].is;
     vec_t->dims[pdim0].n = prob->sz->dims[pdim1].n;
     trans = X(mkplan_d)(plnr,
                         X(mkproblem_dft_d)(X(mktensor_0d)(),
                                            X(tensor_append)(vec_t, sz_t),
                                            rit, iit, rot, iot));
     X(tensor_destroy2)(sz_t, vec_t);
     if (!trans)
          goto fail;

     /* in-place transform on the output with exchanged input strides */
     sz_t = X(tensor_copy)(prob->sz);
     sz_t->dims[pdim1].is = prob->vecsz->dims[pdim0].is;
     vec_t = X(tensor_copy)(prob->vecsz);
     vec_t->dims[pdim0].is = prob->sz->dims[pdim1].is;
     vec_t->dims[pdim0].n = prob->sz->dims[pdim1].n;
     child = X(mkplan_d)(plnr,
                         X(mkproblem_dft_d)(sz_t, vec_t,
                                            rot, iot, rot, iot));
     if (!child)
          goto fail;

     /* whatever remains of the vector dimension after vl blocks */
     vec_t = X(tensor_copy)(prob->vecsz);
     vec_t->dims[pdim0].n -= vl * prob->sz->dims[pdim1].n;
     rest = X(mkplan_d)(plnr,
                        X(mkproblem_dft_d)(X(tensor_copy)(prob->sz), vec_t,
                                           prob->ri + ivs * vl,
                                           prob->ii + ivs * vl,
                                           prob->ro + ovs * vl,
                                           prob->io + ovs * vl));
     if (!rest)
          goto fail;

     self = MKPLAN_DFT(P, &padt, apply_op);

     self->cldtrans = trans;
     self->cld = child;
     self->cldrest = rest;
     self->vl = vl;
     self->ivs = ivs;
     self->ovs = ovs;

     /* cost = rest + vl * (child + trans) */
     X(ops_cpy)(&rest->ops, &self->super.super.ops);
     X(ops_madd2)(vl, &child->ops, &self->super.super.ops);
     X(ops_madd2)(vl, &trans->ops, &self->super.super.ops);

     return &(self->super.super);

 fail:
     X(plan_destroy_internal)(rest);
     X(plan_destroy_internal)(child);
     X(plan_destroy_internal)(trans);
     return (plan *)0;
}
/*
 * Build a plan that splits a length-n transform as n = r * m, with
 * r = choose_radix(ego->r, n).  Two children are created: cldw, obtained
 * from the solver's mkcldw hook, and cld, a DFT child of length m.
 *
 * The solver's dec field selects the arrangement:
 *   - DECDIT:            cldw works on the output arrays; the plan uses
 *                        apply_dit.
 *   - DECDIF:            cldw works on the input arrays; the plan uses
 *                        apply_dif.
 *   - DECDIF+TRANSPOSE:  as DECDIF but with different cldw strides, and only
 *                        accepted when the extra shape and in-place
 *                        conditions below hold.
 *
 * Returns a null plan pointer when NO_NONTHREADEDP(plnr) holds, when
 * ct_applicable() rejects the problem, when the DECDIF+TRANSPOSE conditions
 * fail, or when either child cannot be made.  could_prune_now_p is copied
 * from cldw.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(dft_solve), awake, print, destroy
     };
     const ct_solver *slv = (const ct_solver *) ego_;
     const problem_dft *prob;
     plan *child = 0, *twid = 0;
     INT n, r, m, v, ivs, ovs;
     iodim *d;
     P *self = 0;

     if ((NO_NONTHREADEDP(plnr)) || !X(ct_applicable)(slv, p_, plnr))
          return (plan *) 0;

     prob = (const problem_dft *) p_;
     d = prob->sz->dims;
     n = d[0].n;
     r = X(choose_radix)(slv->r, n);
     m = n / r;

     X(tensor_tornk1)(prob->vecsz, &v, &ivs, &ovs);

     switch (slv->dec) {
         case DECDIT:
         {
              twid = slv->mkcldw(slv,
                                 r, m * d[0].os, m * d[0].os,
                                 m, d[0].os,
                                 v, ovs, ovs,
                                 0, m,
                                 prob->ro, prob->io, plnr);
              if (!twid)
                   goto fail;

              child = X(mkplan_d)(plnr,
                                  X(mkproblem_dft_d)(
                                       X(mktensor_1d)(m, r * d[0].is, d[0].os),
                                       X(mktensor_2d)(r, d[0].is, m * d[0].os,
                                                      v, ivs, ovs),
                                       prob->ri, prob->ii,
                                       prob->ro, prob->io));
              if (!child)
                   goto fail;

              self = MKPLAN_DFT(P, &padt, apply_dit);
              break;
         }
         case DECDIF:
         case DECDIF+TRANSPOSE:
         {
              INT cors, covs; /* cldw ors, ovs */

              if (slv->dec == DECDIF+TRANSPOSE) {
                   cors = ivs;
                   covs = m * d[0].is;

                   /* ensure that we generate well-formed dftw subproblems */
                   /* FIXME: too conservative */
                   if (r != v || d[0].is != r * cors)
                        goto fail;

                   /* FIXME: allow in-place only for now, like in
                      fftw-3.[01] */
                   if (prob->ri != prob->ro
                       || d[0].is != r * d[0].os
                       || cors != d[0].os
                       || covs != ovs)
                        goto fail;
              } else {
                   cors = m * d[0].is;
                   covs = ivs;
              }

              twid = slv->mkcldw(slv,
                                 r, m * d[0].is, cors,
                                 m, d[0].is,
                                 v, ivs, covs,
                                 0, m,
                                 prob->ri, prob->ii, plnr);
              if (!twid)
                   goto fail;

              child = X(mkplan_d)(plnr,
                                  X(mkproblem_dft_d)(
                                       X(mktensor_1d)(m, d[0].is, r * d[0].os),
                                       X(mktensor_2d)(r, cors, d[0].os,
                                                      v, covs, ovs),
                                       prob->ri, prob->ii,
                                       prob->ro, prob->io));
              if (!child)
                   goto fail;

              self = MKPLAN_DFT(P, &padt, apply_dif);
              break;
         }
         default:
              A(0);
     }

     self->cld = child;
     self->cldw = twid;
     self->r = r;
     X(ops_add)(&child->ops, &twid->ops, &self->super.super.ops);

     /* inherit could_prune_now_p attribute from cldw */
     self->super.super.could_prune_now_p = twid->could_prune_now_p;

     return &(self->super.super);

 fail:
     X(plan_destroy_internal)(twid);
     X(plan_destroy_internal)(child);
     return (plan *) 0;
}