/*
 * Build a plan for problem p_ whose apply routine and name are taken from
 * the solver.  The planner argument is not consulted.
 *
 * Returns a null plan when applicable() rejects the problem, otherwise a
 * pointer to the embedded generic plan of the newly made P.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), X(null_awake), print, X(plan_null_destroy)
     };
     const S *slv = (const S *) ego_;
     const problem_rdft *prb;
     P *pln;
     int retval;

     UNUSED(plnr);

     if (!applicable(slv, p_))
          return (plan *) 0;

     prb = (const problem_rdft *) p_;
     pln = MKPLAN_RDFT(P, &padt, slv->apply);

     /* fill_iodim() is called again on the real plan; its result is only
        inspected by the assertion, hence the void cast */
     retval = fill_iodim(pln, prb);
     (void)retval; /* UNUSED unless DEBUG */
     A(retval);
     A(pln->vl > 0); /* because FINITE_RNK(p->vecsz->rnk) holds */

     pln->nam = slv->nam;

     /* cost model: X(tensor_sz)(vecsz) loads plus X(tensor_sz)(vecsz) stores */
     X(ops_other)(2 * X(tensor_sz)(prb->vecsz), &pln->super.super.ops);

     return &(pln->super.super);
}
/*
 * Plan the problem by delegating to a child R2HC plan over the same size
 * tensor, vector tensor and I/O arrays, with apply() installed as the
 * top-level routine.
 *
 * Returns a null plan if applicable() rejects the problem or no child plan
 * could be created.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          fftwf_rdft_solve, awake, print, destroy
     };
     const problem_rdft *prb;
     plan *child;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     prb = (const problem_rdft *) p_;

     /* NO_DHT_R2HC stops infinite loops with rdft-dht.c */
     child = fftwf_mkplan_f_d(plnr,
                              fftwf_mkproblem_rdft_1(prb->sz, prb->vecsz,
                                                     prb->I, prb->O, R2HC),
                              NO_DHT_R2HC, 0, 0);
     if (!child)
          return (plan *)0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld = child;
     pln->n = prb->sz->dims[0].n;
     pln->os = prb->sz->dims[0].os;

     /* start from the child's operation count, then add this plan's own
        work, which scales with (n - 1)/2 */
     pln->super.super.ops = child->ops;
     pln->super.super.ops.other += 4 * ((pln->n - 1)/2);
     pln->super.super.ops.add += 2 * ((pln->n - 1)/2);

     return &(pln->super.super);
}
/*
 * Split the size tensor at the rank chosen by applicable() and plan the two
 * halves as separate child problems:
 *   cld1 transforms sz2 from I to O, with sz1 appended to the vector loop;
 *   cld2 transforms sz1 in place on O, with sz2 appended to the vector loop
 *        (both tensors converted with INPLACE_OS).
 *
 * Returns a null plan if the solver is not applicable or either child
 * cannot be planned.  The four temporary tensors are destroyed on every
 * path that created them.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_rdft *prb;
     plan *result = (plan *) 0;
     plan *cld1 = 0, *cld2 = 0;
     tensor *sz1, *sz2, *vecszi, *sz2i;
     int spltrnk;
     P *pln;

     if (!applicable(ego_, p_, plnr, &spltrnk))
          return (plan *) 0;

     prb = (const problem_rdft *) p_;
     X(tensor_split)(prb->sz, &sz1, spltrnk, &sz2);
     vecszi = X(tensor_copy_inplace)(prb->vecsz, INPLACE_OS);
     sz2i = X(tensor_copy_inplace)(sz2, INPLACE_OS);

     cld1 = X(mkplan_d)(plnr,
                        X(mkproblem_rdft_d)(X(tensor_copy)(sz2),
                                            X(tensor_append)(prb->vecsz, sz1),
                                            prb->I, prb->O,
                                            prb->kind + spltrnk));

     /* the second child is only attempted once the first one exists */
     if (cld1)
          cld2 = X(mkplan_d)(plnr,
                             X(mkproblem_rdft_d)(
                                  X(tensor_copy_inplace)(sz1, INPLACE_OS),
                                  X(tensor_append)(vecszi, sz2i),
                                  prb->O, prb->O, prb->kind));

     if (cld1 && cld2) {
          pln = MKPLAN_RDFT(P, &padt, apply);
          pln->cld1 = cld1;
          pln->cld2 = cld2;
          pln->solver = slv;
          X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
          result = &(pln->super.super);
     } else {
          X(plan_destroy_internal)(cld2);
          X(plan_destroy_internal)(cld1);
     }

     X(tensor_destroy4)(sz2, sz1, vecszi, sz2i);
     return result;
}
/*
 * Plan a transform whose work is done by a child R2HC plan of length
 * n = dims[0].n - 1 operating on a contiguous scratch array.
 *
 * The scratch array allocated here exists only so the child can be planned;
 * it is freed before returning.  Returns a null plan if the solver is not
 * applicable or the child cannot be planned.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const problem_rdft *prb;
     plan *child;
     P *pln;
     R *scratch;
     INT n, pairs, even;
     opcnt ops;

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     prb = (const problem_rdft *) p_;

     n = prb->sz->dims[0].n - 1;
     A(n > 0);

     scratch = (R *) MALLOC(sizeof(R) * n, BUFFERS);
     child = X(mkplan_d)(plnr,
                         X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
                                               X(mktensor_0d)(),
                                               scratch, scratch, R2HC));
     X(ifree)(scratch);
     if (!child)
          return (plan *)0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld = child;
     pln->td = 0;
     pln->n = n;
     pln->is = prb->sz->dims[0].is;
     pln->os = prb->sz->dims[0].os;

     X(tensor_tornk1)(prb->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     /* per-vector cost of this plan's own pre/post-processing */
     pairs = (n - 1) / 2;
     even = 1 - n % 2;
     X(ops_zero)(&ops);
     ops.other = 8 + pairs * 11 + even * 5;
     ops.add = 2 + pairs * 5;
     ops.mul = pairs * 3 + even * 1;

     /* total = vl * (own cost + child cost) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &child->ops, &pln->super.super.ops);

     return &(pln->super.super);
}
/*
 * Create a plan with a zero operation count for problems accepted by
 * applicable().  The planner argument is not consulted.
 * Returns a null plan when the problem is not applicable.
 */
static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
{
     static const plan_adt padt = {
          fftwf_rdft_solve, fftwf_null_awake, print, fftwf_plan_null_destroy
     };
     plan_rdft *pln;

     UNUSED(plnr);

     if (applicable(ego, p)) {
          pln = MKPLAN_RDFT(plan_rdft, &padt, apply);
          fftwf_ops_zero(&pln->super.ops);
          return &(pln->super);
     }

     return (plan *) 0;
}
/*
 * Create a plan that records the solver's kernel (ego->k) together with the
 * strides of the first transform dimension and the rank-1 vector loop
 * derived from p->vecsz.  The planner argument is not consulted.
 *
 * Returns a null plan when applicable() rejects the problem.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          fftwf_rdft_solve, fftwf_null_awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_rdft *prb;
     iodim *dim0;
     P *pln;

     UNUSED(plnr);

     if (!applicable(ego_, p_))
          return (plan *)0;

     prb = (const problem_rdft *) p_;
     dim0 = prb->sz->dims;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->k = slv->k;

     pln->is = fftwf_mkstride(dim0[0].n, dim0[0].is);
     pln->os = fftwf_mkstride(dim0[0].n, dim0[0].os);

     fftwf_tensor_tornk1(prb->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     pln->slv = slv;

     /* the kernel's cost is given per genus->vl vector elements, so scale
        it by the number of such groups in the vector loop */
     fftwf_ops_zero(&pln->super.super.ops);
     fftwf_ops_madd2(pln->vl / slv->desc->genus->vl,
                     &slv->desc->ops,
                     &pln->super.super.ops);

     pln->super.super.could_prune_now_p = 1;

     return &(pln->super.super);
}
/*
 * Plan the problem as two children:
 *   a rank-0 plan over vecsz appended with sz, from I to O, and
 *   the problem produced by the solver's adt->mkcld(), planned with
 *   NO_BUFFERING set.
 * The top-level apply routine also comes from the solver's adt.
 *
 * Returns a null plan if the solver is not applicable or either child
 * cannot be planned; any child already created is destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_rdft *prb = (const problem_rdft *) p_;
     plan *work_pln = 0, *copy_pln = 0;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          return (plan *) 0;

     copy_pln = X(mkplan_d)(plnr,
                            X(mkproblem_rdft_0_d)(
                                 X(tensor_append)(prb->vecsz, prb->sz),
                                 prb->I, prb->O));
     if (!copy_pln)
          goto nada;

     work_pln = X(mkplan_f_d)(plnr, slv->adt->mkcld(prb),
                              NO_BUFFERING, 0, 0);
     if (!work_pln)
          goto nada;

     pln = MKPLAN_RDFT(P, &padt, slv->adt->apply);
     pln->slv = slv;
     pln->cld = work_pln;
     pln->cldcpy = copy_pln;
     X(ops_add)(&work_pln->ops, &copy_pln->ops, &pln->super.super.ops);

     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(work_pln);
     X(plan_destroy_internal)(copy_pln);
     return (plan *)0;
}
/*
 * Plan a REDFT01/REDFT10/RODFT01/RODFT10 transform of length n on top of a
 * child R2HC plan of the same length that runs on a contiguous scratch
 * array.  The apply routine is selected from the problem's kind.
 *
 * The scratch array allocated here exists only so the child can be planned;
 * it is freed before returning.
 *
 * Returns a null plan if the solver is not applicable, the child cannot be
 * planned, or the kind is not one of the four handled here.  In the last
 * case the already-planned child is destroyed so that it does not leak when
 * the A(0) assertion is compiled out.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const problem_rdft *p;
     plan *cld;
     R *buf;
     INT n;
     opcnt ops;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     p = (const problem_rdft *) p_;

     n = p->sz->dims[0].n;
     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);

     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
                                                   X(mktensor_0d)(),
                                                   buf, buf, R2HC));
     X(ifree)(buf);
     if (!cld)
          return (plan *)0;

     switch (p->kind[0]) {
         case REDFT01: pln = MKPLAN_RDFT(P, &padt, apply_re01); break;
         case REDFT10: pln = MKPLAN_RDFT(P, &padt, apply_re10); break;
         case RODFT01: pln = MKPLAN_RDFT(P, &padt, apply_ro01); break;
         case RODFT10: pln = MKPLAN_RDFT(P, &padt, apply_ro10); break;
         default:
              A(0);
              /* no plan owns cld yet: release it before bailing out */
              X(plan_destroy_internal)(cld);
              return (plan*)0;
     }

     pln->n = n;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;
     pln->cld = cld;
     pln->td = 0;
     pln->kind = p->kind[0];

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     /* per-vector cost of this plan's own pre/post-processing */
     X(ops_zero)(&ops);
     ops.other = 4 + (n-1)/2 * 10 + (1 - n % 2) * 5;
     if (p->kind[0] == REDFT01 || p->kind[0] == RODFT01) {
          ops.add = (n-1)/2 * 6;
          ops.mul = (n-1)/2 * 4 + (1 - n % 2) * 2;
     }
     else { /* 10 transforms */
          ops.add = (n-1)/2 * 2;
          ops.mul = 1 + (n-1)/2 * 6 + (1 - n % 2) * 2;
     }

     /* total = vl * (own cost + child cost) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);

     return &(pln->super.super);
}
/*
 * Split the vector dimension selected by applicable() (vdim) into nthr
 * blocks of block_size elements (the last block takes the remainder) and
 * plan one child problem per block, each offset into I and O by a multiple
 * of the block stride.
 *
 * plnr->nthr is reduced before the children are planned and is not restored
 * by this function.
 *
 * Returns a null plan if the solver is not applicable or any child cannot
 * be planned; children already created and the child array are released.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const problem_rdft *prb;
     plan **cldrn = (plan **) 0;
     problem *cldp;
     tensor *vecsz;
     iodim *d;
     INT its, ots, block_size;
     int vdim, nthr, k;
     P *pln;

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;

     prb = (const problem_rdft *) p_;
     d = &prb->vecsz->dims[vdim];

     /* ceil-divide the loop among the available threads, then recompute how
        many blocks are actually needed and how many threads remain for
        each child */
     block_size = (d->n + plnr->nthr - 1) / plnr->nthr;
     nthr = (int)((d->n + block_size - 1) / block_size);
     plnr->nthr = (plnr->nthr + nthr - 1) / nthr;

     its = d->is * block_size;
     ots = d->os * block_size;

     cldrn = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS);
     for (k = 0; k < nthr; ++k)
          cldrn[k] = (plan *) 0;

     vecsz = X(tensor_copy)(prb->vecsz);
     for (k = 0; k < nthr; ++k) {
          if (k == nthr - 1)
               vecsz->dims[vdim].n = d->n - k*block_size;
          else
               vecsz->dims[vdim].n = block_size;

          cldp = X(mkproblem_rdft)(prb->sz, vecsz,
                                   prb->I + k*its, prb->O + k*ots,
                                   prb->kind);
          cldrn[k] = X(mkplan_d)(plnr, cldp);
          if (!cldrn[k])
               goto nada;
     }
     X(tensor_destroy)(vecsz);

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->solver = slv;
     pln->cldrn = cldrn;
     pln->nthr = nthr;
     pln->its = its;
     pln->ots = ots;

     /* ops and pcost are the sums over all children */
     X(ops_zero)(&pln->super.super.ops);
     pln->super.super.pcost = 0;
     for (k = 0; k < nthr; ++k) {
          X(ops_add2)(&cldrn[k]->ops, &pln->super.super.ops);
          pln->super.super.pcost += cldrn[k]->pcost;
     }

     return &(pln->super.super);

 nada:
     if (cldrn) {
          for (k = 0; k < nthr; ++k)
               X(plan_destroy_internal)(cldrn[k]);
          X(ifree)(cldrn);
     }
     X(tensor_destroy)(vecsz);
     return (plan *) 0;
}
/*
 * Multi-threaded hc2hc plan constructor.
 *
 * n is factored as r * m, with r chosen by X(choose_radix).  The
 * mcount = (m + 2) / 2 twiddle iterations are split into nthr blocks of
 * block_size (the last block takes the remainder) and one twiddle child is
 * made per block through ego->mkcldw.  A single child plan (cld) is then
 * made for the r transforms of size m.  R2HC installs apply_dit, HC2R
 * installs apply_dif.
 *
 * plnr->nthr is reduced while the twiddle children are created and is
 * restored from plnr_nthr_save before cld is planned.
 *
 * Returns a null plan if fewer than two threads are available, the solver
 * is not applicable, any child cannot be created, or the kind is neither
 * R2HC nor HC2R.  The last case used to fall out of the switch with
 * pln == 0 and dereference it whenever A() is compiled out; it now takes
 * the common cleanup path instead.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
     const problem_rdft *p;
     P *pln = 0;
     plan *cld = 0, **cldws = 0;
     INT n, r, m, v, ivs, ovs, mcount;
     int i, nthr, plnr_nthr_save;
     INT block_size;
     iodim *d;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (plnr->nthr <= 1 || !X(hc2hc_applicable)(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_rdft *) p_;
     d = p->sz->dims;
     n = d[0].n;
     r = X(choose_radix)(ego->r, n);
     m = n / r;
     mcount = (m + 2) / 2;

     X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs);

     block_size = (mcount + plnr->nthr - 1) / plnr->nthr;
     nthr = (int)((mcount + block_size - 1) / block_size);
     plnr_nthr_save = plnr->nthr;
     plnr->nthr = (plnr->nthr + nthr - 1) / nthr;

     /* zero the array first so the cleanup path can destroy every slot */
     cldws = (plan **) MALLOC(sizeof(plan *) * nthr, PLANS);
     for (i = 0; i < nthr; ++i) cldws[i] = (plan *) 0;

     switch (p->kind[0]) {
         case R2HC:
              /* twiddle children work on the output array */
              for (i = 0; i < nthr; ++i) {
                   cldws[i] = ego->mkcldw(ego,
                                          R2HC, r, m, d[0].os, v, ovs,
                                          i*block_size,
                                          (i == nthr - 1) ?
                                          (mcount - i*block_size) : block_size,
                                          p->O, plnr);
                   if (!cldws[i]) goto nada;
              }

              plnr->nthr = plnr_nthr_save;

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, r * d[0].is, d[0].os),
                                     X(mktensor_2d)(r, d[0].is, m * d[0].os,
                                                    v, ivs, ovs),
                                     p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dit);
              break;

         case HC2R:
              /* twiddle children work on the input array */
              for (i = 0; i < nthr; ++i) {
                   cldws[i] = ego->mkcldw(ego,
                                          HC2R, r, m, d[0].is, v, ivs,
                                          i*block_size,
                                          (i == nthr - 1) ?
                                          (mcount - i*block_size) : block_size,
                                          p->I, plnr);
                   if (!cldws[i]) goto nada;
              }

              plnr->nthr = plnr_nthr_save;

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, d[0].is, r * d[0].os),
                                     X(mktensor_2d)(r, m * d[0].is, d[0].os,
                                                    v, ivs, ovs),
                                     p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dif);
              break;

         default:
              A(0);
              /* unsupported kind: no plan was made, so clean up and fail
                 rather than dereferencing the null pln below */
              goto nada;
     }

     pln->cld = cld;
     pln->cldws = cldws;
     pln->nthr = nthr;
     pln->r = r;

     /* total cost is the sum over all twiddle children plus cld; the plan
        can be pruned now if any twiddle child can */
     X(ops_zero)(&pln->super.super.ops);
     for (i = 0; i < nthr; ++i) {
          X(ops_add2)(&cldws[i]->ops, &pln->super.super.ops);
          pln->super.super.could_prune_now_p |= cldws[i]->could_prune_now_p;
     }
     X(ops_add2)(&cld->ops, &pln->super.super.ops);

     return &(pln->super.super);

 nada:
     if (cldws) {
          for (i = 0; i < nthr; ++i)
               X(plan_destroy_internal)(cldws[i]);
          X(ifree)(cldws);
     }
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
/*
 * Plan a transform with n = dims[0].n - 1 using two children that share a
 * scratch array of 2*n reals:
 *   cld    - an in-place R2HC of length 2*n on the scratch array;
 *   cldcpy - a rank-0 problem whose vector tensor has n+1 elements, going
 *            from the scratch array (stride 1) to O (stride dims[0].os).
 *
 * The scratch array exists only for planning and is freed before a plan is
 * returned.  Returns a null plan if the solver is not applicable or either
 * child cannot be planned.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const problem_rdft *prb;
     plan *cld = (plan *) 0, *cldcpy;
     R *buf = (R *) 0;
     int n;
     int vl, ivs, ovs;
     opcnt ops;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          goto nada;

     prb = (const problem_rdft *) p_;

     n = prb->sz->dims[0].n - 1;
     A(n > 0);
     buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);

     cld = X(mkplan_d)(plnr,
                       X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n, 1, 1),
                                             X(mktensor_0d)(),
                                             buf, buf, R2HC));
     if (!cld)
          goto nada;

     X(tensor_tornk1)(prb->vecsz, &vl, &ivs, &ovs);

     cldcpy = X(mkplan_d)(plnr,
                          X(mkproblem_rdft_1_d)(
                               X(mktensor_0d)(),
                               X(mktensor_1d)(n+1, 1, prb->sz->dims[0].os),
                               buf, TAINT(prb->O, ovs), R2HC));
     if (!cldcpy)
          goto nada;

     X(ifree)(buf);

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->n = n;
     pln->is = prb->sz->dims[0].is;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;

     X(ops_zero)(&ops);
     ops.other = n + 2*n; /* loads + stores (input -> buf) */

     /* total = vl * (own copy cost + both children) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);

     return &(pln->super.super);

 nada:
     X(ifree0)(buf);
     if (cld)
          X(plan_destroy_internal)(cld);
     return (plan *)0;
}
/*
 * Single-threaded hc2hc plan constructor.
 *
 * n is factored as r * m, with r chosen by X(choose_radix).  One twiddle
 * child (cldw) covering (m+2)/2 iterations is made through ego->mkcldw, and
 * one child plan (cld) is made for the transforms of size m, whose vector
 * tensor is a 1d tensor of length r appended with p->vecsz.  R2HC installs
 * apply_dit, HC2R installs apply_dif.
 *
 * Returns a null plan if NO_NONTHREADEDP(plnr) holds, the solver is not
 * applicable, a child cannot be created, or the kind is neither R2HC nor
 * HC2R.  The last case used to fall out of the switch with pln == 0 and
 * dereference it whenever A() is compiled out; it now takes the common
 * cleanup path instead.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
     const problem_rdft *p;
     P *pln = 0;
     plan *cld = 0, *cldw = 0;
     INT n, r, m, vl, ivs, ovs;
     iodim *d;
     tensor *t1, *t2;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (NO_NONTHREADEDP(plnr) || !X(hc2hc_applicable)(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_rdft *) p_;
     d = p->sz->dims;
     n = d[0].n;
     r = X(choose_radix)(ego->r, n);
     m = n / r;

     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     switch (p->kind[0]) {
         case R2HC:
              /* twiddle child works on the output array */
              cldw = ego->mkcldw(ego,
                                 R2HC, r, m, d[0].os, vl, ovs, 0, (m+2)/2,
                                 p->O, plnr);
              if (!cldw) goto nada;

              /* t1 is only a temporary used to build t2 */
              t1 = X(mktensor_1d)(r, d[0].is, m * d[0].os);
              t2 = X(tensor_append)(t1, p->vecsz);
              X(tensor_destroy)(t1);

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, r * d[0].is, d[0].os),
                                     t2, p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dit);
              break;

         case HC2R:
              /* twiddle child works on the input array */
              cldw = ego->mkcldw(ego,
                                 HC2R, r, m, d[0].is, vl, ivs, 0, (m+2)/2,
                                 p->I, plnr);
              if (!cldw) goto nada;

              /* t1 is only a temporary used to build t2 */
              t1 = X(mktensor_1d)(r, m * d[0].is, d[0].os);
              t2 = X(tensor_append)(t1, p->vecsz);
              X(tensor_destroy)(t1);

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, d[0].is, r * d[0].os),
                                     t2, p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dif);
              break;

         default:
              A(0);
              /* unsupported kind: no plan was made, so fail cleanly rather
                 than dereferencing the null pln below */
              goto nada;
     }

     pln->cld = cld;
     pln->cldw = cldw;
     pln->r = r;
     X(ops_add)(&cld->ops, &cldw->ops, &pln->super.super.ops);

     /* inherit could_prune_now_p attribute from cldw */
     pln->super.super.could_prune_now_p = cldw->could_prune_now_p;

     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cldw);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
/*
 * Buffered plan constructor.  Three children are planned:
 *   cld     - the transform itself, batched nbuf at a time, with one side
 *             being a temporary buffer (the input side for HC2R, the output
 *             side otherwise);
 *   cldcpy  - a rank-0 problem that moves data between the buffer and the
 *             user array (I -> buffer for HC2R, buffer -> O otherwise);
 *   cldrest - the vl % nbuf transforms left over after the full batches,
 *             planned directly on I and O.
 *
 * The buffer allocated here is used only while planning and is freed before
 * cldrest is planned.  Returns a null plan if the solver is not applicable
 * or any child cannot be planned.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          fftwf_rdft_solve, awake, print, destroy
     };
     const S *ego = (const S *)ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     float *bufs = (float *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs;
     INT id, od;
     opcnt t;
     int hc2rp;
     P *pln;

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = fftwf_tensor_sz(p->sz);
     fftwf_tensor_tornk1(p->vecsz, &vl, &ivs, &ovs);
     hc2rp = (p->kind[0] == HC2R);

     nbuf = fftwf_nbuf(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = fftwf_bufdist(n, vl);
     A(nbuf > 0);

     /* initial allocation for the purpose of planning */
     bufs = (float *) MALLOC(sizeof(float) * nbuf * bufdist, BUFFERS);

     if (hc2rp) {
          /* buffer -> O; allow destruction of buffer */
          cld = fftwf_mkplan_f_d(plnr,
                                 fftwf_mkproblem_rdft_d(
                                      fftwf_mktensor_1d(n, 1,
                                                        p->sz->dims[0].os),
                                      fftwf_mktensor_1d(nbuf, bufdist, ovs),
                                      bufs, TAINT(p->O, ovs * nbuf),
                                      p->kind),
                                 0, 0, NO_DESTROY_INPUT);
          if (!cld)
               goto nada;

          /* copying input into the buffer is a rank-0 transform: */
          cldcpy = fftwf_mkplan_d(plnr,
                                  fftwf_mkproblem_rdft_0_d(
                                       fftwf_mktensor_2d(nbuf, ivs, bufdist,
                                                         n,
                                                         p->sz->dims[0].is,
                                                         1),
                                       TAINT(p->I, ivs * nbuf), bufs));
          if (!cldcpy)
               goto nada;
     } else {
          /* I -> buffer; allow destruction of input if problem is in place */
          cld = fftwf_mkplan_f_d(plnr,
                                 fftwf_mkproblem_rdft_d(
                                      fftwf_mktensor_1d(n,
                                                        p->sz->dims[0].is,
                                                        1),
                                      fftwf_mktensor_1d(nbuf, ivs, bufdist),
                                      TAINT(p->I, ivs * nbuf), bufs,
                                      p->kind),
                                 0, 0,
                                 (p->I == p->O) ? NO_DESTROY_INPUT : 0);
          if (!cld)
               goto nada;

          /* copying back from the buffer is a rank-0 transform: */
          cldcpy = fftwf_mkplan_d(plnr,
                                  fftwf_mkproblem_rdft_0_d(
                                       fftwf_mktensor_2d(nbuf, bufdist, ovs,
                                                         n, 1,
                                                         p->sz->dims[0].os),
                                       bufs, TAINT(p->O, ovs * nbuf)));
          if (!cldcpy)
               goto nada;
     }

     /* deallocate buffers, let apply() allocate them for real */
     fftwf_ifree(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest), which start right after the
        last full batch of nbuf vectors */
     id = ivs * (nbuf * (vl / nbuf));
     od = ovs * (nbuf * (vl / nbuf));
     cldrest = fftwf_mkplan_d(plnr,
                              fftwf_mkproblem_rdft_d(
                                   fftwf_tensor_copy(p->sz),
                                   fftwf_mktensor_1d(vl % nbuf, ivs, ovs),
                                   p->I + id, p->O + od, p->kind));
     if (!cldrest)
          goto nada;

     if (hc2rp)
          pln = MKPLAN_RDFT(P, &padt, apply_hc2r);
     else
          pln = MKPLAN_RDFT(P, &padt, apply);

     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->nbuf = nbuf;
     pln->bufdist = bufdist;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;

     /* cost = (vl / nbuf) * (cld + cldcpy) + cldrest */
     fftwf_ops_add(&cld->ops, &cldcpy->ops, &t);
     fftwf_ops_madd(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);

     return &(pln->super.super);

 nada:
     fftwf_ifree0(bufs);
     fftwf_plan_destroy_internal(cldrest);
     fftwf_plan_destroy_internal(cldcpy);
     fftwf_plan_destroy_internal(cld);
     return (plan *) 0;
}
/*
 * Create a plan around the solver's kernel (ego->k), in either its plain or
 * its buffered flavour depending on ego->bufferedp.  The planner argument is
 * not consulted.
 *
 * For R2HC-type kinds the "real" stride rs is the input stride and the
 * "complex" stride cs is the output stride; for the other kinds the two are
 * swapped.  A second set of strides based on compute_batchsize(n) is stored
 * in the b* fields.
 *
 * Returns a null plan when the matching applicability test fails.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), X(null_awake), print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft *p;
     iodim *d;
     INT rs, cs, b, n;
     P *pln;

     UNUSED(plnr);

     if (ego->bufferedp ? !applicable_buf(ego_, p_) : !applicable(ego_, p_))
          return (plan *)0;

     p = (const problem_rdft *) p_;

     if (R2HC_KINDP(p->kind[0])) {
          rs = p->sz->dims[0].is;
          cs = p->sz->dims[0].os;
          if (ego->bufferedp)
               pln = MKPLAN_RDFT(P, &padt, apply_buf_r2hc);
          else
               pln = MKPLAN_RDFT(P, &padt, apply_r2hc);
     } else {
          rs = p->sz->dims[0].os;
          cs = p->sz->dims[0].is;
          if (ego->bufferedp)
               pln = MKPLAN_RDFT(P, &padt, apply_buf_hc2r);
          else
               pln = MKPLAN_RDFT(P, &padt, apply_hc2r);
     }

     d = p->sz->dims;
     n = d[0].n;

     pln->k = ego->k;
     pln->n = n;

     /* strides for operating directly on the user arrays */
     pln->rs0 = rs;
     pln->rs = X(mkstride)(n, 2 * rs);
     pln->csr = X(mkstride)(n, cs);
     pln->csi = X(mkstride)(n, -cs);
     pln->ioffset = ioffset(p->kind[0], n, cs);

     /* the same quantities with the batch size b in place of rs/cs */
     b = compute_batchsize(n);
     pln->brs = X(mkstride)(n, 2 * b);
     pln->bcsr = X(mkstride)(n, b);
     pln->bcsi = X(mkstride)(n, -b);
     pln->bioffset = ioffset(p->kind[0], n, b);

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     pln->slv = ego;

     /* the kernel's cost is given per genus->vl vector elements */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl / ego->desc->genus->vl,
                  &ego->desc->ops,
                  &pln->super.super.ops);

     /* the buffered variant is charged 2*n extra "other" ops per vector */
     if (ego->bufferedp)
          pln->super.super.ops.other += 2 * n * pln->vl;

     pln->super.super.could_prune_now_p = !ego->bufferedp;

     return &(pln->super.super);
}
/*
 * Plan a REDFT00 or RODFT00 transform by splitting it into two children:
 *   clde - a transform of the same kind and of length n0 - n/2 that reads
 *          every other input element (input stride 2*is), and
 *   cldo - an in-place R2HC of length n/2 on a contiguous scratch array.
 * Here n0 is the problem length and n is n0 - 1 for REDFT00, n0 + 1
 * otherwise.  For an in-place RODFT00 problem (inplace_odd) clde writes
 * with the input stride and its output is offset by one input stride.
 *
 * The scratch array exists only for planning and is freed on every path.
 *
 * Returns a null plan if the solver is not applicable or either child
 * cannot be planned.  When cldo cannot be planned, the already-created clde
 * is destroyed before returning; previously it was leaked.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const problem_rdft *p;
     plan *clde, *cldo;
     R *buf;
     INT n, n0;
     opcnt ops;
     int inplace_odd;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     p = (const problem_rdft *) p_;

     n = (n0 = p->sz->dims[0].n) + (p->kind[0] == REDFT00 ? (INT)-1 : (INT)1);
     A(n > 0 && n % 2 == 0);
     buf = (R *) MALLOC(sizeof(R) * (n/2), BUFFERS);

     inplace_odd = p->kind[0]==RODFT00 && p->I == p->O;
     clde = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
                             X(mktensor_1d)(n0-n/2, 2*p->sz->dims[0].is,
                                            inplace_odd ? p->sz->dims[0].is
                                            : p->sz->dims[0].os),
                             X(mktensor_0d)(),
                             TAINT(p->I
                                   + p->sz->dims[0].is * (p->kind[0]==RODFT00),
                                   p->vecsz->rnk ? p->vecsz->dims[0].is : 0),
                             TAINT(p->O
                                   + p->sz->dims[0].is * inplace_odd,
                                   p->vecsz->rnk ? p->vecsz->dims[0].os : 0),
                             p->kind[0]));
     if (!clde) {
          X(ifree)(buf);
          return (plan *)0;
     }

     cldo = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
                             X(mktensor_1d)(n/2, 1, 1),
                             X(mktensor_0d)(),
                             buf, buf, R2HC));
     X(ifree)(buf);
     if (!cldo) {
          /* no plan owns clde yet: release it so it is not leaked */
          X(plan_destroy_internal)(clde);
          return (plan *)0;
     }

     pln = MKPLAN_RDFT(P, &padt, p->kind[0] == REDFT00 ? apply_e : apply_o);

     pln->n = n;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;
     pln->clde = clde;
     pln->cldo = cldo;
     pln->td = 0;

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     /* per-vector cost of this plan's own work */
     X(ops_zero)(&ops);
     ops.other = n/2;
     ops.add = (p->kind[0]==REDFT00 ? (INT)2 : (INT)0) +
          (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;
     ops.mul = 1 + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;

     /* tweak ops.other so that r2hc-pad is used for small sizes, which
        seems to be a lot faster on my machine: */
     ops.other += 256;

     /* total = vl * (own cost + both children) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &clde->ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cldo->ops, &pln->super.super.ops);

     return &(pln->super.super);
}
/*
 * Plan a transform of length n using three child plans of length npad that
 * all run in place on a scratch array:
 *   cld1      - R2HC;
 *   cld2      - HC2R, or R2HC when R2HC_ONLY_CONV is set;
 *   cld_omega - R2HC, planned with the ESTIMATE flag, stored for later use
 *               (pln->omega itself starts out null).
 * npad is n - 1, or choose_transform_size(2*(n-1) - 1) when the solver asks
 * for padding (ego->pad).
 *
 * The scratch array exists only for planning and is freed before a plan is
 * returned.  Returns a null plan if the solver is not applicable or any
 * child cannot be planned.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     plan *cld1 = (plan *) 0;
     plan *cld2 = (plan *) 0;
     plan *cld_omega = (plan *) 0;
     R *buf = (R *) 0;
     problem *cldp;
     INT n, npad;
     INT is, os;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          return (plan *) 0;

     n = p->sz->dims[0].n;
     is = p->sz->dims[0].is;
     os = p->sz->dims[0].os;

     npad = ego->pad ? choose_transform_size(2 * (n - 1) - 1) : n - 1;

     /* initial allocation for the purpose of planning */
     buf = (R *) MALLOC(sizeof(R) * npad, BUFFERS);

     cld1 = X(mkplan_f_d)(plnr,
                          X(mkproblem_rdft_1_d)(X(mktensor_1d)(npad, 1, 1),
                                                X(mktensor_1d)(1, 0, 0),
                                                buf, buf,
                                                R2HC),
                          NO_SLOW, 0, 0);
     if (!cld1)
          goto nada;

     cldp =
          X(mkproblem_rdft_1_d)(
               X(mktensor_1d)(npad, 1, 1),
               X(mktensor_1d)(1, 0, 0),
               buf, buf,
#if R2HC_ONLY_CONV
               R2HC
#else
               HC2R
#endif
               );
     cld2 = X(mkplan_f_d)(plnr, cldp, NO_SLOW, 0, 0);
     if (!cld2)
          goto nada;

     /* plan for omega */
     cld_omega = X(mkplan_f_d)(plnr,
                               X(mkproblem_rdft_1_d)(
                                    X(mktensor_1d)(npad, 1, 1),
                                    X(mktensor_1d)(1, 0, 0),
                                    buf, buf, R2HC),
                               NO_SLOW, ESTIMATE, 0);
     if (!cld_omega)
          goto nada;

     /* deallocate buffers; let awake() or apply() allocate them for real */
     X(ifree)(buf);
     buf = 0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld1 = cld1;
     pln->cld2 = cld2;
     pln->cld_omega = cld_omega;
     pln->omega = 0;
     pln->n = n;
     pln->npad = npad;
     pln->is = is;
     pln->os = os;

     /* cost = cld1 + cld2 plus this plan's own work; cld_omega is not
        included in the sum */
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     pln->super.super.ops.other += (npad/2-1)*6 + npad + n + (n-1) * ego->pad;
     pln->super.super.ops.add += (npad/2-1)*2 + 2 + (n-1) * ego->pad;
     pln->super.super.ops.mul += (npad/2-1)*4 + 2 + ego->pad;
#if R2HC_ONLY_CONV
     pln->super.super.ops.other += n-2 - ego->pad;
     pln->super.super.ops.add += (npad/2-1)*2 + (n-2) - ego->pad;
#endif

     return &(pln->super.super);

 nada:
     X(ifree0)(buf);
     X(plan_destroy_internal)(cld_omega);
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     return 0;
}
/*
 * Single-threaded hc2hc plan constructor.
 *
 * n is factored as r * m, with r chosen by fftwf_choose_radix.  One twiddle
 * child (cldw) covering (m+2)/2 iterations is made through ego->mkcldw, and
 * one child plan (cld) is made for the r transforms of size m, looped over
 * the rank-1 vector (v, ivs, ovs) derived from p->vecsz.  R2HC installs
 * apply_dit, HC2R installs apply_dif.
 *
 * Returns a null plan if NO_NONTHREADEDP(plnr) holds, the solver is not
 * applicable, a child cannot be created, or the kind is neither R2HC nor
 * HC2R.  The last case used to fall out of the switch with pln == 0 and
 * dereference it whenever A() is compiled out; it now takes the common
 * cleanup path instead.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
     const problem_rdft *p;
     P *pln = 0;
     plan *cld = 0, *cldw = 0;
     INT n, r, m, v, ivs, ovs;
     iodim *d;

     static const plan_adt padt = {
          fftwf_rdft_solve, awake, print, destroy
     };

     if (NO_NONTHREADEDP(plnr) || !fftwf_hc2hc_applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_rdft *) p_;
     d = p->sz->dims;
     n = d[0].n;
     r = fftwf_choose_radix(ego->r, n);
     m = n / r;

     fftwf_tensor_tornk1(p->vecsz, &v, &ivs, &ovs);

     switch (p->kind[0]) {
         case R2HC:
              /* twiddle child works on the output array */
              cldw = ego->mkcldw(ego,
                                 R2HC, r, m, d[0].os, v, ovs, 0, (m+2)/2,
                                 p->O, plnr);
              if (!cldw) goto nada;

              cld = fftwf_mkplan_d(plnr,
                                   fftwf_mkproblem_rdft_d(
                                        fftwf_mktensor_1d(m, r * d[0].is,
                                                          d[0].os),
                                        fftwf_mktensor_2d(r, d[0].is,
                                                          m * d[0].os,
                                                          v, ivs, ovs),
                                        p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dit);
              break;

         case HC2R:
              /* twiddle child works on the input array */
              cldw = ego->mkcldw(ego,
                                 HC2R, r, m, d[0].is, v, ivs, 0, (m+2)/2,
                                 p->I, plnr);
              if (!cldw) goto nada;

              cld = fftwf_mkplan_d(plnr,
                                   fftwf_mkproblem_rdft_d(
                                        fftwf_mktensor_1d(m, d[0].is,
                                                          r * d[0].os),
                                        fftwf_mktensor_2d(r, m * d[0].is,
                                                          d[0].os,
                                                          v, ivs, ovs),
                                        p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dif);
              break;

         default:
              A(0);
              /* unsupported kind: no plan was made, so fail cleanly rather
                 than dereferencing the null pln below */
              goto nada;
     }

     pln->cld = cld;
     pln->cldw = cldw;
     pln->r = r;
     fftwf_ops_add(&cld->ops, &cldw->ops, &pln->super.super.ops);

     /* inherit could_prune_now_p attribute from cldw */
     pln->super.super.could_prune_now_p = cldw->could_prune_now_p;

     return &(pln->super.super);

 nada:
     fftwf_plan_destroy_internal(cldw);
     fftwf_plan_destroy_internal(cld);
     return (plan *) 0;
}