/*
 * Build a plan that handles one vector dimension of an rdft2 problem by
 * looping a child plan over it.
 *
 * applicable() picks the vector dimension (vdim).  The child problem is the
 * same transform with that dimension removed from vecsz.  The resulting plan
 * stores the child, the loop count d->n, and the two strides that
 * rdft2_strides reports for the removed dimension (applied to r0/r1 and to
 * cr/ci respectively).
 *
 * Returns 0 if the solver is not applicable or no child plan can be made.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *prob;
     problem *childprob;
     plan *child;
     iodim *d;
     INT rstride, cstride;
     int vdim;
     P *self;

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;

     prob = (const problem_rdft2 *) p_;
     d = prob->vecsz->dims + vdim;

     /* or else, pointers offset by the vector stride (p->r0 + stride etc.)
        are invalid */
     A(d->n > 1);

     X(rdft2_strides)(prob->kind, d, &rstride, &cstride);

     /* child: same transform, vector dimension vdim dropped */
     childprob = X(mkproblem_rdft2_d)(
          X(tensor_copy)(prob->sz),
          X(tensor_copy_except)(prob->vecsz, vdim),
          TAINT(prob->r0, rstride), TAINT(prob->r1, rstride),
          TAINT(prob->cr, cstride), TAINT(prob->ci, cstride),
          prob->kind);
     child = X(mkplan_d)(plnr, childprob);
     if (!child)
          return (plan *) 0;

     self = MKPLAN_RDFT2(P, &padt, apply);
     self->solver = (const S *) ego_;
     self->cld = child;
     self->vl = d->n;
     self->rvs = rstride;
     self->cvs = cstride;

     /* cost = small constant + vl * (child cost) */
     X(ops_zero)(&self->super.super.ops);
     self->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */
     X(ops_madd2)(self->vl, &child->ops, &self->super.super.ops);

     /* pcost is only overridden unless the transform is rank 1 with
        n <= 128; in that case it keeps whatever MKPLAN_RDFT2 left there */
     if (!(prob->sz->rnk == 1 && prob->sz->dims[0].n <= 128))
          self->super.super.pcost = self->vl * child->pcost;

     return &(self->super.super);
}
/*
 * Build the plan for an rdft2 problem that applicable() accepts for this
 * solver.  The plan uses the null awake/destroy hooks and has a zero
 * operation count.  The planner argument is not used.
 *
 * Returns 0 if the problem is not applicable.
 */
static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), X(null_awake), print, X(plan_null_destroy)
     };
     plan_rdft2 *nop;

     UNUSED(plnr);

     if (applicable(ego, p)) {
          nop = MKPLAN_RDFT2(plan_rdft2, &padt, apply);
          X(ops_zero)(&nop->super.ops);
          return &(nop->super);
     }

     return (plan *) 0;
}
/*
 * Build a plan for an rdft2 problem accepted by applicable(p_).
 *
 * HC2R: the work is delegated to a child plan obtained from
 *       mkproblem_rdft_0_d over vecsz, going from cr to r0; the plan's
 *       operation count is copied from that child.
 * R2HC: no child plan is made.  vecsz is collapsed to a single
 *       (vl, ivs, ovs) triple and one of two apply functions is chosen
 *       depending on whether r0 and cr are the same pointer.
 *
 * The solver argument is not used.  Returns 0 if the problem is not
 * applicable or the HC2R child plan cannot be made.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *prob;
     problem *copyprob;
     plan *copier = (plan *) 0;
     P *self;

     UNUSED(ego_);

     if (!applicable(p_))
          return (plan *) 0;

     prob = (const problem_rdft2 *) p_;

     /* the child plan must exist before the plan object is allocated, so
        that failure needs no cleanup */
     if (prob->kind == HC2R) {
          copyprob = X(mkproblem_rdft_0_d)(X(tensor_copy)(prob->vecsz),
                                           prob->cr, prob->r0);
          copier = X(mkplan_d)(plnr, copyprob);
          if (!copier)
               return (plan *) 0;
     }

     if (prob->kind == R2HC) {
          if (prob->r0 == prob->cr)
               self = MKPLAN_RDFT2(P, &padt, apply_r2hc_inplace);
          else
               self = MKPLAN_RDFT2(P, &padt, apply_r2hc);

          X(tensor_tornk1)(prob->vecsz, &self->vl, &self->ivs, &self->ovs);
          self->cldcpy = copier;

          /* vl loads, 2*vl stores */
          X(ops_other)(3 * self->vl, &self->super.super.ops);
     } else {
          self = MKPLAN_RDFT2(P, &padt, apply_hc2r);
          self->cldcpy = copier;
          self->super.super.ops = copier->ops;
     }

     return &(self->super.super);
}
/*
 * Build a plan that splits one vector dimension of an rdft2 problem into
 * up to plnr->nthr contiguous chunks and plans each chunk separately.
 *
 * applicable() picks the dimension (vdim).  Every chunk covers `chunk`
 * iterations except the last, which takes whatever is left.  Chunk k works
 * on the problem's pointers advanced by k * (stride * chunk), where the
 * strides come from rdft2_strides for the split dimension.
 *
 * Side effect: plnr->nthr is replaced by the rounded-up share
 * ceil(nthr / nchunks) before the children are planned, and it is not
 * restored by this function.
 *
 * The plan's ops and pcost are the sums over all children.  Returns 0 if
 * not applicable or if any child cannot be planned, in which case the
 * children made so far are destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *prob;
     plan **children = (plan **) 0;
     tensor *chunk_vecsz;
     problem *chunk_prob;
     iodim *split;
     INT chunk, rstep, cstep;
     int vdim, nchunks, k;
     P *self;

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;

     prob = (const problem_rdft2 *) p_;
     split = prob->vecsz->dims + vdim;

     /* iterations per chunk (rounded up), then the number of chunks that
        this chunk size actually needs */
     chunk = (split->n + plnr->nthr - 1) / plnr->nthr;
     nchunks = (int)((split->n + chunk - 1) / chunk);
     plnr->nthr = (plnr->nthr + nchunks - 1) / nchunks;

     /* distance between the starting points of consecutive chunks */
     X(rdft2_strides)(prob->kind, split, &rstep, &cstep);
     rstep *= chunk;
     cstep *= chunk;

     children = (plan **)MALLOC(sizeof(plan *) * nchunks, PLANS);
     for (k = 0; k < nchunks; ++k)
          children[k] = (plan *) 0;

     /* scratch copy of vecsz whose vdim extent is rewritten per chunk */
     chunk_vecsz = X(tensor_copy)(prob->vecsz);
     for (k = 0; k < nchunks; ++k) {
          if (k == nchunks - 1)
               chunk_vecsz->dims[vdim].n = split->n - k*chunk;
          else
               chunk_vecsz->dims[vdim].n = chunk;

          chunk_prob = X(mkproblem_rdft2)(prob->sz, chunk_vecsz,
                                          prob->r0 + k*rstep,
                                          prob->r1 + k*rstep,
                                          prob->cr + k*cstep,
                                          prob->ci + k*cstep,
                                          prob->kind);
          children[k] = X(mkplan_d)(plnr, chunk_prob);
          if (!children[k])
               goto nada;
     }
     X(tensor_destroy)(chunk_vecsz);

     self = MKPLAN_RDFT2(P, &padt, apply);
     self->solver = (const S *) ego_;
     self->cldrn = children;
     self->nthr = nchunks;
     self->its = rstep;
     self->ots = cstep;

     X(ops_zero)(&self->super.super.ops);
     self->super.super.pcost = 0;
     for (k = 0; k < nchunks; ++k) {
          X(ops_add2)(&children[k]->ops, &self->super.super.ops);
          self->super.super.pcost += children[k]->pcost;
     }

     return &(self->super.super);

 nada:
     if (children) {
          /* entries that were never planned are still null */
          for (k = 0; k < nchunks; ++k)
               X(plan_destroy_internal)(children[k]);
          X(ifree)(children);
     }
     X(tensor_destroy)(chunk_vecsz);
     return (plan *) 0;
}
/*
 * Build a buffered plan for an rdft2 problem.
 *
 * The vector loop (vl, ivs, ovs) is processed nbuf transforms at a time
 * through an intermediate buffer.  Three child plans are made:
 *   - batch plan:  the rdft2 transform between the real array and the
 *                  buffer, for nbuf vectors at once;
 *   - copy plan:   a rank-0 DFT that moves the complex data between the
 *                  buffer and cr/ci (direction depends on p->kind);
 *   - rest plan:   an unbuffered rdft2 for the trailing vl % nbuf vectors.
 *
 * The buffer allocated here exists only so that the children can be
 * planned; it is released before the rest plan is made.
 *
 * The solver argument is not used.  Returns 0 if not applicable or if any
 * child cannot be planned; everything made so far is destroyed then.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *p = (const problem_rdft2 *) p_;
     plan *batch_plan = (plan *) 0;
     plan *copy_plan = (plan *) 0;
     plan *rest_plan = (plan *) 0;
     problem *subprob;
     R *scratch = (R *) 0;
     INT nbuf = 0, bufdist, n, vl, batched;
     INT ivs, ovs, ioffset, roffset, in_rest, out_rest;
     opcnt batch_ops;
     P *self;

     UNUSED(ego_);

     if (!applicable(p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl);
     /* complex-side rdft2 stores N+2 real numbers */
     bufdist = X(bufdist)(n + 2, vl);
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
        so as to allow optimizations in the copy plan */
     if (p->cr - p->ci > 0)
          roffset = (INT)1;
     else
          roffset = (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     scratch = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);

     /* offsets of the first vector that is left to the rest plan */
     batched = nbuf * (vl / nbuf);
     in_rest = ivs * batched;
     out_rest = ovs * batched;

     if (p->kind == R2HC) {
          /* allow destruction of input if problem is in place */
          subprob = X(mkproblem_rdft2_d)(
               X(mktensor_1d)(n, p->sz->dims[0].is, 2),
               X(mktensor_1d)(nbuf, ivs, bufdist),
               TAINT(p->r0, ivs * nbuf), TAINT(p->r1, ivs * nbuf),
               scratch + roffset, scratch + ioffset,
               p->kind);
          batch_plan = X(mkplan_f_d)(plnr, subprob, 0, 0,
                                     (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
          if (!batch_plan)
               goto nada;

          /* copying back from the buffer is a rank-0 DFT: */
          subprob = X(mkproblem_dft_d)(
               X(mktensor_0d)(),
               X(mktensor_2d)(nbuf, bufdist, ovs,
                              n/2+1, 2, p->sz->dims[0].os),
               scratch + roffset, scratch + ioffset,
               TAINT(p->cr, ovs * nbuf), TAINT(p->ci, ovs * nbuf));
          copy_plan = X(mkplan_d)(plnr, subprob);
          if (!copy_plan)
               goto nada;

          /* the planning buffer is no longer needed */
          X(ifree)(scratch);
          scratch = 0;

          subprob = X(mkproblem_rdft2_d)(
               X(tensor_copy)(p->sz),
               X(mktensor_1d)(vl % nbuf, ivs, ovs),
               p->r0 + in_rest, p->r1 + in_rest,
               p->cr + out_rest, p->ci + out_rest,
               p->kind);
          rest_plan = X(mkplan_d)(plnr, subprob);
          if (!rest_plan)
               goto nada;

          self = MKPLAN_RDFT2(P, &padt, apply_r2hc);
     } else {
          /* allow destruction of buffer */
          subprob = X(mkproblem_rdft2_d)(
               X(mktensor_1d)(n, 2, p->sz->dims[0].os),
               X(mktensor_1d)(nbuf, bufdist, ovs),
               TAINT(p->r0, ovs * nbuf), TAINT(p->r1, ovs * nbuf),
               scratch + roffset, scratch + ioffset,
               p->kind);
          batch_plan = X(mkplan_f_d)(plnr, subprob, 0, 0, NO_DESTROY_INPUT);
          if (!batch_plan)
               goto nada;

          /* copying input into buffer is a rank-0 DFT: */
          subprob = X(mkproblem_dft_d)(
               X(mktensor_0d)(),
               X(mktensor_2d)(nbuf, ivs, bufdist,
                              n/2+1, p->sz->dims[0].is, 2),
               TAINT(p->cr, ivs * nbuf), TAINT(p->ci, ivs * nbuf),
               scratch + roffset, scratch + ioffset);
          copy_plan = X(mkplan_d)(plnr, subprob);
          if (!copy_plan)
               goto nada;

          /* the planning buffer is no longer needed */
          X(ifree)(scratch);
          scratch = 0;

          /* here the real side is the output and the complex side the
             input, hence the swapped offsets */
          subprob = X(mkproblem_rdft2_d)(
               X(tensor_copy)(p->sz),
               X(mktensor_1d)(vl % nbuf, ivs, ovs),
               p->r0 + out_rest, p->r1 + out_rest,
               p->cr + in_rest, p->ci + in_rest,
               p->kind);
          rest_plan = X(mkplan_d)(plnr, subprob);
          if (!rest_plan)
               goto nada;

          self = MKPLAN_RDFT2(P, &padt, apply_hc2r);
     }

     self->cld = batch_plan;
     self->cldcpy = copy_plan;
     self->cldrest = rest_plan;
     self->n = n;
     self->vl = vl;
     self->nbuf = nbuf;
     self->bufdist = bufdist;
     self->roffset = roffset;
     self->ioffset = ioffset;
     self->ivs_by_nbuf = ivs * nbuf;
     self->ovs_by_nbuf = ovs * nbuf;

     /* total = (vl / nbuf) * (batch + copy) + rest */
     X(ops_add)(&batch_plan->ops, &copy_plan->ops, &batch_ops);
     X(ops_madd)(vl / nbuf, &batch_ops, &rest_plan->ops,
                 &self->super.super.ops);

     return &(self->super.super);

 nada:
     X(ifree0)(scratch);
     X(plan_destroy_internal)(rest_plan);
     X(plan_destroy_internal)(copy_plan);
     X(plan_destroy_internal)(batch_plan);
     return (plan *) 0;
}
/*
 * Build a plan that solves an rdft2 problem through an rdft child plan
 * working on an intermediate buffer.
 *
 * The vector loop (vl, ivs, ovs) is processed nbuf transforms at a time.
 * Two child plans are made:
 *   - batch plan: an rdft of size n between the real array (r0) and the
 *                 buffer, for nbuf vectors at once;
 *   - rest plan:  an rdft2 for the trailing vl % nbuf vectors.
 *
 * The buffer allocated here exists only so that the batch plan can be
 * planned; it is released before the rest plan is made.
 *
 * Returns 0 if not applicable or if a child cannot be planned; everything
 * made so far is destroyed then.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft2 *p = (const problem_rdft2 *) p_;
     plan *batch_plan = (plan *) 0;
     plan *rest_plan = (plan *) 0;
     problem *subprob;
     R *scratch = (R *) 0;
     INT nbuf = 0, bufdist, n, vl, batched, copied;
     INT ivs, ovs, rs, in_rest, out_rest;
     P *self;

     if (!applicable(p_, ego, plnr))
          goto nada;

     n = p->sz->dims[0].n;
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(imax)(X(nbuf)(n, vl, 0), min_nbuf(p, n, vl));
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* initial allocation for the purpose of planning */
     scratch = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);

     /* offsets of the first vector that is left to the rest plan */
     batched = nbuf * (vl / nbuf);
     in_rest = ivs * batched;
     out_rest = ovs * batched;

     if (p->kind == R2HC) {
          subprob = X(mkproblem_rdft_d)(
               X(mktensor_1d)(n, p->sz->dims[0].is/2, 1),
               X(mktensor_1d)(nbuf, ivs, bufdist),
               TAINT(p->r0, ivs * nbuf), scratch, &p->kind);
          batch_plan = X(mkplan_f_d)(plnr, subprob, 0, 0,
                                     (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
          if (!batch_plan)
               goto nada;

          /* the planning buffer is no longer needed */
          X(ifree)(scratch);
          scratch = 0;

          subprob = X(mkproblem_rdft2_d)(
               X(tensor_copy)(p->sz),
               X(mktensor_1d)(vl % nbuf, ivs, ovs),
               p->r0 + in_rest, p->r1 + in_rest,
               p->cr + out_rest, p->ci + out_rest,
               p->kind);
          rest_plan = X(mkplan_d)(plnr, subprob);
          if (!rest_plan)
               goto nada;

          self = MKPLAN_RDFT2(P, &padt, apply_r2hc);
     } else {
          A(p->kind == HC2R);

          subprob = X(mkproblem_rdft_d)(
               X(mktensor_1d)(n, 1, p->sz->dims[0].os/2),
               X(mktensor_1d)(nbuf, bufdist, ovs),
               scratch, TAINT(p->r0, ovs * nbuf), &p->kind);
          /* always ok to destroy bufs */
          batch_plan = X(mkplan_f_d)(plnr, subprob, 0, 0, NO_DESTROY_INPUT);
          if (!batch_plan)
               goto nada;

          /* the planning buffer is no longer needed */
          X(ifree)(scratch);
          scratch = 0;

          /* here the real side is the output and the complex side the
             input, hence the swapped offsets */
          subprob = X(mkproblem_rdft2_d)(
               X(tensor_copy)(p->sz),
               X(mktensor_1d)(vl % nbuf, ivs, ovs),
               p->r0 + out_rest, p->r1 + out_rest,
               p->cr + in_rest, p->ci + in_rest,
               p->kind);
          rest_plan = X(mkplan_d)(plnr, subprob);
          if (!rest_plan)
               goto nada;

          self = MKPLAN_RDFT2(P, &padt, apply_hc2r);
     }

     self->cld = batch_plan;
     self->cldrest = rest_plan;
     self->n = n;
     self->vl = vl;
     self->ivs = ivs;
     self->ovs = ovs;
     self->nbuf = nbuf;
     self->bufdist = bufdist;

     /* only the second stride reported by rdft2_strides is kept; rs is a
        throw-away output */
     X(rdft2_strides)(p->kind, &p->sz->dims[0], &rs, &self->cs);

     /* total = (vl / nbuf) * batch + rest, plus a per-vector term of
        n + 2 (R2HC) or n (HC2R) "other" operations.
        NOTE(review): presumably this accounts for the copy between the
        buffer and the complex arrays done by apply_* -- confirm there. */
     X(ops_madd)(vl / nbuf, &batch_plan->ops, &rest_plan->ops,
                 &self->super.super.ops);
     copied = (p->kind == R2HC) ? (n + 2) : n;
     self->super.super.ops.other += copied * vl;

     return &(self->super.super);

 nada:
     X(ifree0)(scratch);
     X(plan_destroy_internal)(rest_plan);
     X(plan_destroy_internal)(batch_plan);
     return (plan *) 0;
}
/*
 * Build a plan for a multi-dimensional rdft2 problem by splitting its size
 * tensor in two at the rank chosen by applicable() (spltrnk):
 *
 *   - real child (cldr):    an rdft2 over the second part (sz2), with the
 *                           first part (sz1) appended to the vector loop;
 *   - complex child (cldc): an in-place complex dft over sz1 on cr/ci,
 *                           looping over vecsz and over sz2 with its last
 *                           dimension shrunk to n/2 + 1.
 *
 * The in-place tensors take their strides from the output side for R2HC
 * and from the input side for HC2R.
 *
 * Returns 0 if not applicable or if either child cannot be planned.  The
 * temporary tensors are destroyed on every path after the split.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *prob;
     plan *real_plan = 0, *cplx_plan = 0;
     tensor *head, *tail, *vec_inplace, *tail_inplace;
     problem *subprob;
     inplace_kind side;
     int spltrnk, lastdim;
     P *self;

     if (!applicable(ego_, p_, plnr, &spltrnk))
          return (plan *) 0;

     prob = (const problem_rdft2 *) p_;
     X(tensor_split)(prob->sz, &head, spltrnk, &tail);

     if (prob->kind == R2HC)
          side = INPLACE_OS;
     else
          side = INPLACE_IS;
     vec_inplace = X(tensor_copy_inplace)(prob->vecsz, side);
     tail_inplace = X(tensor_copy_inplace)(tail, side);

     /* complex data is ~half of real */
     lastdim = tail_inplace->rnk - 1;
     tail_inplace->dims[lastdim].n = tail_inplace->dims[lastdim].n/2 + 1;

     subprob = X(mkproblem_rdft2_d)(X(tensor_copy)(tail),
                                    X(tensor_append)(prob->vecsz, head),
                                    prob->r0, prob->r1,
                                    prob->cr, prob->ci, prob->kind);
     real_plan = X(mkplan_d)(plnr, subprob);
     if (!real_plan)
          goto nada;

     if (prob->kind == R2HC) {
          subprob = X(mkproblem_dft_d)(
               X(tensor_copy_inplace)(head, side),
               X(tensor_append)(vec_inplace, tail_inplace),
               prob->cr, prob->ci, prob->cr, prob->ci);
     } else {
          /* HC2R must swap re/im parts to get IDFT */
          subprob = X(mkproblem_dft_d)(
               X(tensor_copy_inplace)(head, side),
               X(tensor_append)(vec_inplace, tail_inplace),
               prob->ci, prob->cr, prob->ci, prob->cr);
     }
     cplx_plan = X(mkplan_d)(plnr, subprob);
     if (!cplx_plan)
          goto nada;

     if (prob->kind == R2HC)
          self = MKPLAN_RDFT2(P, &padt, apply_r2hc);
     else
          self = MKPLAN_RDFT2(P, &padt, apply_hc2r);

     self->solver = (const S *) ego_;
     self->cldr = real_plan;
     self->cldc = cplx_plan;
     X(ops_add)(&real_plan->ops, &cplx_plan->ops, &self->super.super.ops);

     X(tensor_destroy4)(tail_inplace, vec_inplace, tail, head);
     return &(self->super.super);

 nada:
     X(plan_destroy_internal)(real_plan);
     X(plan_destroy_internal)(cplx_plan);
     X(tensor_destroy4)(tail_inplace, vec_inplace, tail, head);
     return (plan *) 0;
}