/* generic routine that produces cld0 and cldm, used by inferior solvers */ int X(hc2hc_mkcldrn)(rdft_kind kind, INT r, INT m, INT s, INT mstart, INT mcount, R *IO, planner *plnr, plan **cld0p, plan **cldmp) { tensor *radix = X(mktensor_1d)(r, m * s, m * s); tensor *null = X(mktensor_0d)(); INT imid = s * (m/2); plan *cld0 = 0, *cldm = 0; A(R2HC_KINDP(kind) || HC2R_KINDP(kind)); A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m + 2) / 2); cld0 = X(mkplan_d)(plnr, X(mkproblem_rdft_1)(mstart == 0 ? radix : null, null, IO, IO, kind)); if (!cld0) goto nada; cldm = X(mkplan_d)(plnr, X(mkproblem_rdft_1)( (m%2 || mstart+mcount < (m+2)/2) ? null : radix, null, IO + imid, IO + imid, R2HC_KINDP(kind) ? R2HCII : HC2RIII)); if (!cldm) goto nada; X(tensor_destroy2)(null, radix); *cld0p = cld0; *cldmp = cldm; return 1; nada: X(tensor_destroy2)(null, radix); X(plan_destroy_internal)(cld0); X(plan_destroy_internal)(cldm); return 0; }
/*
 * Same as X(mkproblem_rdft2_d), but with only one R pointer.
 * Used by the API.
 *
 * The second real pointer r1 is derived from r0:
 *   - rank 0: r1 is simply r0;
 *   - otherwise r1 is r0 advanced by the stride of the last dimension
 *     of sz (the `is` stride for R2HC kinds, the `os` stride for all
 *     other kinds), and that stride is then doubled in place in sz.
 *
 * Both sz and vecsz are destroyed before returning, so the caller
 * must not use them afterwards.
 */
problem *X(mkproblem_rdft2_d_3pointers)(tensor *sz, tensor *vecsz,
                                        R *r0, R *cr, R *ci,
                                        rdft_kind kind)
{
     const int rnk = sz->rnk;
     R *r1 = r0;
     problem *result;

     if (rnk != 0) {
          const int last = rnk - 1;

          if (R2HC_KINDP(kind)) {
               r1 = r0 + sz->dims[last].is;
               sz->dims[last].is *= 2;
          } else {
               r1 = r0 + sz->dims[last].os;
               sz->dims[last].os *= 2;
          }
     }

     result = X(mkproblem_rdft2)(sz, vecsz, r0, r1, cr, ci, kind);
     X(tensor_destroy2)(vecsz, sz);

     return result;
}
static void zero(const problem *ego_) { const problem_rdft2 *ego = (const problem_rdft2 *) ego_; if (R2HC_KINDP(ego->kind)) { /* FIXME: can we avoid the double recursion somehow? */ vrecur(ego->vecsz->dims, ego->vecsz->rnk, ego->sz->dims, ego->sz->rnk, UNTAINT(ego->r0), UNTAINT(ego->r1)); } else { tensor *sz; tensor *sz2 = X(tensor_copy)(ego->sz); int rnk = sz2->rnk; if (rnk > 0) /* ~half as many complex outputs */ sz2->dims[rnk-1].n = X(rdft2_complex_n)(sz2->dims[rnk-1].n, ego->kind); sz = X(tensor_append)(ego->vecsz, sz2); X(tensor_destroy)(sz2); X(dft_zerotens)(sz, UNTAINT(ego->cr), UNTAINT(ego->ci)); X(tensor_destroy)(sz); } }
/*
 * Create a plan for problem p_ using solver ego_.
 *
 * Returns a null plan when the problem is not applicable
 * (applicable_buf is consulted for buffered solvers, applicable
 * otherwise).  The planner argument is not used.
 *
 * The strides come from the first dimension of p->sz: for R2HC kinds
 * the real side uses the input stride and the complex side the output
 * stride; for all other kinds the roles are swapped.  A second set of
 * strides/offset is built from the batch size returned by
 * compute_batchsize(n).
 *
 * The operation count is the codelet's count scaled by
 * vl / genus->vl; buffered plans add 2 * n * vl to `other`.
 * could_prune_now_p is set only for non-buffered plans.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), X(null_awake), print, destroy
     };

     const S *slv = (const S *) ego_;
     const problem_rdft *prb;
     iodim *dim0;
     P *pln;
     plan *base;
     INT rstride, cstride, batch, len;

     UNUSED(plnr);

     if (!(slv->bufferedp ? applicable_buf(ego_, p_)
                          : applicable(ego_, p_)))
          return (plan *)0;

     prb = (const problem_rdft *) p_;
     dim0 = prb->sz->dims;

     if (R2HC_KINDP(prb->kind[0])) {
          rstride = dim0[0].is;
          cstride = dim0[0].os;
          if (slv->bufferedp)
               pln = MKPLAN_RDFT(P, &padt, apply_buf_r2hc);
          else
               pln = MKPLAN_RDFT(P, &padt, apply_r2hc);
     } else {
          rstride = dim0[0].os;
          cstride = dim0[0].is;
          if (slv->bufferedp)
               pln = MKPLAN_RDFT(P, &padt, apply_buf_hc2r);
          else
               pln = MKPLAN_RDFT(P, &padt, apply_hc2r);
     }

     len = dim0[0].n;

     pln->k = slv->k;
     pln->n = len;

     /* strides used when operating directly on the user arrays */
     pln->rs0 = rstride;
     pln->rs = X(mkstride)(len, 2 * rstride);
     pln->csr = X(mkstride)(len, cstride);
     pln->csi = X(mkstride)(len, -cstride);
     pln->ioffset = ioffset(prb->kind[0], len, cstride);

     /* strides derived from the batch size */
     batch = compute_batchsize(len);
     pln->brs = X(mkstride)(len, 2 * batch);
     pln->bcsr = X(mkstride)(len, batch);
     pln->bcsi = X(mkstride)(len, -batch);
     pln->bioffset = ioffset(prb->kind[0], len, batch);

     X(tensor_tornk1)(prb->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     pln->slv = slv;

     /* cost estimate */
     base = &(pln->super.super);
     X(ops_zero)(&base->ops);
     X(ops_madd2)(pln->vl / slv->desc->genus->vl,
                  &slv->desc->ops,
                  &base->ops);

     if (slv->bufferedp)
          base->ops.other += 2 * len * pln->vl;

     base->could_prune_now_p = !slv->bufferedp;

     return base;
}