Exemple #1
0
/* Generic helper that builds the two child plans cld0 and cldm for the
   solvers that call it.

   cld0 is planned in place on IO with the caller's transform kind; cldm
   is planned in place on IO + s*(m/2) with kind R2HCII (when kind is an
   R2HC kind) or HC2RIII (otherwise).  Each child is given either the
   1d tensor of length r and stride m*s, or the rank-0 tensor:
     - cld0 gets the 1d tensor only when mstart == 0;
     - cldm gets the 1d tensor only when m is even and the range
       [mstart, mstart + mcount) reaches (m+2)/2.

   Returns 1 and stores the plans in *cld0p / *cldmp on success.
   Returns 0 on failure; in that case any plan already created is
   destroyed and the output pointers are not written. */
int X(hc2hc_mkcldrn)(rdft_kind kind, INT r, INT m, INT s,
                     INT mstart, INT mcount,
                     R *IO, planner *plnr,
                     plan **cld0p, plan **cldmp)
{
    tensor *radix_sz = X(mktensor_1d)(r, m * s, m * s);
    tensor *empty_sz = X(mktensor_0d)();
    INT mid_off = s * (m/2);
    plan *first = 0, *middle = 0;

    A(R2HC_KINDP(kind) || HC2R_KINDP(kind));
    A(mstart >= 0 && mcount > 0 && mstart + mcount <= (m + 2) / 2);

    first = X(mkplan_d)(plnr,
                        X(mkproblem_rdft_1)(mstart == 0 ? radix_sz : empty_sz,
                                            empty_sz, IO, IO, kind));
    if (first) {
        /* the middle child is only planned once the first one exists */
        middle = X(mkplan_d)(plnr,
                             X(mkproblem_rdft_1)(
                                 (m % 2 == 0 && mstart + mcount >= (m + 2) / 2)
                                     ? radix_sz : empty_sz,
                                 empty_sz, IO + mid_off, IO + mid_off,
                                 R2HC_KINDP(kind) ? R2HCII : HC2RIII));
    }

    /* the size tensors are released on both the success and failure paths */
    X(tensor_destroy2)(empty_sz, radix_sz);

    if (first && middle) {
        *cld0p = first;
        *cldmp = middle;
        return 1;
    }

    /* failure: at least one of the two plans is null here */
    X(plan_destroy_internal)(first);
    X(plan_destroy_internal)(middle);
    return 0;
}
Exemple #2
0
/* Variant of X(mkproblem_rdft2_d) that takes a single real pointer r0
   instead of the pair (r0, r1).  Used by the API.

   For rank 0 the second real pointer is simply r0.  Otherwise it is
   r0 offset by the last dimension's real-side stride (the input stride
   for R2HC kinds, the output stride for the others), and that stride
   is then doubled in sz before the problem is built.

   Both sz and vecsz are destroyed before returning; sz is modified in
   place beforehand. */
problem *X(mkproblem_rdft2_d_3pointers)(tensor *sz, tensor *vecsz,
                                        R *r0, R *cr, R *ci, rdft_kind kind)
{
     R *r1 = r0;
     int rnk = sz->rnk;
     problem *result;

     if (rnk != 0) {
          int last = rnk - 1;

          if (R2HC_KINDP(kind)) {
               r1 = r0 + sz->dims[last].is;
               sz->dims[last].is *= 2;
          } else {
               r1 = r0 + sz->dims[last].os;
               sz->dims[last].os *= 2;
          }
     }

     result = X(mkproblem_rdft2)(sz, vecsz, r0, r1, cr, ci, kind);
     X(tensor_destroy2)(vecsz, sz);
     return result;
}
Exemple #3
0
/* "zero" operation for an rdft2 problem.

   For R2HC kinds, the real arrays r0/r1 are handed to vrecur together
   with the vector and transform dimensions.  For the other kinds, the
   complex arrays cr/ci are handed to X(dft_zerotens) using a tensor
   made of vecsz followed by a copy of sz whose last dimension has been
   replaced by X(rdft2_complex_n) of its length. */
static void zero(const problem *ego_)
{
     const problem_rdft2 *prob = (const problem_rdft2 *) ego_;
     tensor *csz;
     tensor *all;
     int rnk;

     if (R2HC_KINDP(prob->kind)) {
          /* FIXME: can we avoid the double recursion somehow? */
          vrecur(prob->vecsz->dims, prob->vecsz->rnk,
                 prob->sz->dims, prob->sz->rnk,
                 UNTAINT(prob->r0), UNTAINT(prob->r1));
          return;
     }

     /* work on a private copy so the problem's own sz is left untouched */
     csz = X(tensor_copy)(prob->sz);
     rnk = csz->rnk;
     if (rnk > 0) {
          /* ~half as many complex outputs */
          int last = rnk - 1;
          csz->dims[last].n = X(rdft2_complex_n)(csz->dims[last].n, prob->kind);
     }

     all = X(tensor_append)(prob->vecsz, csz);
     X(tensor_destroy)(csz);

     X(dft_zerotens)(all, UNTAINT(prob->cr), UNTAINT(prob->ci));
     X(tensor_destroy)(all);
}
Exemple #4
0
/* Create the plan for this solver applied to problem p_.

   Returns a null plan when the solver (buffered or unbuffered variant,
   selected by ego->bufferedp) is not applicable to p_.  Otherwise a P
   plan is allocated with the apply routine matching the transform
   direction and the buffered flag, and filled in with:
     - the codelet k and transform length n;
     - strides for the problem's own arrays (rs0, rs, csr, csi, ioffset),
       where the "real" stride is the input stride for R2HC kinds and
       the output stride otherwise, and the "complex" stride is the
       other one;
     - strides for batch size b = compute_batchsize(n)
       (brs, bcsr, bcsi, bioffset);
     - the vector loop (vl, ivs, ovs) taken from p->vecsz;
     - the operation count, scaled from the codelet descriptor, plus
       2*n*vl "other" operations for the buffered variant.

   The planner argument is unused. */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), X(null_awake), print, destroy
     };

     const S *ego = (const S *) ego_;
     const problem_rdft *p;
     iodim *d;
     P *pln;
     INT n, rs, cs, b;

     UNUSED(plnr);

     /* each variant has its own applicability test */
     if (!(ego->bufferedp ? applicable_buf(ego_, p_) : applicable(ego_, p_)))
          return (plan *)0;

     p = (const problem_rdft *) p_;
     d = p->sz->dims;
     n = d[0].n;

     if (R2HC_KINDP(p->kind[0])) {
          /* real data is the input, complex data the output */
          rs = d[0].is;
          cs = d[0].os;
          pln = MKPLAN_RDFT(P, &padt,
                            ego->bufferedp ? apply_buf_r2hc : apply_r2hc);
     } else {
          /* complex data is the input, real data the output */
          rs = d[0].os;
          cs = d[0].is;
          pln = MKPLAN_RDFT(P, &padt,
                            ego->bufferedp ? apply_buf_hc2r : apply_hc2r);
     }

     pln->k = ego->k;
     pln->n = n;

     /* strides for the problem's own arrays */
     pln->rs0 = rs;
     pln->rs = X(mkstride)(n, 2 * rs);
     pln->csr = X(mkstride)(n, cs);
     pln->csi = X(mkstride)(n, -cs);
     pln->ioffset = ioffset(p->kind[0], n, cs);

     /* the same set of strides, computed for the batch size */
     b = compute_batchsize(n);
     pln->brs = X(mkstride)(n, 2 * b);
     pln->bcsr = X(mkstride)(n, b);
     pln->bcsi = X(mkstride)(n, -b);
     pln->bioffset = ioffset(p->kind[0], n, b);

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     pln->slv = ego;

     /* operation count: codelet ops scaled by the number of codelet calls */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl / ego->desc->genus->vl,
                  &ego->desc->ops,
                  &pln->super.super.ops);

     if (ego->bufferedp) {
          /* extra "other" operations charged to the buffered variant */
          pln->super.super.ops.other += 2 * n * pln->vl;
     }

     pln->super.super.could_prune_now_p = !ego->bufferedp;

     return &(pln->super.super);
}