/*
 * Plan an rdft2 problem around one of its vector dimensions.
 *
 * applicable() selects the vector dimension (vdim).  A child plan is
 * made for the same transform size with vecsz passed through
 * tensor_copy_except(..., vdim); the new plan stores that child along
 * with the length (vl = d->n) and the real/complex strides (rvs, cvs)
 * that rdft2_strides() reports for the selected dimension.
 *
 * Returns a null plan when the solver does not apply or when the child
 * problem could not be planned.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft2 *p;
     problem *cldp;
     plan *cld;
     P *pln;
     iodim *d;
     INT rvs, cvs;
     int vdim;

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;

     p = (const problem_rdft2 *) p_;
     d = &p->vecsz->dims[vdim];

     A(d->n > 1);  /* or else, p->ri + d->is etc. are invalid */

     X(rdft2_strides)(p->kind, d, &rvs, &cvs);

     /* child problem: same sz, vecsz without dimension vdim */
     cldp = X(mkproblem_rdft2_d)(
          X(tensor_copy)(p->sz),
          X(tensor_copy_except)(p->vecsz, vdim),
          TAINT(p->r0, rvs), TAINT(p->r1, rvs),
          TAINT(p->cr, cvs), TAINT(p->ci, cvs),
          p->kind);
     cld = X(mkplan_d)(plnr, cldp);
     if (!cld)
          return (plan *) 0;

     pln = MKPLAN_RDFT2(P, &padt, apply);
     pln->solver = ego;
     pln->cld = cld;
     pln->vl = d->n;
     pln->rvs = rvs;
     pln->cvs = cvs;

     /* operation count: a small constant plus the child's ops
        combined with vl through ops_madd2 */
     X(ops_zero)(&pln->super.super.ops);
     pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);

     /* pcost is left alone only for rank-1 transforms with n <= 128 */
     if (!(p->sz->rnk == 1 && p->sz->dims[0].n <= 128))
          pln->super.super.pcost = pln->vl * cld->pcost;

     return &(pln->super.super);
}
示例#2
0
/*
 * Create a plan with a zeroed operation count for problems accepted by
 * applicable().  The planner argument is not used.
 *
 * Returns a null plan when the problem is not applicable.
 */
static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), X(null_awake), print, X(plan_null_destroy)
     };
     plan_rdft2 *pln;

     UNUSED(plnr);

     if (applicable(ego, p)) {
          pln = MKPLAN_RDFT2(plan_rdft2, &padt, apply);
          X(ops_zero)(&pln->super.ops);
          return &(pln->super);
     }

     return (plan *) 0;
}
示例#3
0
/*
 * Create a plan for an rdft2 problem accepted by applicable().
 *
 * HC2R: a child plan (cldcpy) is first made from an rdft_0 problem over
 *       a copy of vecsz, going from p->cr to p->r0; the new plan's
 *       operation count is the child's.
 * R2HC: no child is planned; the apply routine is chosen by whether
 *       p->r0 and p->cr are the same pointer, the vector loop
 *       parameters come from tensor_tornk1(), and the cost is 3 * vl
 *       "other" operations.
 *
 * Returns a null plan when the problem is not applicable or the child
 * copy plan could not be created.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *p;
     plan *cldcpy = (plan *) 0;
     P *pln;

     UNUSED(ego_);

     if (!applicable(p_))
          return (plan *) 0;

     p = (const problem_rdft2 *) p_;

     if (p->kind == HC2R) {
          problem *cpy = X(mkproblem_rdft_0_d)(X(tensor_copy)(p->vecsz),
                                               p->cr, p->r0);
          cldcpy = X(mkplan_d)(plnr, cpy);
          if (!cldcpy)
               return (plan *) 0;
     }

     if (p->kind != R2HC) {
          pln = MKPLAN_RDFT2(P, &padt, apply_hc2r);
          pln->cldcpy = cldcpy;
          pln->super.super.ops = cldcpy->ops;
     } else {
          if (p->r0 == p->cr)
               pln = MKPLAN_RDFT2(P, &padt, apply_r2hc_inplace);
          else
               pln = MKPLAN_RDFT2(P, &padt, apply_r2hc);

          X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
          pln->cldcpy = cldcpy; /* still null: nothing was planned */

          /* vl loads, 2*vl stores */
          X(ops_other)(3 * pln->vl, &pln->super.super.ops);
     }

     return &(pln->super.super);
}
/*
 * Plan an rdft2 problem by cutting one vector dimension into chunks and
 * making one child plan per chunk.
 *
 * applicable() selects the dimension (vdim).  Its length d->n is divided
 * into nthr pieces of block_size elements (the last piece takes whatever
 * remains).  Before the children are planned, plnr->nthr is replaced by
 * ceil(plnr->nthr / nthr).  Child i works on the data pointers advanced
 * by i * its (real side) and i * ots (complex side), where its/ots are
 * the strides from rdft2_strides() scaled by block_size.
 *
 * The resulting plan's ops and pcost are the sums over all children.
 * Returns a null plan if the solver does not apply or any child fails
 * to plan; in the latter case the children made so far are destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft2 *p;
     P *pln;
     iodim *d;
     tensor *vecsz;
     plan **cldrn = (plan **) 0;
     INT its, ots, block_size;
     int vdim, nthr, i;

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;

     p = (const problem_rdft2 *) p_;
     d = &p->vecsz->dims[vdim];

     /* chunk length, number of chunks, thread count left for children */
     block_size = (d->n + plnr->nthr - 1) / plnr->nthr;
     nthr = (int)((d->n + block_size - 1) / block_size);
     plnr->nthr = (plnr->nthr + nthr - 1) / nthr;

     X(rdft2_strides)(p->kind, d, &its, &ots);
     its *= block_size;
     ots *= block_size;

     cldrn = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS);
     for (i = 0; i < nthr; ++i)
          cldrn[i] = (plan *) 0;

     /* scratch copy of vecsz whose vdim length is rewritten per chunk */
     vecsz = X(tensor_copy)(p->vecsz);
     for (i = 0; i < nthr; ++i) {
          if (i == nthr - 1)
               vecsz->dims[vdim].n = d->n - i*block_size;
          else
               vecsz->dims[vdim].n = block_size;

          cldrn[i] = X(mkplan_d)(plnr,
                                 X(mkproblem_rdft2)(p->sz, vecsz,
                                                    p->r0 + i*its,
                                                    p->r1 + i*its,
                                                    p->cr + i*ots,
                                                    p->ci + i*ots,
                                                    p->kind));
          if (!cldrn[i])
               goto nada;
     }
     X(tensor_destroy)(vecsz);

     pln = MKPLAN_RDFT2(P, &padt, apply);
     pln->solver = ego;
     pln->cldrn = cldrn;
     pln->nthr = nthr;
     pln->its = its;
     pln->ots = ots;

     X(ops_zero)(&pln->super.super.ops);
     pln->super.super.pcost = 0;
     for (i = 0; i < nthr; ++i) {
          plan *child = cldrn[i];
          X(ops_add2)(&child->ops, &pln->super.super.ops);
          pln->super.super.pcost += child->pcost;
     }

     return &(pln->super.super);

 nada:
     if (cldrn) {
          for (i = 0; i < nthr; ++i)
               X(plan_destroy_internal)(cldrn[i]);
          X(ifree)(cldrn);
     }
     X(tensor_destroy)(vecsz);
     return (plan *) 0;
}
/*
 * Plan an rdft2 problem that goes through an intermediate buffer.
 *
 * Three child plans are created, in this order:
 *   cld     - the rdft2 transform of nbuf vectors at a time between the
 *             user's real array and the buffer;
 *   cldcpy  - a rank-0 DFT problem that copies between the buffer and
 *             the user's complex arrays (out of the buffer for R2HC,
 *             into it for HC2R);
 *   cldrest - an rdft2 problem of vector length vl % nbuf, placed after
 *             the nbuf * (vl / nbuf) vectors covered by cld/cldcpy.
 *
 * The buffer allocated here is used for planning only; it is released
 * before cldrest is planned.
 *
 * Returns a null plan if the problem is not applicable or any child
 * cannot be planned; children created so far are destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *p = (const problem_rdft2 *) p_;
     P *pln;
     problem *cldp;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, ioffset, roffset;
     INT batched, in_rest, out_rest, real_rest, cplx_rest;
     INT ivs_nbuf, ovs_nbuf;
     opcnt per_batch;

     UNUSED(ego_);

     if (!applicable(p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl);
     /* complex-side rdft2 stores N+2 real numbers */
     bufdist = X(bufdist)(n + 2, vl);
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
        so as to allow optimizations in the copy plan */
     if (p->cr - p->ci > 0)
          roffset = (INT)1;
     else
          roffset = (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);

     /* offsets of the first vector that cld/cldcpy do not cover */
     batched = nbuf * (vl / nbuf);
     in_rest = ivs * batched;
     out_rest = ovs * batched;

     ivs_nbuf = ivs * nbuf;
     ovs_nbuf = ovs * nbuf;

     if (p->kind == R2HC) {
          /* allow destruction of input if problem is in place */
          cldp = X(mkproblem_rdft2_d)(
               X(mktensor_1d)(n, p->sz->dims[0].is, 2),
               X(mktensor_1d)(nbuf, ivs, bufdist),
               TAINT(p->r0, ivs_nbuf), TAINT(p->r1, ivs_nbuf),
               bufs + roffset, bufs + ioffset, p->kind);
          cld = X(mkplan_f_d)(plnr, cldp, 0, 0,
                              (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
          if (!cld)
               goto nada;

          /* copying back from the buffer is a rank-0 DFT: */
          cldp = X(mkproblem_dft_d)(
               X(mktensor_0d)(),
               X(mktensor_2d)(nbuf, bufdist, ovs,
                              n/2+1, 2, p->sz->dims[0].os),
               bufs + roffset, bufs + ioffset,
               TAINT(p->cr, ovs_nbuf), TAINT(p->ci, ovs_nbuf));
          cldcpy = X(mkplan_d)(plnr, cldp);
          if (!cldcpy)
               goto nada;

          /* real side is the input, complex side the output */
          real_rest = in_rest;
          cplx_rest = out_rest;
     } else {
          /* allow destruction of buffer */
          cldp = X(mkproblem_rdft2_d)(
               X(mktensor_1d)(n, 2, p->sz->dims[0].os),
               X(mktensor_1d)(nbuf, bufdist, ovs),
               TAINT(p->r0, ovs_nbuf), TAINT(p->r1, ovs_nbuf),
               bufs + roffset, bufs + ioffset, p->kind);
          cld = X(mkplan_f_d)(plnr, cldp, 0, 0, NO_DESTROY_INPUT);
          if (!cld)
               goto nada;

          /* copying input into buffer is a rank-0 DFT: */
          cldp = X(mkproblem_dft_d)(
               X(mktensor_0d)(),
               X(mktensor_2d)(nbuf, ivs, bufdist,
                              n/2+1, p->sz->dims[0].is, 2),
               TAINT(p->cr, ivs_nbuf), TAINT(p->ci, ivs_nbuf),
               bufs + roffset, bufs + ioffset);
          cldcpy = X(mkplan_d)(plnr, cldp);
          if (!cldcpy)
               goto nada;

          /* real side is the output, complex side the input */
          real_rest = out_rest;
          cplx_rest = in_rest;
     }

     /* the planning buffer is not needed for the remaining child */
     X(ifree)(bufs);
     bufs = 0;

     cldp = X(mkproblem_rdft2_d)(
          X(tensor_copy)(p->sz),
          X(mktensor_1d)(vl % nbuf, ivs, ovs),
          p->r0 + real_rest, p->r1 + real_rest,
          p->cr + cplx_rest, p->ci + cplx_rest,
          p->kind);
     cldrest = X(mkplan_d)(plnr, cldp);
     if (!cldrest)
          goto nada;

     pln = MKPLAN_RDFT2(P, &padt,
                        p->kind == R2HC ? apply_r2hc : apply_hc2r);

     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->nbuf = nbuf;
     pln->bufdist = bufdist;
     pln->ivs_by_nbuf = ivs_nbuf;
     pln->ovs_by_nbuf = ovs_nbuf;
     pln->roffset = roffset;
     pln->ioffset = ioffset;

     /* cost: (cld + cldcpy) combined with vl / nbuf, plus cldrest */
     X(ops_add)(&cld->ops, &cldcpy->ops, &per_batch);
     X(ops_madd)(vl / nbuf, &per_batch, &cldrest->ops,
                 &pln->super.super.ops);

     return &(pln->super.super);

 nada:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
示例#6
0
/*
 * Plan an rdft2 problem as an rdft transform through an intermediate
 * buffer.
 *
 * Two child plans are created, in this order:
 *   cld     - an rdft problem handling nbuf vectors at a time between
 *             the user's real array (p->r0) and the buffer (buffer is
 *             the output for R2HC, the input for HC2R);
 *   cldrest - an rdft2 problem of vector length vl % nbuf, placed after
 *             the nbuf * (vl / nbuf) vectors covered by cld.
 *
 * The buffer allocated here is used for planning only; it is released
 * before cldrest is planned.
 *
 * Returns a null plan if the problem is not applicable or any child
 * cannot be planned; children created so far are destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft2 *p = (const problem_rdft2 *) p_;
     P *pln;
     problem *cldp;
     plan *cld = (plan *) 0;
     plan *cldrest = (plan *) 0;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, rs;
     INT batched, in_rest, out_rest, real_rest, cplx_rest;

     if (!applicable(p_, ego, plnr))
          goto nada;

     n = p->sz->dims[0].n;
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(imax)(X(nbuf)(n, vl, 0), min_nbuf(p, n, vl));
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);

     /* offsets of the first vector that cld does not cover */
     batched = nbuf * (vl / nbuf);
     in_rest = ivs * batched;
     out_rest = ovs * batched;

     if (p->kind == R2HC) {
          cldp = X(mkproblem_rdft_d)(
               X(mktensor_1d)(n, p->sz->dims[0].is/2, 1),
               X(mktensor_1d)(nbuf, ivs, bufdist),
               TAINT(p->r0, ivs * nbuf), bufs, &p->kind);
          cld = X(mkplan_f_d)(plnr, cldp, 0, 0,
                              (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
          if (!cld)
               goto nada;

          /* real side is the input, complex side the output */
          real_rest = in_rest;
          cplx_rest = out_rest;
     } else {
          A(p->kind == HC2R);
          cldp = X(mkproblem_rdft_d)(
               X(mktensor_1d)(n, 1, p->sz->dims[0].os/2),
               X(mktensor_1d)(nbuf, bufdist, ovs),
               bufs, TAINT(p->r0, ovs * nbuf), &p->kind);
          /* always ok to destroy bufs */
          cld = X(mkplan_f_d)(plnr, cldp, 0, 0, NO_DESTROY_INPUT);
          if (!cld)
               goto nada;

          /* real side is the output, complex side the input */
          real_rest = out_rest;
          cplx_rest = in_rest;
     }

     /* the planning buffer is not needed for the remaining child */
     X(ifree)(bufs);
     bufs = 0;

     cldp = X(mkproblem_rdft2_d)(
          X(tensor_copy)(p->sz),
          X(mktensor_1d)(vl % nbuf, ivs, ovs),
          p->r0 + real_rest, p->r1 + real_rest,
          p->cr + cplx_rest, p->ci + cplx_rest,
          p->kind);
     cldrest = X(mkplan_d)(plnr, cldp);
     if (!cldrest)
          goto nada;

     pln = MKPLAN_RDFT2(P, &padt,
                        p->kind == R2HC ? apply_r2hc : apply_hc2r);

     pln->cld = cld;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;
     /* only the complex stride is kept; rs is discarded */
     X(rdft2_strides)(p->kind, &p->sz->dims[0], &rs, &pln->cs);
     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     /* cost: cld combined with vl / nbuf, plus cldrest, plus a
        per-vector term of n + 2 (R2HC) or n (otherwise) "other" ops */
     X(ops_madd)(vl / nbuf, &cld->ops, &cldrest->ops,
                 &pln->super.super.ops);
     if (p->kind == R2HC)
          pln->super.super.ops.other += (n + 2) * vl;
     else
          pln->super.super.ops.other += n * vl;

     return &(pln->super.super);

 nada:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
示例#7
0
/*
 * Plan a multi-dimensional rdft2 problem as two child plans.
 *
 * applicable() chooses a split rank; tensor_split() divides p->sz there
 * into sz1 and sz2.  The children are:
 *   cldr - an rdft2 problem of size sz2, with sz1 appended to the
 *          vector dimensions, on the original arrays;
 *   cldc - a complex DFT problem of size sz1 (in-place copy), operating
 *          in place on the complex arrays, with the in-place copies of
 *          vecsz and sz2 as vector dimensions.  The last dimension of
 *          the sz2 copy is first reduced to n/2 + 1.
 *
 * Returns a null plan if the solver does not apply or either child
 * cannot be planned.  The temporary tensors are destroyed on every
 * path that created them.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft2_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft2 *p;
     P *pln;
     plan *cldr = 0, *cldc = 0;
     tensor *sz1, *sz2, *vecszi, *sz2i;
     tensor *dftsz, *dftvecsz;
     problem *cldp;
     inplace_kind k;
     int spltrnk;

     if (!applicable(ego_, p_, plnr, &spltrnk))
          return (plan *) 0;

     p = (const problem_rdft2 *) p_;
     X(tensor_split)(p->sz, &sz1, spltrnk, &sz2);

     if (p->kind == R2HC)
          k = INPLACE_OS;
     else
          k = INPLACE_IS;
     vecszi = X(tensor_copy_inplace)(p->vecsz, k);
     sz2i = X(tensor_copy_inplace)(sz2, k);

     /* complex data is ~half of real */
     sz2i->dims[sz2i->rnk - 1].n = sz2i->dims[sz2i->rnk - 1].n/2 + 1;

     /* first child: the rdft2 part over sz2 */
     cldp = X(mkproblem_rdft2_d)(X(tensor_copy)(sz2),
                                 X(tensor_append)(p->vecsz, sz1),
                                 p->r0, p->r1,
                                 p->cr, p->ci, p->kind);
     cldr = X(mkplan_d)(plnr, cldp);
     if (!cldr)
          goto nada;

     /* second child: in-place complex DFT over sz1 */
     dftsz = X(tensor_copy_inplace)(sz1, k);
     dftvecsz = X(tensor_append)(vecszi, sz2i);
     if (p->kind == R2HC) {
          cldp = X(mkproblem_dft_d)(dftsz, dftvecsz,
                                    p->cr, p->ci, p->cr, p->ci);
     } else {
          /* HC2R must swap re/im parts to get IDFT */
          cldp = X(mkproblem_dft_d)(dftsz, dftvecsz,
                                    p->ci, p->cr, p->ci, p->cr);
     }
     cldc = X(mkplan_d)(plnr, cldp);
     if (!cldc)
          goto nada;

     pln = MKPLAN_RDFT2(P, &padt, p->kind == R2HC ? apply_r2hc : apply_hc2r);
     pln->solver = ego;
     pln->cldr = cldr;
     pln->cldc = cldc;

     X(ops_add)(&cldr->ops, &cldc->ops, &pln->super.super.ops);

     X(tensor_destroy4)(sz2i, vecszi, sz2, sz1);

     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cldr);
     X(plan_destroy_internal)(cldc);
     X(tensor_destroy4)(sz2i, vecszi, sz2, sz1);
     return (plan *) 0;
}