/*
 * Build a DFT plan that loops a child plan over one vector dimension.
 *
 * applicable() selects the vector dimension (vdim).  The child is planned
 * for the same transform size with that dimension removed from the vector
 * tensor; the resulting plan records the removed dimension's length and
 * input/output strides (vl, ivs, ovs).
 *
 * Returns 0 when the solver is not applicable or no child plan is found.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };
     const problem_dft *prob = (const problem_dft *) p_;
     const S *slv = (const S *) ego_;
     const iodim *loopdim;
     plan *child, *self;
     P *pln;
     int vdim;

     if (!applicable(ego_, p_, plnr, &vdim))
	  return 0;

     /* the vector dimension that this plan iterates over */
     loopdim = &prob->vecsz->dims[vdim];
     A(loopdim->n > 1);

     child = X(mkplan_d)(
	  plnr,
	  X(mkproblem_dft_d)(
	       X(tensor_copy)(prob->sz),
	       X(tensor_copy_except)(prob->vecsz, vdim),
	       TAINT(prob->ri, loopdim->is), TAINT(prob->ii, loopdim->is),
	       TAINT(prob->ro, loopdim->os), TAINT(prob->io, loopdim->os)));
     if (!child)
	  return 0;

     pln = MKPLAN_DFT(P, &padt, apply);
     pln->solver = slv;
     pln->cld = child;
     pln->vl = loopdim->n;
     pln->ivs = loopdim->is;
     pln->ovs = loopdim->os;

     /* cost: vl times the child, plus a small constant in ops.other */
     self = &pln->super.super;
     X(ops_zero)(&self->ops);
     self->ops.other = 3.14159; /* magic to prefer codelet loops */
     X(ops_madd2)(pln->vl, &child->ops, &self->ops);
     self->pcost = pln->vl * child->pcost;

     return self;
}
/* Example no. 2 */
/* 0 */
/*
 * Create the plan for a direct hc2hc step.
 *
 * Two child rdft plans are made first: one at IO using `kind`, and one at
 * IO + (m / 2) * ms using R2HCII (when kind == R2HC) or HC2RIII otherwise.
 * Each child gets a 1d size tensor (r, rs, rs) when CLD0P / CLDMP say so,
 * and a 0d tensor otherwise.  The plan then records the codelet, the
 * strides and the [mb, me) range left after the children are accounted for.
 *
 * Returns 0 when not applicable or when either child cannot be planned.
 */
static plan *mkcldw(const hc2hc_solver *ego_, 
		    rdft_kind kind, INT r, INT m, INT ms, INT v, INT vs, 
		    INT mstart, INT mcount,
		    R *IO, planner *plnr)
{
     static const plan_adt padt = {
	  0, awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const hc2hc_desc *desc = slv->desc;
     const INT rs = m * ms;
     const INT imid = (m / 2) * ms;
     plan *first = 0, *middle = 0, *self;
     rdft_kind midkind;
     P *pln;

     if (!applicable(slv, kind, r, m, v, plnr))
	  return 0;

     first = X(mkplan_d)(
	  plnr,
	  X(mkproblem_rdft_1_d)(
	       (CLD0P(mstart) ?
		X(mktensor_1d)(r, rs, rs) : X(mktensor_0d)()),
	       X(mktensor_0d)(),
	       TAINT(IO, vs), TAINT(IO, vs),
	       kind));
     if (!first)
	  goto nada;

     midkind = (kind == R2HC) ? R2HCII : HC2RIII;
     middle = X(mkplan_d)(
	  plnr,
	  X(mkproblem_rdft_1_d)(
	       (CLDMP(m, mstart, mcount) ?
		X(mktensor_1d)(r, rs, rs) : X(mktensor_0d)()),
	       X(mktensor_0d)(),
	       TAINT(IO + imid, vs), TAINT(IO + imid, vs),
	       midkind));
     if (!middle)
	  goto nada;

     if (slv->bufferedp)
	  pln = MKPLAN_HC2HC(P, &padt, apply_buf);
     else
	  pln = MKPLAN_HC2HC(P, &padt, apply);

     pln->slv = slv;
     pln->k = slv->k;
     pln->td = 0;
     pln->cld0 = first;
     pln->cldm = middle;
     pln->r = r;
     pln->m = m;
     pln->ms = ms;
     pln->v = v;
     pln->vs = vs;
     pln->rs = X(mkstride)(r, rs);
     pln->brs = X(mkstride)(r, 2 * compute_batchsize(r));
     /* shrink the range by whatever the two children cover */
     pln->mb = mstart + CLD0P(mstart);
     pln->me = mstart + mcount - CLDMP(m, mstart, mcount);

     self = &pln->super.super;
     X(ops_zero)(&self->ops);
     X(ops_madd2)(v * ((pln->me - pln->mb) / desc->genus->vl),
		  &desc->ops, &self->ops);
     X(ops_madd2)(v, &first->ops, &self->ops);
     X(ops_madd2)(v, &middle->ops, &self->ops);

     /* extra cost charged to the buffered variant */
     if (slv->bufferedp)
	  self->ops.other += 4 * r * (pln->me - pln->mb) * v;

     self->could_prune_now_p =
	  (!slv->bufferedp && r >= 5 && r < 64 && m >= r);

     return self;

 nada:
     X(plan_destroy_internal)(first);
     X(plan_destroy_internal)(middle);
     return 0;
}
/* Example no. 3 */
/* 0 */
/*
 * Create a buffered DFT plan made of three child plans:
 *   cld     - transforms nbuf vectors from the input into a scratch buffer,
 *   cldcpy  - rank-0 problem copying the buffer to the output,
 *   cldrest - handles the vl % nbuf vectors left over.
 *
 * The scratch buffer allocated here is only used while planning; it is
 * freed before cldrest is planned.
 *
 * Returns 0 when the solver is not applicable or any child plan cannot be
 * created; everything acquired so far is released on that path.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_dft *p = (const problem_dft *) p_;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, roffset, ioffset;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);

     /* vector length and input/output vector strides */
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     /* number of buffers and the distance between consecutive buffers */
     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
	so as to allow optimizations in the copy plan */
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);

     /* allow destruction of input if problem is in place */
     /* NOTE(review): presumably the last argument of X(mkplan_f_d) lists
	planner flags to clear, which is what makes the comment above match
	the in-place test -- confirm against its definition */
     cld = X(mkplan_f_d)(plnr,
			 X(mkproblem_dft_d)(
			      X(mktensor_1d)(n, p->sz->dims[0].is, 2),
			      X(mktensor_1d)(nbuf, ivs, bufdist * 2),
			      TAINT(p->ri, ivs * nbuf),
			      TAINT(p->ii, ivs * nbuf),
			      bufs + roffset, 
			      bufs + ioffset),
			 0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
     if (!cld)
          goto nada;

     /* copying back from the buffer is a rank-0 transform: */
     cldcpy = X(mkplan_d)(plnr,
			  X(mkproblem_dft_d)(
			       X(mktensor_0d)(),
			       X(mktensor_2d)(nbuf, bufdist * 2, ovs,
					      n, 2, p->sz->dims[0].os),
			       bufs + roffset, 
			       bufs + ioffset, 
			       TAINT(p->ro, ovs * nbuf), 
			       TAINT(p->io, ovs * nbuf)));
     if (!cldcpy)
          goto nada;

     /* deallocate buffers, let apply() allocate them for real */
     X(ifree)(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
	  /* offsets of the first vector not covered by a full group of
	     nbuf buffers */
	  INT id = ivs * (nbuf * (vl / nbuf));
	  INT od = ovs * (nbuf * (vl / nbuf));
	  cldrest = X(mkplan_d)(plnr, 
				X(mkproblem_dft_d)(
				     X(tensor_copy)(p->sz),
				     X(mktensor_1d)(vl % nbuf, ivs, ovs),
				     p->ri+id, p->ii+id, p->ro+od, p->io+od));
     }
     if (!cldrest)
          goto nada;

     pln = MKPLAN_DFT(P, &padt, apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;
     pln->roffset = roffset;
     pln->ioffset = ioffset;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
	  /* combine cld and cldcpy, then fold in cldrest with a
	     multiplier of vl / nbuf */
	  opcnt t;
	  X(ops_add)(&cld->ops, &cldcpy->ops, &t);
	  X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     /* shared failure path: release the buffer and any child plans */
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
/*
 * Create the plan for a direct hc2c step.
 *
 * Two child rdft2 plans are made first: one at (cr, ci) using `kind`, and
 * one at (cr + imid, ci + imid), imid = (m / 2) * ms, using R2HCII (when
 * kind == R2HC) or HC2RIII otherwise.  The second child gets a 0d size
 * tensor when m is odd.  The apply routine is chosen from the solver's
 * bufferedp flag and from the extra_iter value reported by applicable().
 *
 * Returns 0 when not applicable or when either child cannot be planned.
 */
static plan *mkcldw(const hc2c_solver *ego_, rdft_kind kind,
		    INT r, INT rs,
		    INT m, INT ms, 
		    INT v, INT vs,
		    R *cr, R *ci,
		    planner *plnr)
{
     static const plan_adt padt = {
	  0, awake, print, destroy
     };
     const S *slv = (const S *) ego_;
     const hc2c_desc *desc = slv->desc;
     const INT imid = (m / 2) * ms;
     plan *first = 0, *middle = 0, *self;
     rdft_kind midkind;
     INT extra_iter;
     P *pln;

     if (!applicable(slv, kind, r, rs, m, ms, v, vs, cr, ci, plnr, 
		     &extra_iter))
	  return 0;

     first = X(mkplan_d)(
	  plnr,
	  X(mkproblem_rdft2_d)(X(mktensor_1d)(r, rs, rs),
			       X(mktensor_0d)(),
			       TAINT(cr, vs), TAINT(ci, vs),
			       TAINT(cr, vs), TAINT(ci, vs),
			       kind));
     if (!first)
	  goto nada;

     midkind = (kind == R2HC) ? R2HCII : HC2RIII;
     middle = X(mkplan_d)(
	  plnr,
	  X(mkproblem_rdft2_d)(((m % 2 == 0) ?
				X(mktensor_1d)(r, rs, rs) : X(mktensor_0d)()),
			       X(mktensor_0d)(),
			       TAINT(cr + imid, vs), TAINT(ci + imid, vs),
			       TAINT(cr + imid, vs), TAINT(ci + imid, vs),
			       midkind));
     if (!middle)
	  goto nada;

     if (slv->bufferedp)
	  pln = MKPLAN_HC2C(P, &padt, apply_buf);
     else if (extra_iter)
	  pln = MKPLAN_HC2C(P, &padt, apply_extra_iter);
     else
	  pln = MKPLAN_HC2C(P, &padt, apply);

     pln->slv = slv;
     pln->k = slv->k;
     pln->td = 0;
     pln->cld0 = first;
     pln->cldm = middle;
     pln->extra_iter = extra_iter;
     pln->r = r;
     pln->m = m;
     pln->ms = ms;
     pln->v = v;
     pln->vs = vs;
     pln->rs = X(mkstride)(r, rs);
     pln->brs = X(mkstride)(r, 4 * compute_batchsize(r));

     self = &pln->super.super;
     X(ops_zero)(&self->ops);
     X(ops_madd2)(v * (((m - 1) / 2) / desc->genus->vl),
		  &desc->ops, &self->ops);
     X(ops_madd2)(v, &first->ops, &self->ops);
     X(ops_madd2)(v, &middle->ops, &self->ops);

     /* extra cost charged to the buffered variant */
     if (slv->bufferedp)
	  self->ops.other += 4 * r * m * v;

     return self;

 nada:
     X(plan_destroy_internal)(first);
     X(plan_destroy_internal)(middle);
     return 0;
}
/*
 * Create a buffered rdft2 plan made of three child plans:
 *   cld     - the rdft2 transform between the real arrays and a scratch
 *             buffer, for nbuf vectors at a time,
 *   cldcpy  - rank-0 DFT problem copying between the buffer and the
 *             complex arrays (out of the buffer for R2HC, into it
 *             otherwise),
 *   cldrest - handles the vl % nbuf vectors left over.
 *
 * The scratch buffer allocated here is only used while planning; it is
 * freed before cldrest is planned.
 *
 * Returns 0 when not applicable or any child plan cannot be created;
 * everything acquired so far is released on that path.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_rdft2 *p = (const problem_rdft2 *) p_;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, ioffset, roffset, id, od;

     static const plan_adt padt = {
	  X(rdft2_solve), awake, print, destroy
     };

     UNUSED(ego_);

     if (!applicable(p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);
     /* vector length and input/output vector strides */
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl);
     bufdist = X(bufdist)(n + 2, vl); /* complex-side rdft2 stores N+2
					 real numbers */
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
	so as to allow optimizations in the copy plan */
     roffset = (p->cr - p->ci > 0) ? (INT)1 : (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);

     /* offsets of the first vector not covered by a full group of nbuf
	buffers; used for cldrest below */
     id = ivs * (nbuf * (vl / nbuf));
     od = ovs * (nbuf * (vl / nbuf));

     if (p->kind == R2HC) {
	  /* allow destruction of input if problem is in place */
	  /* NOTE(review): presumably the last argument of X(mkplan_f_d)
	     lists planner flags to clear -- confirm */
	  cld = X(mkplan_f_d)(
	       plnr, 
	       X(mkproblem_rdft2_d)(
		    X(mktensor_1d)(n, p->sz->dims[0].is, 2),
		    X(mktensor_1d)(nbuf, ivs, bufdist),
		    TAINT(p->r0, ivs * nbuf), TAINT(p->r1, ivs * nbuf),
		    bufs + roffset, bufs + ioffset, p->kind),
	       0, 0, (p->r0 == p->cr) ? NO_DESTROY_INPUT : 0);
	  if (!cld) goto nada;

	  /* copying back from the buffer is a rank-0 DFT: */
	  cldcpy = X(mkplan_d)(
	       plnr, 
	       X(mkproblem_dft_d)(
		    X(mktensor_0d)(),
		    X(mktensor_2d)(nbuf, bufdist, ovs,
				   n/2+1, 2, p->sz->dims[0].os),
		    bufs + roffset, bufs + ioffset,
		    TAINT(p->cr, ovs * nbuf), TAINT(p->ci, ovs * nbuf) ));
	  if (!cldcpy) goto nada;

	  /* planning buffer no longer needed */
	  X(ifree)(bufs); bufs = 0;

	  /* leftover vectors: real side offset by id, complex side by od */
	  cldrest = X(mkplan_d)(plnr, 
				X(mkproblem_rdft2_d)(
				     X(tensor_copy)(p->sz),
				     X(mktensor_1d)(vl % nbuf, ivs, ovs),
				     p->r0 + id, p->r1 + id, 
				     p->cr + od, p->ci + od,
				     p->kind));
	  if (!cldrest) goto nada;
	  pln = MKPLAN_RDFT2(P, &padt, apply_r2hc);
     } else {
	  /* allow destruction of buffer */
	  cld = X(mkplan_f_d)(
	       plnr, 
	       X(mkproblem_rdft2_d)(
		    X(mktensor_1d)(n, 2, p->sz->dims[0].os),
		    X(mktensor_1d)(nbuf, bufdist, ovs),
		    TAINT(p->r0, ovs * nbuf), TAINT(p->r1, ovs * nbuf),
		    bufs + roffset, bufs + ioffset, p->kind),
	       0, 0, NO_DESTROY_INPUT);
	  if (!cld) goto nada;

	  /* copying input into buffer is a rank-0 DFT: */
	  cldcpy = X(mkplan_d)(
	       plnr, 
	       X(mkproblem_dft_d)(
		    X(mktensor_0d)(),
		    X(mktensor_2d)(nbuf, ivs, bufdist,
				   n/2+1, p->sz->dims[0].is, 2),
		    TAINT(p->cr, ivs * nbuf), TAINT(p->ci, ivs * nbuf), 
		    bufs + roffset, bufs + ioffset));
	  if (!cldcpy) goto nada;

	  /* planning buffer no longer needed */
	  X(ifree)(bufs); bufs = 0;

	  /* leftover vectors: here the real side is offset by od and the
	     complex side by id */
	  cldrest = X(mkplan_d)(plnr, 
				X(mkproblem_rdft2_d)(
				     X(tensor_copy)(p->sz),
				     X(mktensor_1d)(vl % nbuf, ivs, ovs),
				     p->r0 + od, p->r1 + od, 
				     p->cr + id, p->ci + id,
				     p->kind));
	  if (!cldrest) goto nada;

	  pln = MKPLAN_RDFT2(P, &padt, apply_hc2r);
     }

     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;
     pln->roffset = roffset;
     pln->ioffset = ioffset;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
	  /* combine cld and cldcpy, then fold in cldrest with a
	     multiplier of vl / nbuf */
	  opcnt t;
	  X(ops_add)(&cld->ops, &cldcpy->ops, &t);
	  X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     /* shared failure path: release the buffer and any child plans */
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
/*
 * Create an rdft plan built from an R2HC child of size 2*n, where
 * n = (first size dimension) - 1, plus a child that copies n+1 values
 * from the scratch buffer to the output.
 *
 * The scratch buffer allocated here is only used while planning and is
 * freed before the plan object is created.
 *
 * Returns 0 when not applicable or when either child cannot be planned.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
	  X(rdft_solve), awake, print, destroy
     };
     const problem_rdft *prob;
     plan *child = (plan *) 0, *copier, *self;
     R *scratch = (R *) 0;
     opcnt percall;
     P *pln;
     int n, vl, ivs, ovs;

     if (!applicable(ego_, p_, plnr))
	  goto nada;

     prob = (const problem_rdft *) p_;

     n = prob->sz->dims[0].n - 1;
     A(n > 0);
     scratch = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);

     /* in-place R2HC of length 2*n on the scratch buffer */
     child = X(mkplan_d)(
	  plnr,
	  X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n, 1, 1),
				X(mktensor_0d)(),
				scratch, scratch, R2HC));
     if (!child)
	  goto nada;

     X(tensor_tornk1)(prob->vecsz, &vl, &ivs, &ovs);

     /* 0d size tensor: the n+1 elements live in the vector tensor */
     copier = X(mkplan_d)(
	  plnr,
	  X(mkproblem_rdft_1_d)(X(mktensor_0d)(),
				X(mktensor_1d)(n+1, 1,
					       prob->sz->dims[0].os),
				scratch, TAINT(prob->O, ovs), R2HC));
     if (!copier)
	  goto nada;

     X(ifree)(scratch);

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld = child;
     pln->cldcpy = copier;
     pln->n = n;
     pln->is = prob->sz->dims[0].is;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;

     X(ops_zero)(&percall);
     percall.other = n + 2*n; /* loads + stores (input -> buf) */

     self = &pln->super.super;
     X(ops_zero)(&self->ops);
     X(ops_madd2)(pln->vl, &percall, &self->ops);
     X(ops_madd2)(pln->vl, &child->ops, &self->ops);
     X(ops_madd2)(pln->vl, &copier->ops, &self->ops);

     return self;

 nada:
     X(ifree0)(scratch);
     if (child)
	  X(plan_destroy_internal)(child);
     return (plan *) 0;
}
/* Example no. 7 */
/* 0 */
/*
 * Create a buffered rdft plan (single-precision fftwf_ names) made of
 * three child plans:
 *   cld     - the transform between the user array and a scratch buffer,
 *             nbuf vectors at a time,
 *   cldcpy  - rank-0 problem copying into the buffer (HC2R) or back out
 *             of it (all other kinds),
 *   cldrest - handles the vl % nbuf vectors left over.
 *
 * The scratch buffer allocated here is only used while planning; it is
 * freed before cldrest is planned.
 *
 * Returns 0 when not applicable or any child plan cannot be created;
 * everything acquired so far is released on that path.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_rdft *p = (const problem_rdft *) p_;
     float *bufs = (float *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs;
     int hc2rp;

     static const plan_adt padt = {
	  fftwf_rdft_solve, awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = fftwf_tensor_sz(p->sz);
     /* vector length and input/output vector strides */
     fftwf_tensor_tornk1(p->vecsz, &vl, &ivs, &ovs);
     /* nonzero when the (first) transform kind is HC2R */
     hc2rp = (p->kind[0] == HC2R);

     nbuf = fftwf_nbuf(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = fftwf_bufdist(n, vl);
     A(nbuf > 0);

     /* initial allocation for the purpose of planning */
     bufs = (float *) MALLOC(sizeof(float) * nbuf * bufdist, BUFFERS);

     if (hc2rp) {
	  /* allow destruction of buffer */
	  cld = fftwf_mkplan_f_d(plnr,
			      fftwf_mkproblem_rdft_d(
				   fftwf_mktensor_1d(n, 1, p->sz->dims[0].os),
				   fftwf_mktensor_1d(nbuf, bufdist, ovs),
				   bufs, TAINT(p->O, ovs * nbuf), p->kind),
			      0, 0, NO_DESTROY_INPUT);
	  if (!cld) goto nada;

	  /* copying input into buffer is a rank-0 transform: */
	  cldcpy = fftwf_mkplan_d(plnr,
			       fftwf_mkproblem_rdft_0_d(
				    fftwf_mktensor_2d(nbuf, ivs, bufdist,
						   n, p->sz->dims[0].is, 1),
				    TAINT(p->I, ivs * nbuf), bufs));
	  if (!cldcpy) goto nada;
     } else {
	  /* allow destruction of input if problem is in place */
	  /* NOTE(review): presumably the last argument of fftwf_mkplan_f_d
	     lists planner flags to clear -- confirm */
	  cld = fftwf_mkplan_f_d(plnr,
			      fftwf_mkproblem_rdft_d(
				   fftwf_mktensor_1d(n, p->sz->dims[0].is, 1),
				   fftwf_mktensor_1d(nbuf, ivs, bufdist),
				   TAINT(p->I, ivs * nbuf), bufs, p->kind),
			      0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0);
	  if (!cld) goto nada;

	  /* copying back from the buffer is a rank-0 transform: */
	  cldcpy = fftwf_mkplan_d(plnr,
			       fftwf_mkproblem_rdft_0_d(
				    fftwf_mktensor_2d(nbuf, bufdist, ovs,
						   n, 1, p->sz->dims[0].os),
				    bufs, TAINT(p->O, ovs * nbuf)));
	  if (!cldcpy) goto nada;
     }

     /* deallocate buffers, let apply() allocate them for real */
     fftwf_ifree(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
	  /* offsets of the first vector not covered by a full group of
	     nbuf buffers */
	  INT id = ivs * (nbuf * (vl / nbuf));
	  INT od = ovs * (nbuf * (vl / nbuf));
	  cldrest = fftwf_mkplan_d(plnr,
				fftwf_mkproblem_rdft_d(
				     fftwf_tensor_copy(p->sz),
				     fftwf_mktensor_1d(vl % nbuf, ivs, ovs),
				     p->I + id, p->O + od, p->kind));
     }
     if (!cldrest) goto nada;

     pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
	  /* combine cld and cldcpy, then fold in cldrest with a
	     multiplier of vl / nbuf */
	  opcnt t;
	  fftwf_ops_add(&cld->ops, &cldcpy->ops, &t);
	  fftwf_ops_madd(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     /* shared failure path: release the buffer and any child plans */
     fftwf_ifree0(bufs);
     fftwf_plan_destroy_internal(cldrest);
     fftwf_plan_destroy_internal(cldcpy);
     fftwf_plan_destroy_internal(cld);
     return (plan *) 0;
}
/* Example no. 8 */
/* 0 */
/*
 * Create a DFT plan from three child plans:
 *   cldtrans - rank-0 problem from input to output whose strides for the
 *              chosen size/vector dimensions (pdim1/pdim0) are swapped,
 *   cld      - the transform itself, planned in place on the output,
 *   cldrest  - the vector elements not covered by vl full blocks.
 *
 * applicable() selects pdim0 (an index into vecsz) and pdim1 (an index
 * into sz).
 *
 * Returns 0 when not applicable or any child plan cannot be created;
 * child plans made so far are destroyed on that path.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     P *pln;
     plan *cld = 0, *cldtrans = 0, *cldrest = 0;
     int pdim0, pdim1;
     tensor *ts, *tv;
     INT vl, ivs, ovs;
     R *rit, *iit, *rot, *iot;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr, &pdim0, &pdim1))
          return (plan *) 0;

     /* number of whole blocks of sz->dims[pdim1].n vector elements, and
	the input/output distance between consecutive blocks */
     vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n;
     A(vl >= 1);
     ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is;
     ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os;
     /* a single block is tainted with stride 0 */
     rit = TAINT(p->ri, vl == 1 ? 0 : ivs);
     iit = TAINT(p->ii, vl == 1 ? 0 : ivs);
     rot = TAINT(p->ro, vl == 1 ? 0 : ovs);
     iot = TAINT(p->io, vl == 1 ? 0 : ovs);

     /* cldtrans: copies made with INPLACE_IS, then the output strides of
	the two chosen dimensions are exchanged and the vector dimension is
	cut down to one block */
     ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS);
     ts->dims[pdim1].os = p->vecsz->dims[pdim0].is;
     tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS);
     tv->dims[pdim0].os = p->sz->dims[pdim1].is;
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
     cldtrans = X(mkplan_d)(plnr, 
			    X(mkproblem_dft_d)(X(mktensor_0d)(),
					       X(tensor_append)(tv, ts),
					       rit, iit, 
					       rot, iot));
     /* ts and tv were only inputs to X(tensor_append); release them */
     X(tensor_destroy2)(ts, tv);
     if (!cldtrans) goto nada;

     /* cld: same exchange applied to the input strides; the problem reads
	and writes the output arrays (rot/iot on both sides) */
     ts = X(tensor_copy)(p->sz);
     ts->dims[pdim1].is = p->vecsz->dims[pdim0].is;
     tv = X(tensor_copy)(p->vecsz);
     tv->dims[pdim0].is = p->sz->dims[pdim1].is;
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
     /* NOTE(review): ts and tv are not destroyed here; presumably
	X(mkproblem_dft_d) takes ownership of them -- confirm */
     cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv,
						rot, iot,
						rot, iot));
     if (!cld) goto nada;

     /* cldrest: whatever is left of the vector dimension after vl blocks,
	starting vl blocks into the arrays */
     tv = X(tensor_copy)(p->vecsz);
     tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n;
     cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv,
						    p->ri + ivs * vl,
						    p->ii + ivs * vl,
						    p->ro + ovs * vl,
						    p->io + ovs * vl));
     if (!cldrest) goto nada;

     pln = MKPLAN_DFT(P, &padt, apply_op);
     pln->cldtrans = cldtrans;
     pln->cld = cld;
     pln->cldrest = cldrest;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;
     /* start from cldrest's op count, then add cld and cldtrans with a
	multiplier of vl */
     X(ops_cpy)(&cldrest->ops, &pln->super.super.ops);
     X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops);
     X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops);
     return &(pln->super.super);

 nada:
     /* shared failure path: destroy any child plans made so far */
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cld);
     X(plan_destroy_internal)(cldtrans);
     return (plan *)0;
}
/* Example no. 9 */
/* 0 */
/*
 * Create an rdft plan for REDFT00 / RODFT00 built from two child plans:
 *   clde - a transform of the same kind over every other input element
 *          (input stride 2 * is), of length n0 - n/2,
 *   cldo - an in-place R2HC of length n/2 on a scratch buffer.
 * Here n0 is the first size dimension and n is n0 - 1 for REDFT00 and
 * n0 + 1 otherwise.
 *
 * The scratch buffer allocated here is only used while planning and is
 * freed before returning on every path.
 *
 * Returns 0 when not applicable or when either child cannot be planned.
 * If cldo cannot be planned, the already-created clde is destroyed so
 * that it is not leaked.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const problem_rdft *p;
     plan *clde, *cldo;
     R *buf;
     INT n, n0;
     opcnt ops;
     int inplace_odd;

     static const plan_adt padt = {
	  X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     p = (const problem_rdft *) p_;

     n = (n0 = p->sz->dims[0].n) + (p->kind[0] == REDFT00 ? (INT)-1 : (INT)1);
     A(n > 0 && n % 2 == 0);
     buf = (R *) MALLOC(sizeof(R) * (n/2), BUFFERS);

     /* RODFT00 with input and output in the same array: the child then
	uses the input stride for its output and starts one element in */
     inplace_odd = p->kind[0]==RODFT00 && p->I == p->O;
     clde = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
			     X(mktensor_1d)(n0-n/2, 2*p->sz->dims[0].is, 
					    inplace_odd ? p->sz->dims[0].is
					    : p->sz->dims[0].os), 
			     X(mktensor_0d)(), 
			     TAINT(p->I 
				   + p->sz->dims[0].is * (p->kind[0]==RODFT00),
				   p->vecsz->rnk ? p->vecsz->dims[0].is : 0),
			     TAINT(p->O
				   + p->sz->dims[0].is * inplace_odd,
				   p->vecsz->rnk ? p->vecsz->dims[0].os : 0),
			     p->kind[0]));
     if (!clde) {
	  X(ifree)(buf);
          return (plan *)0;
     }

     cldo = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
			     X(mktensor_1d)(n/2, 1, 1), 
			     X(mktensor_0d)(), 
			     buf, buf, R2HC));
     X(ifree)(buf);
     if (!cldo) {
	  /* clde was planned successfully above and nothing owns it yet;
	     destroy it here so that this failure path does not leak it */
	  X(plan_destroy_internal)(clde);
          return (plan *)0;
     }

     pln = MKPLAN_RDFT(P, &padt, p->kind[0] == REDFT00 ? apply_e : apply_o);

     pln->n = n;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;
     pln->clde = clde;
     pln->cldo = cldo;
     pln->td = 0;

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
     
     /* op count charged per vector element, on top of the two children */
     X(ops_zero)(&ops);
     ops.other = n/2;
     ops.add = (p->kind[0]==REDFT00 ? (INT)2 : (INT)0) +
	  (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;
     ops.mul = 1 + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;

     /* tweak ops.other so that r2hc-pad is used for small sizes, which
	seems to be a lot faster on my machine: */
     ops.other += 256;

     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &clde->ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cldo->ops, &pln->super.super.ops);

     return &(pln->super.super);
}
/* Example no. 10 */
/* 0 */
/*
 * Create an rdft2 plan that runs an rdft child through a scratch buffer.
 *
 * For R2HC the child reads from p->r0 and writes to the buffer; for HC2R
 * it reads from the buffer and writes to p->r0.  A second child (rest)
 * solves the vl % nbuf leftover vectors as an ordinary rdft2 problem.
 *
 * The scratch buffer allocated here is only used while planning; it is
 * freed before the leftover child is planned.
 *
 * Returns 0 when not applicable or when either child cannot be planned.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
	  X(rdft2_solve), awake, print, destroy
     };
     const problem_rdft2 *prob = (const problem_rdft2 *) p_;
     const S *slv = (const S *) ego_;
     plan *child = (plan *) 0;
     plan *rest = (plan *) 0;
     plan *self;
     R *scratch = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, rs, in_rest, out_rest;
     P *pln;

     if (!applicable(p_, slv, plnr))
	  goto nada;

     n = prob->sz->dims[0].n;
     X(tensor_tornk1)(prob->vecsz, &vl, &ivs, &ovs);

     nbuf = X(imax)(X(nbuf)(n, vl, 0), min_nbuf(prob, n, vl));
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* initial allocation for the purpose of planning */
     scratch = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);

     /* offsets of the first vector not covered by a full group of nbuf */
     in_rest = ivs * (nbuf * (vl / nbuf));
     out_rest = ovs * (nbuf * (vl / nbuf));

     if (prob->kind == R2HC) {
	  child = X(mkplan_f_d)(
	       plnr,
	       X(mkproblem_rdft_d)(
		    X(mktensor_1d)(n, prob->sz->dims[0].is/2, 1),
		    X(mktensor_1d)(nbuf, ivs, bufdist),
		    TAINT(prob->r0, ivs * nbuf), scratch, &prob->kind),
	       0, 0, (prob->r0 == prob->cr) ? NO_DESTROY_INPUT : 0);
	  if (!child)
	       goto nada;
	  X(ifree)(scratch);
	  scratch = 0;

	  rest = X(mkplan_d)(
	       plnr,
	       X(mkproblem_rdft2_d)(
		    X(tensor_copy)(prob->sz),
		    X(mktensor_1d)(vl % nbuf, ivs, ovs),
		    prob->r0 + in_rest, prob->r1 + in_rest,
		    prob->cr + out_rest, prob->ci + out_rest,
		    prob->kind));
	  if (!rest)
	       goto nada;

	  pln = MKPLAN_RDFT2(P, &padt, apply_r2hc);
     } else {
	  A(prob->kind == HC2R);
	  child = X(mkplan_f_d)(
	       plnr,
	       X(mkproblem_rdft_d)(
		    X(mktensor_1d)(n, 1, prob->sz->dims[0].os/2),
		    X(mktensor_1d)(nbuf, bufdist, ovs),
		    scratch, TAINT(prob->r0, ovs * nbuf), &prob->kind),
	       0, 0, NO_DESTROY_INPUT); /* always ok to destroy bufs */
	  if (!child)
	       goto nada;
	  X(ifree)(scratch);
	  scratch = 0;

	  /* in this direction the real side is offset by the output
	     offset and the complex side by the input offset */
	  rest = X(mkplan_d)(
	       plnr,
	       X(mkproblem_rdft2_d)(
		    X(tensor_copy)(prob->sz),
		    X(mktensor_1d)(vl % nbuf, ivs, ovs),
		    prob->r0 + out_rest, prob->r1 + out_rest,
		    prob->cr + in_rest, prob->ci + in_rest,
		    prob->kind));
	  if (!rest)
	       goto nada;

	  pln = MKPLAN_RDFT2(P, &padt, apply_hc2r);
     }

     pln->cld = child;
     pln->cldrest = rest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;
     /* rs is written by the call but not used afterwards in this routine */
     X(rdft2_strides)(prob->kind, &prob->sz->dims[0], &rs, &pln->cs);
     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     self = &pln->super.super;
     X(ops_madd)(vl / nbuf, &child->ops, &rest->ops, &self->ops);
     self->ops.other += (prob->kind == R2HC ? (n + 2) : n) * vl;

     return self;

 nada:
     X(ifree0)(scratch);
     X(plan_destroy_internal)(rest);
     X(plan_destroy_internal)(child);
     return (plan *) 0;
}