Ejemplo n.º 1
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     P *pln;
     const problem_dft *p;
     iodim *d;
     const kdft_desc *e = ego->desc;

     static const plan_adt padt = {
	  X(dft_solve), X(null_awake), print, destroy
     };

     UNUSED(plnr);

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     p = (const problem_dft *) p_;

     pln = MKPLAN_DFT(P, &padt, apply);

     d = p->sz->dims;

     pln->k = ego->k;
     pln->is = X(mkstride)(e->sz, d[0].is);
     pln->os = X(mkstride)(e->sz, d[0].os);

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
     pln->slv = ego;

     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl / e->genus->vl, &e->ops, &pln->super.super.ops);

     return &(pln->super.super);
}
Ejemplo n.º 2
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_dft *p;
     P *pln;

     static const plan_adt padt = {
	  X(dft_solve), X(null_awake), print, X(plan_null_destroy)
     };

     UNUSED(plnr);

     if (!applicable(ego_, p_))
          return (plan *) 0;

     p = (const problem_dft *) p_;
     pln = MKPLAN_DFT(P, &padt, ego->adt->apply);

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
     pln->slv = ego;

     /* 2*vl loads, 2*vl stores */
     X(ops_other)(4 * pln->vl, &pln->super.super.ops);
     return &(pln->super.super);
}
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_dft *p;
     P *pln;
     const iodim *d;

     static const plan_adt padt = {
	  X(dft_solve), X(null_awake), print, X(plan_null_destroy)
     };

     UNUSED(ego);

     if (!applicable(p_, plnr))
          return (plan *) 0;
     p = (const problem_dft *) p_;

     d = p->vecsz->dims;
     pln = MKPLAN_DFT(P, &padt, 
		      X(transpose_simplep)(d, d+1, 1, X(imin)(d[0].is,d[0].os),
					   p->ri, p->ii) ? apply :
		      (X(transpose_slowp)(d, d+1, 2) ? apply_slow : 
		       apply_general));
     X(transpose_dims)(d, d+1, &pln->n, &pln->m, &pln->d, &pln->nd, &pln->md);
     pln->offset = (p->ri - p->ii == 1) ? -1 : 0;
     pln->s0 = d[0].is;
     pln->s1 = d[0].os;

     /* (4 loads + 4 stores) * (pln->n \choose 2)
        (FIXME? underestimate for non-square) */
     X(ops_other)(4 * pln->n * (pln->m - 1), &pln->super.super.ops);
     return &(pln->super.super);
}
Ejemplo n.º 4
0
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_dft *p;
     P *pln;
     INT n;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, X(plan_null_destroy)
     };

     if (!applicable(ego, p_, plnr))
          return (plan *)0;

     pln = MKPLAN_DFT(P, &padt, apply);

     p = (const problem_dft *) p_;
     pln->n = n = p->sz->dims[0].n;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;
     pln->td = 0;

     pln->super.super.ops.add = (n-1) * 5;
     pln->super.super.ops.mul = 0;
     pln->super.super.ops.fma = (n-1) * (n-1) ;
#if 0 /* these are nice pipelined sequential loads and should cost nothing */
     pln->super.super.ops.other = (n-1)*(4 + 1 + 2 * (n-1));  /* approximate */
#endif

     return &(pln->super.super);
}
Ejemplo n.º 5
0
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     P *pln;
     INT n;
     INT is, os;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
	  return (plan *) 0;

     n = p->sz->dims[0].n;
     is = p->sz->dims[0].is;
     os = p->sz->dims[0].os;

     pln = MKPLAN_DFT(P, &padt, apply);
     if (!mkP(pln, n, is, os, p->ro, p->io, plnr)) {
	  X(ifree)(pln);
	  return (plan *) 0;
     }
     return &(pln->super.super);
}
Ejemplo n.º 6
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const problem_dft *p;
     plan *cld;
     INT ishift = 0, oshift = 0;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     UNUSED(ego_);
     if (!applicable(p_, plnr))
          return (plan *)0;

     p = (const problem_dft *) p_;

     {
	  tensor *ri_vec = X(mktensor_1d)(2, p->ii - p->ri, p->io - p->ro);
	  tensor *cld_vec = X(tensor_append)(ri_vec, p->vecsz);
	  int i;
	  for (i = 0; i < cld_vec->rnk; ++i) { /* make all istrides > 0 */
	       if (cld_vec->dims[i].is < 0) {
		    INT nm1 = cld_vec->dims[i].n - 1;
		    ishift -= nm1 * (cld_vec->dims[i].is *= -1);
		    oshift -= nm1 * (cld_vec->dims[i].os *= -1);
	       }
	  }
	  cld = X(mkplan_d)(plnr, 
			    X(mkproblem_rdft_1)(p->sz, cld_vec, 
						p->ri + ishift, 
						p->ro + oshift, R2HC));
	  X(tensor_destroy2)(ri_vec, cld_vec);
     }
     if (!cld) return (plan *)0;

     pln = MKPLAN_DFT(P, &padt, apply);

     if (p->sz->rnk == 0) {
	  pln->n = 1;
	  pln->os = 0;
     }
     else {
	  pln->n = p->sz->dims[0].n;
	  pln->os = p->sz->dims[0].os;
     }
     pln->ishift = ishift;
     pln->oshift = oshift;

     pln->cld = cld;
     
     pln->super.super.ops = cld->ops;
     pln->super.super.ops.other += 8 * ((pln->n - 1)/2);
     pln->super.super.ops.add += 4 * ((pln->n - 1)/2);
     pln->super.super.ops.other += 1; /* estimator hack for nop plans */

     return &(pln->super.super);
}
Ejemplo n.º 7
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     P *pln;
     const problem_dft *p;
     iodim *d;
     const kdft_desc *e = ego->desc;

     static const plan_adt padt = {
	  X(dft_solve), X(null_awake), print, destroy
     };

     UNUSED(plnr);

     if (ego->bufferedp) {
	  if (!applicable_buf(ego_, p_, plnr))
	       return (plan *)0;
	  pln = MKPLAN_DFT(P, &padt, apply_buf);
     } else {
	  int extra_iterp = 0;
	  if (!applicable(ego_, p_, plnr, &extra_iterp))
	       return (plan *)0;
	  pln = MKPLAN_DFT(P, &padt, extra_iterp ? apply_extra_iter : apply);
     }

     p = (const problem_dft *) p_;
     d = p->sz->dims;
     pln->k = ego->k;
     pln->n = d[0].n;
     pln->is = X(mkstride)(pln->n, d[0].is);
     pln->os = X(mkstride)(pln->n, d[0].os);
     pln->bufstride = X(mkstride)(pln->n, 2 * compute_batchsize(pln->n));

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
     pln->slv = ego;

     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl / e->genus->vl, &e->ops, &pln->super.super.ops);

     if (ego->bufferedp) 
	  pln->super.super.ops.other += 4 * pln->n * pln->vl;

     pln->super.super.could_prune_now_p = !ego->bufferedp;
     return &(pln->super.super);
}
Ejemplo n.º 8
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_dft *p;
     P *pln;
     plan *cld1 = 0, *cld2 = 0;
     tensor *sz1, *sz2, *vecszi, *sz2i;
     int spltrnk;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr, &spltrnk))
          return (plan *) 0;

     p = (const problem_dft *) p_;
     X(tensor_split)(p->sz, &sz1, spltrnk, &sz2);
     vecszi = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS);
     sz2i = X(tensor_copy_inplace)(sz2, INPLACE_OS);

     cld1 = X(mkplan_d)(plnr, 
			X(mkproblem_dft_d)(X(tensor_copy)(sz2),
					   X(tensor_append)(p->vecsz, sz1),
					   p->ri, p->ii, p->ro, p->io));
     if (!cld1) goto nada;

     cld2 = X(mkplan_d)(plnr, 
			X(mkproblem_dft_d)(
			     X(tensor_copy_inplace)(sz1, INPLACE_OS),
			     X(tensor_append)(vecszi, sz2i),
			     p->ro, p->io, p->ro, p->io));
     if (!cld2) goto nada;

     pln = MKPLAN_DFT(P, &padt, apply);

     pln->cld1 = cld1;
     pln->cld2 = cld2;

     pln->solver = ego;
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);

     X(tensor_destroy4)(sz1, sz2, vecszi, sz2i);

     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     X(tensor_destroy4)(sz1, sz2, vecszi, sz2i);
     return (plan *) 0;
}
Ejemplo n.º 9
0
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     P *pln;
     INT n, nb;
     plan *cldf = 0;
     R *buf = (R *) 0;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
	  return (plan *) 0;

     n = p->sz->dims[0].n;
     nb = choose_transform_size(2 * n - 1);
     buf = (R *) MALLOC(2 * nb * sizeof(R), BUFFERS);

     cldf = X(mkplan_f_d)(plnr, 
			  X(mkproblem_dft_d)(X(mktensor_1d)(nb, 2, 2),
					     X(mktensor_1d)(1, 0, 0),
					     buf, buf+1, 
					     buf, buf+1),
			  NO_SLOW, 0, 0);
     if (!cldf) goto nada;

     X(ifree)(buf);

     pln = MKPLAN_DFT(P, &padt, apply);

     pln->n = n;
     pln->nb = nb;
     pln->w = 0;
     pln->W = 0;
     pln->cldf = cldf;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;

     X(ops_add)(&cldf->ops, &cldf->ops, &pln->super.super.ops);
     pln->super.super.ops.add += 4 * n + 2 * nb;
     pln->super.super.ops.mul += 8 * n + 4 * nb;
     pln->super.super.ops.other += 6 * (n + nb);

     return &(pln->super.super);

 nada:
     X(ifree0)(buf);
     X(plan_destroy_internal)(cldf);
     return (plan *)0;
}
Ejemplo n.º 10
0
static plan *mkplan_dit(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     P_dit *pln = 0;
     int n, r, m;
     int is, os;
     plan *cld = (plan *) 0;

     static const plan_adt padt = {
	  X(dft_solve), awake_dit, print_dit, destroy_dit
     };

     if (!applicable_dit(ego, p_, plnr))
          goto nada;

     n = p->sz->dims[0].n;
     is = p->sz->dims[0].is;
     os = p->sz->dims[0].os;

     r = X(first_divisor)(n);
     m = n / r;

     cld = X(mkplan_d)(plnr, 
		       X(mkproblem_dft_d)(X(mktensor_1d)(m, r * is, os),
					  X(mktensor_1d)(r, is, m * os),
					  p->ri, p->ii, p->ro, p->io));
     if (!cld) goto nada;

     pln = MKPLAN_DFT(P_dit, &padt, apply_dit);
     if (!mkP(&pln->super, r, os*m, os*m, p->ro, p->io, plnr))
	  goto nada;

     pln->os = os;
     pln->m = m;
     pln->cld = cld;
     pln->W = 0;

     pln->super.super.super.ops.add += 2 * (r-1);
     pln->super.super.super.ops.mul += 4 * (r-1);
     X(ops_madd)(m, &pln->super.super.super.ops, &cld->ops,
		 &pln->super.super.super.ops);

     return &(pln->super.super.super);

 nada:
     X(plan_destroy_internal)(cld);
     X(ifree0)(pln);
     return (plan *) 0;
}
Ejemplo n.º 11
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const problem_dft *p;
     plan *cld;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     UNUSED(ego_);
     if (!applicable(p_, plnr))
          return (plan *)0;

     p = (const problem_dft *) p_;

     {
	  tensor *ri_vec = X(mktensor_1d)(2, p->ii - p->ri, p->io - p->ro);
	  tensor *cld_vec = X(tensor_append)(ri_vec, p->vecsz);
	  cld = X(mkplan_d)(plnr, 
			    X(mkproblem_rdft_1)(p->sz, cld_vec, 
						p->ri, p->ro, R2HC));
	  X(tensor_destroy2)(ri_vec, cld_vec);
     }
     if (!cld) return (plan *)0;

     pln = MKPLAN_DFT(P, &padt, apply);

#if ALLOW_RANK0
     if (p->sz->rnk == 0) {
	  pln->n = 1;
	  pln->os = 0;
     }
     else
#endif
     {
	  pln->n = p->sz->dims[0].n;
	  pln->os = p->sz->dims[0].os;
     }

     pln->cld = cld;
     
     pln->super.super.ops = cld->ops;
     pln->super.super.ops.other += 8 * ((pln->n - 1)/2);
     pln->super.super.ops.add += 4 * ((pln->n - 1)/2);

     return &(pln->super.super);
}
Ejemplo n.º 12
0
static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
{
     static const plan_adt padt = {
	  X(dft_solve), X(null_awake), print, X(plan_null_destroy)
     };
     plan_dft *pln;

     UNUSED(plnr);

     if (!applicable(ego, p))
          return (plan *) 0;
     pln = MKPLAN_DFT(plan_dft, &padt, apply);
     X(ops_zero)(&pln->super.ops);

     return &(pln->super);
}
Ejemplo n.º 13
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_dft *p;
     P *pln;
     plan *cld;
     int vdim;
     iodim *d;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;
     p = (const problem_dft *) p_;

     d = p->vecsz->dims + vdim;

     A(d->n > 1);
     cld = X(mkplan_d)(plnr,
		       X(mkproblem_dft_d)(
			    X(tensor_copy)(p->sz),
			    X(tensor_copy_except)(p->vecsz, vdim),
			    TAINT(p->ri, d->is), TAINT(p->ii, d->is),
			    TAINT(p->ro, d->os), TAINT(p->io, d->os)));
     if (!cld) return (plan *) 0;

     pln = MKPLAN_DFT(P, &padt, apply);

     pln->cld = cld;
     pln->vl = d->n;
     pln->ivs = d->is;
     pln->ovs = d->os;

     pln->solver = ego;
     X(ops_zero)(&pln->super.super.ops);
     pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);

     if (p->sz->rnk != 1 || (p->sz->dims[0].n > 64))
	  pln->super.super.pcost = pln->vl * cld->pcost;

     return &(pln->super.super);
}
Ejemplo n.º 14
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     const S *ego = (const S *) ego_;
     P *pln;
     plan *cld = 0, *cldcpy = 0;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
          return (plan *) 0;

     cldcpy =
	  X(mkplan_d)(plnr, 
		      X(mkproblem_dft_d)(X(mktensor_0d)(),
					 X(tensor_append)(p->vecsz, p->sz),
					 p->ri, p->ii, p->ro, p->io));

     if (!cldcpy) goto nada;

     cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0);
     if (!cld) goto nada;

     pln = MKPLAN_DFT(P, &padt, ego->adt->apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->slv = ego;
     X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops);

     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cld);
     X(plan_destroy_internal)(cldcpy);
     return (plan *)0;
}
Ejemplo n.º 15
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_dft *p = (const problem_dft *) p_;
     R *bufs = (R *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs, roffset, ioffset;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = X(tensor_sz)(p->sz);

     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     nbuf = X(nbuf)(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = X(bufdist)(n, vl);
     A(nbuf > 0);

     /* attempt to keep real and imaginary part in the same order,
	so as to allow optimizations in the the copy plan */
     roffset = (p->ri - p->ii > 0) ? (INT)1 : (INT)0;
     ioffset = 1 - roffset;

     /* initial allocation for the purpose of planning */
     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);

     /* allow destruction of input if problem is in place */
     cld = X(mkplan_f_d)(plnr,
			 X(mkproblem_dft_d)(
			      X(mktensor_1d)(n, p->sz->dims[0].is, 2),
			      X(mktensor_1d)(nbuf, ivs, bufdist * 2),
			      TAINT(p->ri, ivs * nbuf),
			      TAINT(p->ii, ivs * nbuf),
			      bufs + roffset, 
			      bufs + ioffset),
			 0, 0, (p->ri == p->ro) ? NO_DESTROY_INPUT : 0);
     if (!cld)
          goto nada;

     /* copying back from the buffer is a rank-0 transform: */
     cldcpy = X(mkplan_d)(plnr,
			  X(mkproblem_dft_d)(
			       X(mktensor_0d)(),
			       X(mktensor_2d)(nbuf, bufdist * 2, ovs,
					      n, 2, p->sz->dims[0].os),
			       bufs + roffset, 
			       bufs + ioffset, 
			       TAINT(p->ro, ovs * nbuf), 
			       TAINT(p->io, ovs * nbuf)));
     if (!cldcpy)
          goto nada;

     /* deallocate buffers, let apply() allocate them for real */
     X(ifree)(bufs);
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
	  INT id = ivs * (nbuf * (vl / nbuf));
	  INT od = ovs * (nbuf * (vl / nbuf));
	  cldrest = X(mkplan_d)(plnr, 
				X(mkproblem_dft_d)(
				     X(tensor_copy)(p->sz),
				     X(mktensor_1d)(vl % nbuf, ivs, ovs),
				     p->ri+id, p->ii+id, p->ro+od, p->io+od));
     }
     if (!cldrest)
          goto nada;

     pln = MKPLAN_DFT(P, &padt, apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;
     pln->roffset = roffset;
     pln->ioffset = ioffset;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
	  opcnt t;
	  X(ops_add)(&cld->ops, &cldcpy->ops, &t);
	  X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     X(ifree0)(bufs);
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cldcpy);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
Ejemplo n.º 16
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_dft *p;
     P *pln;
     problem *cldp;
     int vdim;
     iodim *d;
     plan **cldrn = (plan **) 0;
     int i, nthr;
     INT its, ots, block_size;
     tensor *vecsz = 0;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;
     p = (const problem_dft *) p_;
     d = p->vecsz->dims + vdim;

     block_size = (d->n + plnr->nthr - 1) / plnr->nthr;
     nthr = (int)((d->n + block_size - 1) / block_size);
     plnr->nthr = (plnr->nthr + nthr - 1) / nthr;
     its = d->is * block_size;
     ots = d->os * block_size;

     cldrn = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS);
     for (i = 0; i < nthr; ++i) cldrn[i] = (plan *) 0;
     
     vecsz = X(tensor_copy)(p->vecsz);
     for (i = 0; i < nthr; ++i) {
	  vecsz->dims[vdim].n =
	       (i == nthr - 1) ? (d->n - i*block_size) : block_size;
	  cldp = X(mkproblem_dft)(p->sz, vecsz,
				  p->ri + i*its, p->ii + i*its, 
				  p->ro + i*ots, p->io + i*ots);
	  cldrn[i] = X(mkplan_d)(plnr, cldp);
	  if (!cldrn[i]) goto nada;
     }
     X(tensor_destroy)(vecsz);

     pln = MKPLAN_DFT(P, &padt, apply);

     pln->cldrn = cldrn;
     pln->its = its;
     pln->ots = ots;
     pln->nthr = nthr;

     pln->solver = ego;
     X(ops_zero)(&pln->super.super.ops);
     pln->super.super.pcost = 0;
     for (i = 0; i < nthr; ++i) {
	  X(ops_add2)(&cldrn[i]->ops, &pln->super.super.ops);
	  pln->super.super.pcost += cldrn[i]->pcost;
     }

     return &(pln->super.super);

 nada:
     if (cldrn) {
	  for (i = 0; i < nthr; ++i)
	       X(plan_destroy_internal)(cldrn[i]);
	  X(ifree)(cldrn);
     }
     X(tensor_destroy)(vecsz);
     return (plan *) 0;
}
Ejemplo n.º 17
0
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     P *pln;
     plan *cld = 0, *cldtrans = 0, *cldrest = 0;
     int pdim0, pdim1;
     tensor *ts, *tv;
     INT vl, ivs, ovs;
     R *rit, *iit, *rot, *iot;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr, &pdim0, &pdim1))
          return (plan *) 0;

     vl = p->vecsz->dims[pdim0].n / p->sz->dims[pdim1].n;
     A(vl >= 1);
     ivs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].is;
     ovs = p->sz->dims[pdim1].n * p->vecsz->dims[pdim0].os;
     rit = TAINT(p->ri, vl == 1 ? 0 : ivs);
     iit = TAINT(p->ii, vl == 1 ? 0 : ivs);
     rot = TAINT(p->ro, vl == 1 ? 0 : ovs);
     iot = TAINT(p->io, vl == 1 ? 0 : ovs);

     ts = X(tensor_copy_inplace)(p->sz, INPLACE_IS);
     ts->dims[pdim1].os = p->vecsz->dims[pdim0].is;
     tv = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS);
     tv->dims[pdim0].os = p->sz->dims[pdim1].is;
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
     cldtrans = X(mkplan_d)(plnr, 
			    X(mkproblem_dft_d)(X(mktensor_0d)(),
					       X(tensor_append)(tv, ts),
					       rit, iit, 
					       rot, iot));
     X(tensor_destroy2)(ts, tv);
     if (!cldtrans) goto nada;

     ts = X(tensor_copy)(p->sz);
     ts->dims[pdim1].is = p->vecsz->dims[pdim0].is;
     tv = X(tensor_copy)(p->vecsz);
     tv->dims[pdim0].is = p->sz->dims[pdim1].is;
     tv->dims[pdim0].n = p->sz->dims[pdim1].n;
     cld = X(mkplan_d)(plnr, X(mkproblem_dft_d)(ts, tv,
						rot, iot,
						rot, iot));
     if (!cld) goto nada;

     tv = X(tensor_copy)(p->vecsz);
     tv->dims[pdim0].n -= vl * p->sz->dims[pdim1].n;
     cldrest = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(tensor_copy)(p->sz), tv,
						    p->ri + ivs * vl,
						    p->ii + ivs * vl,
						    p->ro + ovs * vl,
						    p->io + ovs * vl));
     if (!cldrest) goto nada;

     pln = MKPLAN_DFT(P, &padt, apply_op);
     pln->cldtrans = cldtrans;
     pln->cld = cld;
     pln->cldrest = cldrest;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;
     X(ops_cpy)(&cldrest->ops, &pln->super.super.ops);
     X(ops_madd2)(vl, &cld->ops, &pln->super.super.ops);
     X(ops_madd2)(vl, &cldtrans->ops, &pln->super.super.ops);
     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cldrest);
     X(plan_destroy_internal)(cld);
     X(plan_destroy_internal)(cldtrans);
     return (plan *)0;
}
Ejemplo n.º 18
0
Archivo: ct.c Proyecto: Aegisub/fftw3
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const ct_solver *ego = (const ct_solver *) ego_;
     const problem_dft *p;
     P *pln = 0;
     plan *cld = 0, *cldw = 0;
     INT n, r, m, v, ivs, ovs;
     iodim *d;

     static const plan_adt padt = {
	  X(dft_solve), awake, print, destroy
     };

     if ((NO_NONTHREADEDP(plnr)) || !X(ct_applicable)(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_dft *) p_;
     d = p->sz->dims;
     n = d[0].n;
     r = X(choose_radix)(ego->r, n);
     m = n / r;

     X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs);

     switch (ego->dec) {
	 case DECDIT:
	 {
	      cldw = ego->mkcldw(ego,
				 r, m * d[0].os, m * d[0].os,
				 m, d[0].os,
				 v, ovs, ovs,
				 0, m,
				 p->ro, p->io, plnr);
	      if (!cldw) goto nada;

	      cld = X(mkplan_d)(plnr,
				X(mkproblem_dft_d)(
				     X(mktensor_1d)(m, r * d[0].is, d[0].os),
				     X(mktensor_2d)(r, d[0].is, m * d[0].os,
						    v, ivs, ovs),
				     p->ri, p->ii, p->ro, p->io)
		   );
	      if (!cld) goto nada;

	      pln = MKPLAN_DFT(P, &padt, apply_dit);
	      break;
	 }
	 case DECDIF:
	 case DECDIF+TRANSPOSE:
	 {
	      INT cors, covs; /* cldw ors, ovs */
	      if (ego->dec == DECDIF+TRANSPOSE) {
		   cors = ivs;
		   covs = m * d[0].is;
		   /* ensure that we generate well-formed dftw subproblems */
		   /* FIXME: too conservative */
		   if (!(1
			 && r == v
			 && d[0].is == r * cors))
			goto nada;

		   /* FIXME: allow in-place only for now, like in
		      fftw-3.[01] */
		   if (!(1
			 && p->ri == p->ro
			 && d[0].is == r * d[0].os
			 && cors == d[0].os
			 && covs == ovs
			    ))
			goto nada;
	      } else {
		   cors = m * d[0].is;
		   covs = ivs;
	      }

	      cldw = ego->mkcldw(ego,
				 r, m * d[0].is, cors,
				 m, d[0].is,
				 v, ivs, covs,
				 0, m,
				 p->ri, p->ii, plnr);
	      if (!cldw) goto nada;

	      cld = X(mkplan_d)(plnr,
				X(mkproblem_dft_d)(
				     X(mktensor_1d)(m, d[0].is, r * d[0].os),
				     X(mktensor_2d)(r, cors, d[0].os,
						    v, covs, ovs),
				     p->ri, p->ii, p->ro, p->io)
		   );
	      if (!cld) goto nada;

	      pln = MKPLAN_DFT(P, &padt, apply_dif);
	      break;
	 }

	 default: A(0);

     }

     pln->cld = cld;
     pln->cldw = cldw;
     pln->r = r;
     X(ops_add)(&cld->ops, &cldw->ops, &pln->super.super.ops);

     /* inherit could_prune_now_p attribute from cldw */
     pln->super.super.could_prune_now_p = cldw->could_prune_now_p;
     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cldw);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}