예제 #1
0
static plan *mkcldw(const ct_solver *ego_,
		    INT r, INT irs, INT ors,
		    INT m, INT ms,
		    INT v, INT ivs, INT ovs,
		    INT mstart, INT mcount,
		    R *rio, R *iio,
		    planner *plnr)
{
     const S *ego = (const S *) ego_;
     P *pln;
     const ct_desc *e = ego->desc;
     INT extra_iter;

     static const plan_adt padt = {
	  0, awake, print, destroy
     };

     A(mstart >= 0 && mstart + mcount <= m);
     if (!applicable(ego,
		     r, irs, ors, m, ms, v, ivs, ovs, mstart, mstart + mcount,
		     rio, iio, plnr, &extra_iter))
          return (plan *)0;

     if (ego->bufferedp) {
	  pln = MKPLAN_DFTW(P, &padt, apply_buf);
     } else {
	  pln = MKPLAN_DFTW(P, &padt, extra_iter ? apply_extra_iter : apply);
     }

     pln->k = ego->k;
     pln->rs = X(mkstride)(r, irs);
     pln->td = 0;
     pln->r = r;
     pln->m = m;
     pln->ms = ms;
     pln->v = v;
     pln->vs = ivs;
     pln->mb = mstart;
     pln->me = mstart + mcount;
     pln->slv = ego;
     pln->brs = X(mkstride)(r, 2 * compute_batchsize(r));
     pln->extra_iter = extra_iter;

     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(v * (mcount/e->genus->vl), &e->ops, &pln->super.super.ops);

     if (ego->bufferedp) {
	  /* 8 load/stores * N * V */
	  pln->super.super.ops.other += 8 * r * mcount * v;
     }

     pln->super.super.could_prune_now_p =
	  (!ego->bufferedp && r >= 5 && r < 64 && m >= r);
     return &(pln->super.super);
}
예제 #2
0
static plan *mkcldw(const ct_solver *ego_, 
		    int dec, INT r, INT m, INT s, INT vl, INT vs, 
		    INT mstart, INT mcount,
		    R *rio, R *iio,
		    planner *plnr)
{
     const S *ego = (const S *)ego_;
     P *pln;
     plan *cld = 0;

     static const plan_adt padt = {
	  0, awake, print, destroy
     };

     A(mstart >= 0 && mstart + mcount <= m);
     if (!applicable(r, m, plnr))
          return (plan *)0;

     cld = X(mkplan_d)(plnr, 
			X(mkproblem_dft_d)(
			     X(mktensor_1d)(r, m * s, m * s),
			     X(mktensor_2d)(mcount, s, s, vl, vs, vs),
			     rio, iio, rio, iio)
			);
     if (!cld) goto nada;

     pln = MKPLAN_DFTW(P, &padt, dec == DECDIT ? apply_dit : apply_dif);
     pln->slv = ego;
     pln->cld = cld;
     pln->r = r;
     pln->m = m;
     pln->s = s;
     pln->vl = vl;
     pln->vs = vs;
     pln->mstart = mstart;
     pln->mcount = mcount;
     pln->dec = dec;
     pln->td = 0;

     {
	  double n0 = (r - 1) * (mcount - 1) * vl;
	  pln->super.super.ops = cld->ops;
	  pln->super.super.ops.mul += 8 * n0;
	  pln->super.super.ops.add += 4 * n0;
	  pln->super.super.ops.other += 8 * n0;
     }
     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
예제 #3
0
static plan *mkcldw(const ct_solver *ego_,
		    INT r, INT irs, INT ors,
		    INT m, INT ms,
		    INT v, INT ivs, INT ovs,
		    INT mstart, INT mcount,
		    R *rio, R *iio,
		    planner *plnr)
{
     const S *ego = (const S *) ego_;
     P *pln;
     const ct_desc *e = ego->desc;

     static const plan_adt padt = {
	  0, awake, print, destroy
     };

     A(mstart >= 0 && mstart + mcount <= m);
     if (!applicable(ego,
		     r, irs, ors, m, ms, v, ivs, ovs, mstart, mstart + mcount,
		     rio, iio, plnr))
          return (plan *)0;

     pln = MKPLAN_DFTW(P, &padt, apply);

     pln->k = ego->k;
     pln->rs = X(mkstride)(r, irs);
     pln->vs = X(mkstride)(v, ivs);
     pln->td = 0;
     pln->r = r;
     pln->m = m;
     pln->ms = ms;
     pln->v = v;
     pln->mb = mstart;
     pln->me = mstart + mcount;
     pln->slv = ego;

     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(mcount/e->genus->vl, &e->ops, &pln->super.super.ops);

     return &(pln->super.super);
}