Exemple #1
0
/*
 * Create the plan for a problem that applicable() accepts.
 *
 * ego_ is viewed as the solver type S and p_ as a problem_rdft; plnr is
 * not used.  A null plan is returned when applicable() rejects the
 * problem.  Otherwise a plan of type P is created around the solver's
 * apply routine, fill_iodim() fills in its loop description, it takes
 * over the solver's name, and its cost is set to two "other" operations
 * per element of p->vecsz.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), X(null_awake), print, X(plan_null_destroy)
     };
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     int retval;
     P *pln;

     UNUSED(plnr);

     if (!applicable(ego, p_))
          return (plan *) 0;

     pln = MKPLAN_RDFT(P, &padt, ego->apply);

     /* retval is only inspected by the assertion below */
     retval = fill_iodim(pln, p);
     (void)retval; /* UNUSED unless DEBUG */
     A(retval);
     A(pln->vl > 0); /* because FINITE_RNK(p->vecsz->rnk) holds */

     pln->nam = ego->nam;

     /* one load and one store for each of the X(tensor_sz)(p->vecsz)
        elements */
     X(ops_other)(2 * X(tensor_sz)(p->vecsz), &pln->super.super.ops);

     return &pln->super.super;
}
/*
 * Plan the problem by wrapping a child R2HC plan over the same sizes and
 * arrays.
 *
 * Returns a null plan when applicable() rejects the problem or when the
 * child cannot be planned.  On success the new plan of type P records the
 * first dimension's length and output stride, owns the child, and reports
 * the child's operation count plus 4*((n-1)/2) "other" operations and
 * 2*((n-1)/2) additions.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          fftwf_rdft_solve, awake, print, destroy
     };
     const problem_rdft *p = (const problem_rdft *) p_;
     problem *cldp;
     plan *cld;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          return (plan *) 0;

     /* NO_DHT_R2HC stops infinite loops with rdft-dht.c */
     cldp = fftwf_mkproblem_rdft_1(p->sz, p->vecsz, p->I, p->O, R2HC);
     cld = fftwf_mkplan_f_d(plnr, cldp, NO_DHT_R2HC, 0, 0);
     if (!cld)
          return (plan *) 0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld = cld;
     pln->n = p->sz->dims[0].n;
     pln->os = p->sz->dims[0].os;

     /* start from the child's cost and add this plan's own work */
     pln->super.super.ops = cld->ops;
     pln->super.super.ops.add += 2 * ((pln->n - 1)/2);
     pln->super.super.ops.other += 4 * ((pln->n - 1)/2);

     return &pln->super.super;
}
Exemple #3
0
/*
 * Plan a problem by splitting its size tensor in two and giving each part
 * its own child plan.
 *
 * applicable() decides whether the problem is accepted and supplies the
 * split point spltrnk.  The first child goes from p->I to p->O over one
 * part of the split, with the other part appended to the vector
 * dimensions.  The second child works on p->O only, using the INPLACE_OS
 * copies of the tensors.  A null plan is returned when the problem is
 * rejected or either child cannot be planned; the temporary tensors are
 * released on every path past the applicability check.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     tensor *part1, *part2, *vecsz_inpl, *part2_inpl;
     plan *first = 0, *second = 0;
     plan *result = (plan *) 0;
     int spltrnk;
     P *pln;

     if (!applicable(ego_, p_, plnr, &spltrnk))
          return (plan *) 0;

     X(tensor_split)(p->sz, &part1, spltrnk, &part2);
     vecsz_inpl = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS);
     part2_inpl = X(tensor_copy_inplace)(part2, INPLACE_OS);

     first = X(mkplan_d)(plnr,
                         X(mkproblem_rdft_d)(X(tensor_copy)(part2),
                                             X(tensor_append)(p->vecsz, part1),
                                             p->I, p->O, p->kind + spltrnk));

     /* the second child is only attempted once the first one exists */
     if (first)
          second = X(mkplan_d)(plnr,
                               X(mkproblem_rdft_d)(
                                    X(tensor_copy_inplace)(part1, INPLACE_OS),
                                    X(tensor_append)(vecsz_inpl, part2_inpl),
                                    p->O, p->O, p->kind));

     if (first && second) {
          pln = MKPLAN_RDFT(P, &padt, apply);

          pln->cld1 = first;
          pln->cld2 = second;
          pln->solver = ego;
          X(ops_add)(&first->ops, &second->ops, &pln->super.super.ops);

          result = &(pln->super.super);
     } else {
          /* planning failed: drop whatever child was created */
          X(plan_destroy_internal)(second);
          X(plan_destroy_internal)(first);
     }

     X(tensor_destroy4)(part2, part1, vecsz_inpl, part2_inpl);
     return result;
}
Exemple #4
0
/*
 * Plan the problem on top of a child R2HC plan of length n, where n is
 * one less than the length of the first dimension.
 *
 * The child is planned in place on a scratch buffer that exists only
 * while planning.  Returns a null plan when applicable() rejects the
 * problem or the child cannot be planned.  The reported cost is
 * (own operations + child operations) multiplied by the vector length
 * obtained from p->vecsz.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const problem_rdft *p = (const problem_rdft *) p_;
     INT n, half, even;
     opcnt ops;
     plan *cld;
     R *buf;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     n = p->sz->dims[0].n - 1;
     A(n > 0);

     /* scratch array used only to describe the child problem */
     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
     cld = X(mkplan_d)(plnr,
                       X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
                                             X(mktensor_0d)(),
                                             buf, buf, R2HC));
     X(ifree)(buf);

     if (!cld)
          return (plan *)0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld = cld;
     pln->td = 0;
     pln->n = n;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;
     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     /* per-vector cost of this plan's own work */
     half = (n-1)/2;
     even = 1 - n % 2;   /* 1 when n is even, 0 when n is odd */
     X(ops_zero)(&ops);
     ops.other = 8 + half * 11 + even * 5;
     ops.add = 2 + half * 5;
     ops.mul = half * 3 + even * 1;

     /* total = vl * (own work + child) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);

     return &pln->super.super;
}
Exemple #5
0
/*
 * Create a zero-cost plan for a problem accepted by applicable().
 *
 * plnr is not used.  When the problem is accepted, a plan_rdft using the
 * file's apply routine is created and its operation count is zeroed;
 * otherwise a null plan is returned.
 */
static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
{
     static const plan_adt padt = {
          fftwf_rdft_solve, fftwf_null_awake, print, fftwf_plan_null_destroy
     };
     plan_rdft *nop;

     UNUSED(plnr);

     if (applicable(ego, p)) {
          nop = MKPLAN_RDFT(plan_rdft, &padt, apply);
          fftwf_ops_zero(&nop->super.ops);
          return &nop->super;
     }

     return (plan *) 0;
}
Exemple #6
0
/*
 * Create a plan that runs the solver's kernel k directly on the problem.
 *
 * plnr is not used.  Returns a null plan when applicable() rejects the
 * problem.  Otherwise the plan stores the kernel, stride objects built
 * from the first dimension's length and input/output strides, the vector
 * loop taken from p->vecsz, and a back-pointer to the solver.  The cost is
 * the descriptor's operation count multiplied by
 * vl / ego->desc->genus->vl.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
    static const plan_adt padt = {
        fftwf_rdft_solve, fftwf_null_awake, print, destroy
    };
    const S *ego = (const S *) ego_;
    const problem_rdft *p = (const problem_rdft *) p_;
    iodim *d;
    P *pln;

    UNUSED(plnr);

    if (!applicable(ego_, p_))
        return (plan *)0;

    pln = MKPLAN_RDFT(P, &padt, apply);
    pln->slv = ego;
    pln->k = ego->k;

    /* strides of the transformed dimension */
    d = p->sz->dims;
    pln->is = fftwf_mkstride(d[0].n, d[0].is);
    pln->os = fftwf_mkstride(d[0].n, d[0].os);

    /* vector loop */
    fftwf_tensor_tornk1(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

    /* cost: one kernel invocation per genus->vl vectors */
    fftwf_ops_zero(&pln->super.super.ops);
    fftwf_ops_madd2(pln->vl / ego->desc->genus->vl,
                    &ego->desc->ops,
                    &pln->super.super.ops);

    pln->super.super.could_prune_now_p = 1;

    return &pln->super.super;
}
Exemple #7
0
/*
 * Plan the problem as a rank-0 copy child (cldcpy) plus a second child
 * (cld) whose problem is produced by the solver's mkcld hook.
 *
 * The copy child is planned first, from p->I to p->O over the combined
 * vecsz/sz tensor; cld is then planned with NO_BUFFERING passed to
 * X(mkplan_f_d).  Returns a null plan when applicable() rejects the
 * problem or either child cannot be planned, destroying any child that
 * was already created.  The plan's cost is the sum of both children.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     plan *cld = 0, *cldcpy = 0;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          return (plan *) 0;

     cldcpy = X(mkplan_d)(plnr,
                          X(mkproblem_rdft_0_d)(
                               X(tensor_append)(p->vecsz, p->sz),
                               p->I, p->O));

     /* only try the second child once the copy plan exists */
     if (cldcpy)
          cld = X(mkplan_f_d)(plnr, ego->adt->mkcld(p), NO_BUFFERING, 0, 0);

     if (!cldcpy || !cld) {
          X(plan_destroy_internal)(cld);
          X(plan_destroy_internal)(cldcpy);
          return (plan *)0;
     }

     pln = MKPLAN_RDFT(P, &padt, ego->adt->apply);
     pln->slv = ego;
     pln->cldcpy = cldcpy;
     pln->cld = cld;
     X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops);

     return &pln->super.super;
}
Exemple #8
0
/*
 * Plan a REDFT01 / REDFT10 / RODFT01 / RODFT10 problem on top of a child
 * R2HC plan of the same length n.
 *
 * The child is planned in place on a scratch buffer that exists only
 * while planning.  Returns a null plan when applicable() rejects the
 * problem, when the child cannot be planned, or when p->kind[0] is not
 * one of the four kinds above.  In the last case the child plan is now
 * destroyed before returning instead of being leaked.
 *
 * The reported cost is (own operations + child operations) multiplied by
 * the vector length obtained from p->vecsz.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const problem_rdft *p;
     plan *cld;
     R *buf;
     INT n;
     opcnt ops;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     p = (const problem_rdft *) p_;

     n = p->sz->dims[0].n;

     /* scratch array used only to describe the child problem */
     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);

     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
                                                   X(mktensor_0d)(),
                                                   buf, buf, R2HC));
     X(ifree)(buf);
     if (!cld)
          return (plan *)0;

     /* pick the apply routine matching the transform kind */
     switch (p->kind[0]) {
         case REDFT01: pln = MKPLAN_RDFT(P, &padt, apply_re01); break;
         case REDFT10: pln = MKPLAN_RDFT(P, &padt, apply_re10); break;
         case RODFT01: pln = MKPLAN_RDFT(P, &padt, apply_ro01); break;
         case RODFT10: pln = MKPLAN_RDFT(P, &padt, apply_ro10); break;
         default:
              A(0);
              /* cld has no owner yet: release it rather than leak it */
              X(plan_destroy_internal)(cld);
              return (plan*)0;
     }

     pln->n = n;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;
     pln->cld = cld;
     pln->td = 0;
     pln->kind = p->kind[0];

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     /* per-vector cost of this plan's own work; (1 - n % 2) is 1 for
        even n and 0 for odd n */
     X(ops_zero)(&ops);
     ops.other = 4 + (n-1)/2 * 10 + (1 - n % 2) * 5;
     if (p->kind[0] == REDFT01 || p->kind[0] == RODFT01) {
          ops.add = (n-1)/2 * 6;
          ops.mul = (n-1)/2 * 4 + (1 - n % 2) * 2;
     }
     else { /* 10 transforms */
          ops.add = (n-1)/2 * 2;
          ops.mul = 1 + (n-1)/2 * 6 + (1 - n % 2) * 2;
     }

     /* total = vl * (own work + child) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);

     return &(pln->super.super);
}
Exemple #9
0
/*
 * Build a plan that cuts one vector dimension of the problem into blocks
 * and plans each block as a separate child problem.
 *
 * applicable() decides whether the problem is accepted and selects the
 * vector dimension vdim.  That dimension (length d->n) is divided into
 * nthr blocks of block_size elements, the last block taking whatever
 * remains.  Returns a null plan when the problem is rejected or any child
 * cannot be planned; in the latter case all children created so far are
 * destroyed.
 *
 * Side effect: plnr->nthr is overwritten before the children are planned
 * and is not restored here.
 * NOTE(review): presumably the caller restores plnr->nthr -- confirm.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_rdft *p;
     P *pln;
     problem *cldp;
     int vdim;
     iodim *d;
     plan **cldrn = (plan **) 0;
     int i, nthr;
     INT its, ots, block_size;
     tensor *vecsz;

     static const plan_adt padt = {
	  X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr, &vdim))
          return (plan *) 0;
     p = (const problem_rdft *) p_;

     d = p->vecsz->dims + vdim;

     /* block_size = ceil(d->n / plnr->nthr); nthr = number of blocks of
        that size needed to cover d->n */
     block_size = (d->n + plnr->nthr - 1) / plnr->nthr;
     nthr = (int)((d->n + block_size - 1) / block_size);
     /* value seen by the child planning below: ceil(old nthr / nthr) */
     plnr->nthr = (plnr->nthr + nthr - 1) / nthr;
     /* distance between the starts of consecutive blocks */
     its = d->is * block_size;
     ots = d->os * block_size;

     /* start with all-null children so that the cleanup at nada can run
        over the whole array */
     cldrn = (plan **)MALLOC(sizeof(plan *) * nthr, PLANS);
     for (i = 0; i < nthr; ++i) cldrn[i] = (plan *) 0;
     
     /* private copy of vecsz whose vdim length is rewritten per block */
     vecsz = X(tensor_copy)(p->vecsz);
     for (i = 0; i < nthr; ++i) {
	  /* last block gets the remainder, all others are full-sized */
	  vecsz->dims[vdim].n =
	       (i == nthr - 1) ? (d->n - i*block_size) : block_size;
	  cldp = X(mkproblem_rdft)(p->sz, vecsz,
				   p->I + i*its, p->O + i*ots, p->kind);
	  cldrn[i] = X(mkplan_d)(plnr, cldp);
	  if (!cldrn[i]) goto nada;
     }
     X(tensor_destroy)(vecsz);

     pln = MKPLAN_RDFT(P, &padt, apply);

     /* the plan keeps the child array */
     pln->cldrn = cldrn;
     pln->its = its;
     pln->ots = ots;
     pln->nthr = nthr;

     pln->solver = ego;
     /* ops and pcost are the sums over all children */
     X(ops_zero)(&pln->super.super.ops);
     pln->super.super.pcost = 0;
     for (i = 0; i < nthr; ++i) {
	  X(ops_add2)(&cldrn[i]->ops, &pln->super.super.ops);
	  pln->super.super.pcost += cldrn[i]->pcost;
     }

     return &(pln->super.super);

 nada:
     /* only reached from inside the planning loop, so vecsz and cldrn
        are both allocated; unplanned entries of cldrn are still null */
     if (cldrn) {
	  for (i = 0; i < nthr; ++i)
	       X(plan_destroy_internal)(cldrn[i]);
	  X(ifree)(cldrn);
     }
     X(tensor_destroy)(vecsz);
     return (plan *) 0;
}
Exemple #10
0
/*
 * Build a multi-threaded hc2hc plan: a set of per-block plans created by
 * the solver's mkcldw hook (cldws) plus one child rdft plan (cld).
 *
 * Rejected (null plan) when plnr->nthr <= 1 or X(hc2hc_applicable)() says
 * no.  The length n of the first dimension is factored as r * m with r
 * from X(choose_radix)().  The mcount = (m + 2) / 2 items are divided
 * into nthr blocks of block_size, the last block taking the remainder,
 * and one cldws entry is created per block.
 *
 * plnr->nthr is reduced while the cldws entries are created and restored
 * to its saved value before cld is planned.  It is not restored on the
 * failure path.
 * NOTE(review): presumably the caller restores plnr->nthr -- confirm.
 *
 * For R2HC the plan uses apply_dit, for HC2R apply_dif.  Any other kind
 * trips A(0) and now fails through the common cleanup path.  Previously
 * it fell out of the switch and dereferenced the still-null pln whenever
 * assertions were compiled out.
 *
 * Returns a null plan if any sub-plan cannot be created; everything
 * created up to that point is destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
     const problem_rdft *p;
     P *pln = 0;
     plan *cld = 0, **cldws = 0;
     INT n, r, m, v, ivs, ovs, mcount;
     int i, nthr, plnr_nthr_save;
     INT block_size;
     iodim *d;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (plnr->nthr <= 1 || !X(hc2hc_applicable)(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_rdft *) p_;
     d = p->sz->dims;
     n = d[0].n;
     r = X(choose_radix)(ego->r, n);
     m = n / r;
     mcount = (m + 2) / 2;

     X(tensor_tornk1)(p->vecsz, &v, &ivs, &ovs);

     /* block_size = ceil(mcount / plnr->nthr); nthr = blocks needed */
     block_size = (mcount + plnr->nthr - 1) / plnr->nthr;
     nthr = (int)((mcount + block_size - 1) / block_size);
     plnr_nthr_save = plnr->nthr;
     plnr->nthr = (plnr->nthr + nthr - 1) / nthr;

     /* all-null to begin with so the cleanup can walk the whole array */
     cldws = (plan **) MALLOC(sizeof(plan *) * nthr, PLANS);
     for (i = 0; i < nthr; ++i) cldws[i] = (plan *) 0;

     switch (p->kind[0]) {
         case R2HC:
              /* per-block plans use the output array and strides */
              for (i = 0; i < nthr; ++i) {
                   cldws[i] = ego->mkcldw(ego,
                                          R2HC, r, m, d[0].os, v, ovs,
                                          i*block_size,
                                          (i == nthr - 1) ?
                                          (mcount - i*block_size) : block_size,
                                          p->O, plnr);
                   if (!cldws[i]) goto nada;
              }

              /* the child is planned with the original thread count */
              plnr->nthr = plnr_nthr_save;

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, r * d[0].is, d[0].os),
                                     X(mktensor_2d)(r, d[0].is, m * d[0].os,
                                                    v, ivs, ovs),
                                     p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dit);
              break;

         case HC2R:
              /* per-block plans use the input array and strides */
              for (i = 0; i < nthr; ++i) {
                   cldws[i] = ego->mkcldw(ego,
                                          HC2R, r, m, d[0].is, v, ivs,
                                          i*block_size,
                                          (i == nthr - 1) ?
                                          (mcount - i*block_size) : block_size,
                                          p->I, plnr);
                   if (!cldws[i]) goto nada;
              }

              /* the child is planned with the original thread count */
              plnr->nthr = plnr_nthr_save;

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, d[0].is, r * d[0].os),
                                     X(mktensor_2d)(r, m * d[0].is, d[0].os,
                                                    v, ivs, ovs),
                                     p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dif);
              break;

         default:
              A(0);
              /* pln is still null here: fail cleanly instead of
                 falling through and dereferencing it */
              goto nada;
     }

     pln->cld = cld;
     pln->cldws = cldws;
     pln->nthr = nthr;
     pln->r = r;

     /* cost = all per-block plans + child; the plan may be pruned now
        if any per-block plan says so */
     X(ops_zero)(&pln->super.super.ops);
     for (i = 0; i < nthr; ++i) {
          X(ops_add2)(&cldws[i]->ops, &pln->super.super.ops);
          pln->super.super.could_prune_now_p |= cldws[i]->could_prune_now_p;
     }
     X(ops_add2)(&cld->ops, &pln->super.super.ops);
     return &(pln->super.super);

 nada:
     if (cldws) {
          for (i = 0; i < nthr; ++i)
               X(plan_destroy_internal)(cldws[i]);
          X(ifree)(cldws);
     }
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
/*
 * Plan the problem with two children that share a scratch buffer of 2*n
 * reals, where n is one less than the length of the first dimension:
 *   - cld:    an in-place R2HC transform of length 2*n on the buffer;
 *   - cldcpy: a rank-0 transfer of n+1 elements from the buffer to p->O
 *             using the first dimension's output stride.
 *
 * The buffer exists only while planning.  Every failure, including
 * rejection by applicable(), leaves through the nada label, which frees
 * the buffer if it was allocated and destroys cld if it was created.
 * The reported cost is (own operations + cld + cldcpy) multiplied by the
 * vector length obtained from p->vecsz.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };
     const problem_rdft *p = (const problem_rdft *) p_;
     plan *cld = (plan *) 0, *cldcpy;
     R *buf = (R *) 0;
     int vl, ivs, ovs;
     opcnt ops;
     int n;
     P *pln;

     if (!applicable(ego_, p_, plnr))
          goto nada;

     n = p->sz->dims[0].n - 1;
     A(n > 0);
     buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);

     /* transform child, in place on the scratch buffer */
     cld = X(mkplan_d)(plnr,
                       X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1),
                                             X(mktensor_0d)(),
                                             buf, buf, R2HC));
     if (!cld)
          goto nada;

     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     /* copy child, from the scratch buffer to the output array */
     cldcpy = X(mkplan_d)(plnr,
                          X(mkproblem_rdft_1_d)(
                               X(mktensor_0d)(),
                               X(mktensor_1d)(n+1,1, p->sz->dims[0].os),
                               buf, TAINT(p->O, ovs), R2HC));
     if (!cldcpy)
          goto nada;

     /* planning is finished; the scratch buffer is no longer needed */
     X(ifree)(buf);

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->n = n;
     pln->is = p->sz->dims[0].is;
     pln->vl = vl;
     pln->ivs = ivs;
     pln->ovs = ovs;

     X(ops_zero)(&ops);
     ops.other = n + 2*n; /* loads + stores (input -> buf) */

     /* total = vl * (own work + both children) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);

     return &pln->super.super;

 nada:
     X(ifree0)(buf);
     if (cld)
          X(plan_destroy_internal)(cld);
     return (plan *)0;
}
Exemple #12
0
/*
 * Build an hc2hc plan: one plan created by the solver's mkcldw hook
 * (cldw) plus one child rdft plan (cld).
 *
 * Rejected (null plan) when NO_NONTHREADEDP(plnr) holds or
 * X(hc2hc_applicable)() says no.  The length n of the first dimension is
 * factored as r * m with r from X(choose_radix)().
 *
 * For R2HC, cldw is created on the output array and the plan uses
 * apply_dit; for HC2R, cldw is created on the input array and the plan
 * uses apply_dif.  Any other kind trips A(0) and now fails through the
 * common cleanup path.  Previously it fell out of the switch and
 * dereferenced the still-null pln whenever assertions were compiled out.
 *
 * Returns a null plan if cldw or cld cannot be created; whatever was
 * created is destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
     const problem_rdft *p;
     P *pln = 0;
     plan *cld = 0, *cldw = 0;
     INT n, r, m, vl, ivs, ovs;
     iodim *d;
     tensor *t1, *t2;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (NO_NONTHREADEDP(plnr) || !X(hc2hc_applicable)(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_rdft *) p_;
     d = p->sz->dims;
     n = d[0].n;
     r = X(choose_radix)(ego->r, n);
     m = n / r;

     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     switch (p->kind[0]) {
         case R2HC:
              cldw = ego->mkcldw(ego,
                                 R2HC, r, m, d[0].os, vl, ovs, 0, (m+2)/2,
                                 p->O, plnr);
              if (!cldw) goto nada;

              /* vector tensor of the child: the radix dimension followed
                 by the original vecsz; t1 is only a temporary */
              t1 = X(mktensor_1d)(r, d[0].is, m * d[0].os);
              t2 = X(tensor_append)(t1, p->vecsz);
              X(tensor_destroy)(t1);

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, r * d[0].is, d[0].os),
                                     t2, p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dit);
              break;

         case HC2R:
              cldw = ego->mkcldw(ego,
                                 HC2R, r, m, d[0].is, vl, ivs, 0, (m+2)/2,
                                 p->I, plnr);
              if (!cldw) goto nada;

              /* same construction with the stride roles swapped */
              t1 = X(mktensor_1d)(r, m * d[0].is, d[0].os);
              t2 = X(tensor_append)(t1, p->vecsz);
              X(tensor_destroy)(t1);

              cld = X(mkplan_d)(plnr,
                                X(mkproblem_rdft_d)(
                                     X(mktensor_1d)(m, d[0].is, r * d[0].os),
                                     t2, p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dif);
              break;

         default:
              A(0);
              /* pln is still null here: fail cleanly instead of
                 falling through and dereferencing it */
              goto nada;
     }

     pln->cld = cld;
     pln->cldw = cldw;
     pln->r = r;
     X(ops_add)(&cld->ops, &cldw->ops, &pln->super.super.ops);

     /* inherit could_prune_now_p attribute from cldw */
     pln->super.super.could_prune_now_p = cldw->could_prune_now_p;

     return &(pln->super.super);

 nada:
     X(plan_destroy_internal)(cldw);
     X(plan_destroy_internal)(cld);
     return (plan *) 0;
}
/*
 * Build a plan that runs the transform through intermediate buffers.
 *
 * The vl vectors of the problem are processed nbuf at a time, using nbuf
 * buffers placed bufdist elements apart.  Three children are planned:
 *   - cld:     the transform itself, between the user array and the
 *              buffers;
 *   - cldcpy:  a rank-0 copy between the buffers and the other user
 *              array;
 *   - cldrest: an ordinary plan for the vl % nbuf vectors left over after
 *              the full groups of nbuf.
 * For HC2R problems the copy fills the buffers from p->I and cld writes
 * to p->O; otherwise cld reads p->I into the buffers and the copy writes
 * them to p->O.
 *
 * The buffer allocated here only serves to describe the child problems
 * and is freed before returning.  Every failure, including rejection by
 * applicable(), leaves through nada, where the pointers that have not
 * been assigned yet are still null.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const S *ego = (const S *)ego_;
     plan *cld = (plan *) 0;
     plan *cldcpy = (plan *) 0;
     plan *cldrest = (plan *) 0;
     const problem_rdft *p = (const problem_rdft *) p_;
     float *bufs = (float *) 0;
     INT nbuf = 0, bufdist, n, vl;
     INT ivs, ovs;
     int hc2rp;

     static const plan_adt padt = {
	  fftwf_rdft_solve, awake, print, destroy
     };

     if (!applicable(ego, p_, plnr))
          goto nada;

     n = fftwf_tensor_sz(p->sz);
     fftwf_tensor_tornk1(p->vecsz, &vl, &ivs, &ovs);
     /* nonzero for HC2R problems, which buffer the input side */
     hc2rp = (p->kind[0] == HC2R);

     /* buffer count is capped by the solver's maxnbufs[] entry */
     nbuf = fftwf_nbuf(n, vl, maxnbufs[ego->maxnbuf_ndx]);
     bufdist = fftwf_bufdist(n, vl);
     A(nbuf > 0);

     /* initial allocation for the purpose of planning */
     bufs = (float *) MALLOC(sizeof(float) * nbuf * bufdist, BUFFERS);

     if (hc2rp) {
	  /* allow destruction of buffer */
	  cld = fftwf_mkplan_f_d(plnr,
			      fftwf_mkproblem_rdft_d(
				   fftwf_mktensor_1d(n, 1, p->sz->dims[0].os),
				   fftwf_mktensor_1d(nbuf, bufdist, ovs),
				   bufs, TAINT(p->O, ovs * nbuf), p->kind),
			      0, 0, NO_DESTROY_INPUT);
	  if (!cld) goto nada;

	  /* copying input into the buffer is a rank-0 transform: */
	  cldcpy = fftwf_mkplan_d(plnr,
			       fftwf_mkproblem_rdft_0_d(
				    fftwf_mktensor_2d(nbuf, ivs, bufdist,
						   n, p->sz->dims[0].is, 1),
				    TAINT(p->I, ivs * nbuf), bufs));
	  if (!cldcpy) goto nada;
     } else {
	  /* allow destruction of input if problem is in place */
	  cld = fftwf_mkplan_f_d(plnr,
			      fftwf_mkproblem_rdft_d(
				   fftwf_mktensor_1d(n, p->sz->dims[0].is, 1),
				   fftwf_mktensor_1d(nbuf, ivs, bufdist),
				   TAINT(p->I, ivs * nbuf), bufs, p->kind),
			      0, 0, (p->I == p->O) ? NO_DESTROY_INPUT : 0);
	  if (!cld) goto nada;

	  /* copying back from the buffer is a rank-0 transform: */
	  cldcpy = fftwf_mkplan_d(plnr,
			       fftwf_mkproblem_rdft_0_d(
				    fftwf_mktensor_2d(nbuf, bufdist, ovs,
						   n, 1, p->sz->dims[0].os),
				    bufs, TAINT(p->O, ovs * nbuf)));
	  if (!cldcpy) goto nada;
     }

     /* deallocate buffers, let apply() allocate them for real */
     fftwf_ifree(bufs);
     /* cleared so that the cleanup at nada does not free it again */
     bufs = 0;

     /* plan the leftover transforms (cldrest): */
     {
	  /* offsets past the last full group of nbuf vectors */
	  INT id = ivs * (nbuf * (vl / nbuf));
	  INT od = ovs * (nbuf * (vl / nbuf));
	  cldrest = fftwf_mkplan_d(plnr,
				fftwf_mkproblem_rdft_d(
				     fftwf_tensor_copy(p->sz),
				     fftwf_mktensor_1d(vl % nbuf, ivs, ovs),
				     p->I + id, p->O + od, p->kind));
     }
     if (!cldrest) goto nada;

     pln = MKPLAN_RDFT(P, &padt, hc2rp ? apply_hc2r : apply);
     pln->cld = cld;
     pln->cldcpy = cldcpy;
     pln->cldrest = cldrest;
     pln->n = n;
     pln->vl = vl;
     pln->ivs_by_nbuf = ivs * nbuf;
     pln->ovs_by_nbuf = ovs * nbuf;

     pln->nbuf = nbuf;
     pln->bufdist = bufdist;

     {
	  /* cost = (vl / nbuf) * (cld + cldcpy) + cldrest */
	  opcnt t;
	  fftwf_ops_add(&cld->ops, &cldcpy->ops, &t);
	  fftwf_ops_madd(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
     }

     return &(pln->super.super);

 nada:
     fftwf_ifree0(bufs);
     fftwf_plan_destroy_internal(cldrest);
     fftwf_plan_destroy_internal(cldcpy);
     fftwf_plan_destroy_internal(cld);
     return (plan *) 0;
}
Exemple #14
0
/*
 * Create a plan that runs the solver's kernel k directly, in either a
 * plain or a buffered variant selected by ego->bufferedp.
 *
 * plnr is not used.  The buffered variant is checked with
 * applicable_buf(), the plain one with applicable(); a null plan is
 * returned on rejection.  For R2HC-type kinds (R2HC_KINDP) the real
 * stride rs is the input stride and the complex stride cs is the output
 * stride; for all other kinds the two are swapped.  A second set of
 * stride objects is built from the batch size returned by
 * compute_batchsize(n).
 *
 * The cost is the descriptor's operation count multiplied by
 * vl / ego->desc->genus->vl, plus 2*n*vl "other" operations in the
 * buffered variant.  Only the unbuffered plan sets could_prune_now_p.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          X(rdft_solve), X(null_awake), print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     INT rs, cs, b, n;
     iodim *d;
     P *pln;

     UNUSED(plnr);

     /* each variant has its own applicability test */
     if (ego->bufferedp ? !applicable_buf(ego_, p_) : !applicable(ego_, p_))
          return (plan *)0;

     d = p->sz->dims;
     n = d[0].n;

     if (R2HC_KINDP(p->kind[0])) {
          rs = d[0].is;
          cs = d[0].os;
          pln = MKPLAN_RDFT(P, &padt,
                            ego->bufferedp ? apply_buf_r2hc : apply_r2hc);
     } else {
          rs = d[0].os;
          cs = d[0].is;
          pln = MKPLAN_RDFT(P, &padt,
                            ego->bufferedp ? apply_buf_hc2r : apply_hc2r);
     }

     pln->k = ego->k;
     pln->n = n;

     /* strides for working directly on the user arrays */
     pln->rs0 = rs;
     pln->rs = X(mkstride)(n, 2 * rs);
     pln->csr = X(mkstride)(n, cs);
     pln->csi = X(mkstride)(n, -cs);
     pln->ioffset = ioffset(p->kind[0], n, cs);

     /* the same set, built from the batch size instead */
     b = compute_batchsize(n);
     pln->brs = X(mkstride)(n, 2 * b);
     pln->bcsr = X(mkstride)(n, b);
     pln->bcsi = X(mkstride)(n, -b);
     pln->bioffset = ioffset(p->kind[0], n, b);

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     pln->slv = ego;

     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl / ego->desc->genus->vl,
                  &ego->desc->ops,
                  &pln->super.super.ops);
     if (ego->bufferedp)
          pln->super.super.ops.other += 2 * n * pln->vl;

     pln->super.super.could_prune_now_p = !ego->bufferedp;

     return &pln->super.super;
}
/*
 * Plan a REDFT00 / RODFT00 problem with two children:
 *   - clde: a transform of the same kind over every second input element
 *           (input stride doubled), length n0 - n/2;
 *   - cldo: an in-place R2HC transform of length n/2 on a scratch buffer.
 * Here n0 is the length of the first dimension and n is n0 - 1 for
 * REDFT00 and n0 + 1 otherwise; n must be positive and even.
 *
 * The scratch buffer exists only while planning.  Returns a null plan
 * when applicable() rejects the problem or either child cannot be
 * planned.  If cldo cannot be planned, the already-created clde is now
 * destroyed; it was previously leaked on that path.
 *
 * The reported cost is (own operations + clde + cldo) multiplied by the
 * vector length obtained from p->vecsz.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     P *pln;
     const problem_rdft *p;
     plan *clde, *cldo;
     R *buf;
     INT n, n0;
     opcnt ops;
     int inplace_odd;

     static const plan_adt padt = {
          X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
          return (plan *)0;

     p = (const problem_rdft *) p_;

     n = (n0 = p->sz->dims[0].n) + (p->kind[0] == REDFT00 ? (INT)-1 : (INT)1);
     A(n > 0 && n % 2 == 0);
     buf = (R *) MALLOC(sizeof(R) * (n/2), BUFFERS);

     /* in-place RODFT00 keeps the input stride for the output and shifts
        the output pointer by one input element */
     inplace_odd = p->kind[0]==RODFT00 && p->I == p->O;
     clde = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
                             X(mktensor_1d)(n0-n/2, 2*p->sz->dims[0].is,
                                            inplace_odd ? p->sz->dims[0].is
                                            : p->sz->dims[0].os),
                             X(mktensor_0d)(),
                             TAINT(p->I
                                   + p->sz->dims[0].is * (p->kind[0]==RODFT00),
                                   p->vecsz->rnk ? p->vecsz->dims[0].is : 0),
                             TAINT(p->O
                                   + p->sz->dims[0].is * inplace_odd,
                                   p->vecsz->rnk ? p->vecsz->dims[0].os : 0),
                             p->kind[0]));
     if (!clde) {
          X(ifree)(buf);
          return (plan *)0;
     }

     cldo = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
                             X(mktensor_1d)(n/2, 1, 1),
                             X(mktensor_0d)(),
                             buf, buf, R2HC));
     X(ifree)(buf);
     if (!cldo) {
          /* clde has no owner yet: release it rather than leak it */
          X(plan_destroy_internal)(clde);
          return (plan *)0;
     }

     pln = MKPLAN_RDFT(P, &padt, p->kind[0] == REDFT00 ? apply_e : apply_o);

     pln->n = n;
     pln->is = p->sz->dims[0].is;
     pln->os = p->sz->dims[0].os;
     pln->clde = clde;
     pln->cldo = cldo;
     pln->td = 0;

     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);

     /* per-vector cost of this plan's own work */
     X(ops_zero)(&ops);
     ops.other = n/2;
     ops.add = (p->kind[0]==REDFT00 ? (INT)2 : (INT)0) +
          (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;
     ops.mul = 1 + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;

     /* tweak ops.other so that r2hc-pad is used for small sizes, which
        seems to be a lot faster on my machine: */
     ops.other += 256;

     /* total = vl * (own work + both children) */
     X(ops_zero)(&pln->super.super.ops);
     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &clde->ops, &pln->super.super.ops);
     X(ops_madd2)(pln->vl, &cldo->ops, &pln->super.super.ops);

     return &(pln->super.super);
}
Exemple #16
0
/*
 * Build a plan that relies on three child transforms of length npad, all
 * planned in place on one scratch buffer:
 *   - cld1:      R2HC;
 *   - cld2:      HC2R, or R2HC when R2HC_ONLY_CONV is set;
 *   - cld_omega: R2HC, planned with ESTIMATE passed to X(mkplan_f_d).
 * npad is n - 1 where n is the length of the first dimension, unless the
 * solver asks for padding (ego->pad), in which case it is
 * choose_transform_size(2 * (n - 1) - 1).
 *
 * The scratch buffer exists only while planning.  Returns a null plan
 * when applicable() rejects the problem or any child cannot be planned;
 * the buffer and any children already created are released at nada.
 *
 * The reported cost is cld1 + cld2 plus this plan's own operations;
 * cld_omega's operations are not added.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;
     P *pln;
     INT n, npad;
     INT is, os;
     plan *cld1 = (plan *) 0;
     plan *cld2 = (plan *) 0;
     plan *cld_omega = (plan *) 0;
     R *buf = (R *) 0;
     problem *cldp;

     static const plan_adt padt = {
	  X(rdft_solve), awake, print, destroy
     };

     if (!applicable(ego_, p_, plnr))
	  return (plan *) 0;

     n = p->sz->dims[0].n;
     is = p->sz->dims[0].is;
     os = p->sz->dims[0].os;

     /* length of the child transforms */
     if (ego->pad)
	  npad = choose_transform_size(2 * (n - 1) - 1);
     else
	  npad = n - 1;

     /* initial allocation for the purpose of planning */
     buf = (R *) MALLOC(sizeof(R) * npad, BUFFERS);

     cld1 = X(mkplan_f_d)(plnr, 
			  X(mkproblem_rdft_1_d)(X(mktensor_1d)(npad, 1, 1),
						X(mktensor_1d)(1, 0, 0),
						buf, buf,
						R2HC),
			  NO_SLOW, 0, 0);
     if (!cld1) goto nada;

     /* second child: same shape as cld1, kind chosen at compile time */
     cldp =
          X(mkproblem_rdft_1_d)(
               X(mktensor_1d)(npad, 1, 1),
               X(mktensor_1d)(1, 0, 0),
	       buf, buf, 
#if R2HC_ONLY_CONV
	       R2HC
#else
	       HC2R
#endif
	       );
     if (!(cld2 = X(mkplan_f_d)(plnr, cldp, NO_SLOW, 0, 0)))
	  goto nada;

     /* plan for omega */
     cld_omega = X(mkplan_f_d)(plnr, 
			       X(mkproblem_rdft_1_d)(
				    X(mktensor_1d)(npad, 1, 1),
				    X(mktensor_1d)(1, 0, 0),
				    buf, buf, R2HC),
			       NO_SLOW, ESTIMATE, 0);
     if (!cld_omega) goto nada;

     /* deallocate buffers; let awake() or apply() allocate them for real */
     X(ifree)(buf);
     /* cleared so that the cleanup at nada does not free it again */
     buf = 0;

     pln = MKPLAN_RDFT(P, &padt, apply);
     pln->cld1 = cld1;
     pln->cld2 = cld2;
     pln->cld_omega = cld_omega;
     pln->omega = 0;
     pln->n = n;
     pln->npad = npad;
     pln->is = is;
     pln->os = os;

     /* cost = cld1 + cld2 + own work; the ego->pad terms only
        contribute when padding is enabled */
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     pln->super.super.ops.other += (npad/2-1)*6 + npad + n + (n-1) * ego->pad;
     pln->super.super.ops.add += (npad/2-1)*2 + 2 + (n-1) * ego->pad;
     pln->super.super.ops.mul += (npad/2-1)*4 + 2 + ego->pad;
#if R2HC_ONLY_CONV
     /* extra work when both transforms are R2HC */
     pln->super.super.ops.other += n-2 - ego->pad;
     pln->super.super.ops.add += (npad/2-1)*2 + (n-2) - ego->pad;
#endif

     return &(pln->super.super);

 nada:
     X(ifree0)(buf);
     X(plan_destroy_internal)(cld_omega);
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     return 0;
}
Exemple #17
0
/*
 * Build an hc2hc plan: one plan created by the solver's mkcldw hook
 * (cldw) plus one child rdft plan (cld).
 *
 * Rejected (null plan) when NO_NONTHREADEDP(plnr) holds or
 * fftwf_hc2hc_applicable() says no.  The length n of the first dimension
 * is factored as r * m with r from fftwf_choose_radix().
 *
 * For R2HC, cldw is created on the output array and the plan uses
 * apply_dit; for HC2R, cldw is created on the input array and the plan
 * uses apply_dif.  Any other kind trips A(0) and now fails through the
 * common cleanup path.  Previously it fell out of the switch and
 * dereferenced the still-null pln whenever assertions were compiled out.
 *
 * Returns a null plan if cldw or cld cannot be created; whatever was
 * created is destroyed.
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const hc2hc_solver *ego = (const hc2hc_solver *) ego_;
     const problem_rdft *p;
     P *pln = 0;
     plan *cld = 0, *cldw = 0;
     INT n, r, m, v, ivs, ovs;
     iodim *d;

     static const plan_adt padt = {
          fftwf_rdft_solve, awake, print, destroy
     };

     if (NO_NONTHREADEDP(plnr) || !fftwf_hc2hc_applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_rdft *) p_;
     d = p->sz->dims;
     n = d[0].n;
     r = fftwf_choose_radix(ego->r, n);
     m = n / r;

     fftwf_tensor_tornk1(p->vecsz, &v, &ivs, &ovs);

     switch (p->kind[0]) {
         case R2HC:
              cldw = ego->mkcldw(ego,
                                 R2HC, r, m, d[0].os, v, ovs, 0, (m+2)/2,
                                 p->O, plnr);
              if (!cldw) goto nada;

              /* child: length-m transforms, vectorized over the radix
                 dimension and the original vector loop */
              cld = fftwf_mkplan_d(plnr,
                                fftwf_mkproblem_rdft_d(
                                     fftwf_mktensor_1d(m, r * d[0].is, d[0].os),
                                     fftwf_mktensor_2d(r, d[0].is, m * d[0].os,
                                                    v, ivs, ovs),
                                     p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dit);
              break;

         case HC2R:
              cldw = ego->mkcldw(ego,
                                 HC2R, r, m, d[0].is, v, ivs, 0, (m+2)/2,
                                 p->I, plnr);
              if (!cldw) goto nada;

              /* same construction with the stride roles swapped */
              cld = fftwf_mkplan_d(plnr,
                                fftwf_mkproblem_rdft_d(
                                     fftwf_mktensor_1d(m, d[0].is, r * d[0].os),
                                     fftwf_mktensor_2d(r, m * d[0].is, d[0].os,
                                                    v, ivs, ovs),
                                     p->I, p->O, p->kind)
                   );
              if (!cld) goto nada;

              pln = MKPLAN_RDFT(P, &padt, apply_dif);
              break;

         default:
              A(0);
              /* pln is still null here: fail cleanly instead of
                 falling through and dereferencing it */
              goto nada;
     }

     pln->cld = cld;
     pln->cldw = cldw;
     pln->r = r;
     fftwf_ops_add(&cld->ops, &cldw->ops, &pln->super.super.ops);

     /* inherit could_prune_now_p attribute from cldw */
     pln->super.super.could_prune_now_p = cldw->could_prune_now_p;

     return &(pln->super.super);

 nada:
     fftwf_plan_destroy_internal(cldw);
     fftwf_plan_destroy_internal(cld);
     return (plan *) 0;
}