Esempio n. 1
0
/*
 * Applicability test for an rdft problem.
 *
 * Returns 1 when the vector rank is finite, the transform rank is
 * positive (i.e. not a plain copy), and one of these holds:
 *   - the problem is in-place and X(tensor_inplace_strides2) rejects
 *     its strides (some rearrangement is needed);
 *   - the problem is out-of-place, the solver variant is apply_after,
 *     the input may be destroyed, min input stride <= 2 and min output
 *     stride > 2;
 *   - the problem is out-of-place, the solver variant is apply_before,
 *     min output stride <= 2 and min input stride > 2.
 * Returns 0 otherwise.
 */
static int applicable0(const solver *ego_, const problem *p_,
                       const planner *plnr)
{
     const problem_rdft *prob = (const problem_rdft *) p_;
     const S *slv = (const S *) ego_;

     if (!FINITE_RNK(prob->vecsz->rnk))
          return 0;

     /* a rank-0 "transform" is just a copy */
     if (prob->sz->rnk <= 0)
          return 0;

     if (prob->I == prob->O) {
          /* in-place: only worthwhile if data must be rearranged */
          return !(X(tensor_inplace_strides2)(prob->sz, prob->vecsz));
     }

     /* out-of-place, stride 1/2 -> bigger stride (apply_after) */
     if (slv->adt->apply == apply_after
         && !NO_DESTROY_INPUTP(plnr)
         && X(tensor_min_istride)(prob->sz) <= 2
         && X(tensor_min_ostride)(prob->sz) > 2)
          return 1;

     /* out-of-place, bigger stride -> stride 1/2 (apply_before) */
     if (slv->adt->apply == apply_before
         && X(tensor_min_ostride)(prob->sz) <= 2
         && X(tensor_min_istride)(prob->sz) > 2)
          return 1;

     return 0;
}
Esempio n. 2
0
/*
 * Applicability test for a rank-1 rdft problem with at most one vector
 * loop, as used by a buffered solver (ego->maxnbuf_ndx selects an entry
 * of maxnbufs).
 *
 * Returns nonzero if the solver may be used, 0 otherwise.  For
 * out-of-place HC2R problems the result is the raw value of
 * NO_DESTROY_INPUTP(plnr).
 */
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_rdft *prob = (const problem_rdft *) p_;
     iodim *dims = prob->sz->dims;
     INT vl, ivs, ovs;

     if (prob->vecsz->rnk > 1 || prob->sz->rnk != 1)
          return 0;

     fftwf_tensor_tornk1(prob->vecsz, &vl, &ivs, &ovs);

     if (fftwf_toobig(dims[0].n) && CONSERVE_MEMORYP(plnr))
          return 0;

     /* prune this solver if a solver of lower index would generate
        the same plan */
     if (fftwf_nbuf_redundant(dims[0].n, vl, ego->maxnbuf_ndx,
                              maxnbufs, NELEM(maxnbufs)))
          return 0;

     if (prob->I != prob->O) {
          /* HC2R is allowed only when the input is to be preserved
             (this solver sets NO_DESTROY_INPUT, which prevents
             infinite loops) */
          if (prob->kind[0] == HC2R)
               return (NO_DESTROY_INPUTP(plnr));

          /* otherwise, to prevent infinite loops in the planner, the
             output stride must be greater than 1 */
          return (dims[0].os > 1);
     }

     /* in-place from here on: either the input/output strides agree ... */
     if (fftwf_tensor_inplace_strides2(prob->sz, prob->vecsz))
          return 1;

     /* ... or the whole problem fits into the buffer */
     if (prob->vecsz->rnk == 0)
          return 1;

     return fftwf_nbuf(dims[0].n, prob->vecsz->dims[0].n,
                       maxnbufs[ego->maxnbuf_ndx])
          == prob->vecsz->dims[0].n;
}
/*
 * Applicability test for a rank-1 rdft2 problem (R2HC or HC2R, even n,
 * at most one vector loop) handled through a buffer.
 *
 * Returns nonzero if the solver may be used, 0 otherwise.  For
 * out-of-place HC2R problems the result is the raw value of
 * NO_DESTROY_INPUTP(plnr).
 */
static int applicable0(const problem *p_, const planner *plnr)
{
     const problem_rdft2 *prob = (const problem_rdft2 *) p_;
     iodim *dims = prob->sz->dims;

     if (prob->vecsz->rnk > 1 || prob->sz->rnk != 1)
          return 0;

     /* even n is assumed throughout */
     if ((prob->sz->dims[0].n % 2) != 0)
          return 0;

     /* only these two kinds are considered */
     if (prob->kind != R2HC && prob->kind != HC2R)
          return 0;

     if (X(toobig)(prob->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
          return 0;

     if (prob->r0 != prob->cr) {
          /* HC2R is allowed only when the input is to be preserved
             (this solver sets NO_DESTROY_INPUT, which prevents
             infinite loops) */
          if (prob->kind == HC2R)
               return (NO_DESTROY_INPUTP(plnr));

          /* otherwise, to prevent infinite loops in the planner, the
             output stride must be greater than 2 */
          return (dims[0].os > 2);
     }

     /* in-place from here on: either the strides agree ... */
     if (X(rdft2_inplace_strides(prob, RNK_MINFTY)))
          return 1;

     /* ... or the whole problem fits into the buffer */
     if (prob->vecsz->rnk == 0)
          return 1;

     return X(nbuf)(dims[0].n, prob->vecsz->dims[0].n)
          == prob->vecsz->dims[0].n;
}
Esempio n. 4
0
/*
 * Applicability test for an MPI transpose problem.
 *
 * A solver with preserve_input set applies only to out-of-place
 * problems whose input may be destroyed; in every case the problem
 * flags must satisfy ONLY_TRANSPOSEDP.  Returns 1 or 0.
 *
 * Note: this is *not* UGLY for out-of-place, destroy-input plans;
 * the planner often prefers transpose-pairwise to transpose-alltoall,
 * at least with LAM MPI on the original author's machine.
 */
static int applicable(const S *ego, const problem *p_,
                      const planner *plnr)
{
     const problem_mpi_transpose *prob = (const problem_mpi_transpose *) p_;

     if (ego->preserve_input) {
          if (NO_DESTROY_INPUTP(plnr) || prob->I == prob->O)
               return 0;
     }

     if (!ONLY_TRANSPOSEDP(prob->flags))
          return 0;

     return 1;
}
/*
 * Applicability test for an out-of-place MPI transpose problem.
 *
 * Requirements, all of which must hold:
 *   - input and output buffers differ;
 *   - if the input must not be destroyed, the problem has
 *     TRANSPOSED_IN set and the solver does not copy_transposed_in;
 *   - a solver with copy_transposed_in requires TRANSPOSED_IN;
 *   - the flags satisfy ONLY_TRANSPOSEDP.
 * Returns 1 or 0.
 */
static int applicable(const S *ego, const problem *p_,
                      const planner *plnr)
{
     const problem_mpi_transpose *prob = (const problem_mpi_transpose *) p_;

     if (prob->I == prob->O)
          return 0;

     if (NO_DESTROY_INPUTP(plnr)
         && !((prob->flags & TRANSPOSED_IN) && !ego->copy_transposed_in))
          return 0;

     if (!(prob->flags & TRANSPOSED_IN) && ego->copy_transposed_in)
          return 0;

     if (!ONLY_TRANSPOSEDP(prob->flags))
          return 0;

     return 1;
}
Esempio n. 6
0
/*
 * Applicability test for a multi-dimensional MPI dft problem with no
 * flags set (TRANSPOSED/SCRAMBLED_IN/OUT are not supported here).
 *
 * Checks, in order: rank > 1, flags == 0, the preserve_input
 * constraint, XM(is_local_after)(1, ...) for both IB and OB, and --
 * when NO_SLOWP is set -- that the serial dft solver is not applicable.
 * Returns 1 or 0.
 */
static int applicable(const S *ego, const problem *p_,
                      const planner *plnr)
{
     const problem_mpi_dft *prob = (const problem_mpi_dft *) p_;

     if (prob->sz->rnk <= 1)
          return 0;

     if (prob->flags != 0)
          return 0;

     if (ego->preserve_input
         && (NO_DESTROY_INPUTP(plnr) || prob->I == prob->O))
          return 0;

     if (!XM(is_local_after)(1, prob->sz, IB))
          return 0;

     if (!XM(is_local_after)(1, prob->sz, OB))
          return 0;

     /* slow if dft-serial is applicable */
     if (NO_SLOWP(plnr) && XM(dft_serial_applicable)(prob))
          return 0;

     return 1;
}
/*
 * Applicability test for a multi-dimensional MPI dft problem whose
 * flags are exactly TRANSPOSED_OUT.
 *
 * Checks, in order: rank > 1, flags == TRANSPOSED_OUT, the
 * preserve_input constraint, XM(is_local_after)(1, ..., IB),
 * XM(is_local_after)(2, ..., OB), that the first dimension occupies a
 * single OB block, and -- when NO_SLOWP is set -- that the serial dft
 * solver is not applicable.  Returns 1 or 0.
 */
static int applicable(const S *ego, const problem *p_,
                      const planner *plnr)
{
     const problem_mpi_dft *prob = (const problem_mpi_dft *) p_;

     if (prob->sz->rnk <= 1)
          return 0;

     if (prob->flags != TRANSPOSED_OUT)
          return 0;

     if (ego->preserve_input
         && (NO_DESTROY_INPUTP(plnr) || prob->I == prob->O))
          return 0;

     if (!XM(is_local_after)(1, prob->sz, IB))
          return 0;

     if (!XM(is_local_after)(2, prob->sz, OB))
          return 0;

     if (XM(num_blocks)(prob->sz->dims[0].n, prob->sz->dims[0].b[OB]) != 1)
          return 0;

     /* slow if dft-serial is applicable */
     if (NO_SLOWP(plnr) && XM(dft_serial_applicable)(prob))
          return 0;

     return 1;
}
Esempio n. 8
0
File: ct.c Progetto: Aegisub/fftw3
/*
 * Applicability test for a rank-1 dft problem with at most one vector
 * loop.
 *
 * A non-DECDIT solver is rejected when the problem is out-of-place and
 * the input must not be destroyed (DIF destroys the input).  The radix
 * chosen by X(choose_radix) must be > 1 and strictly smaller than n.
 * Returns 1 or 0.
 */
static int applicable0(const ct_solver *ego, const problem *p_, planner *plnr)
{
     const problem_dft *prob = (const problem_dft *) p_;
     INT radix;

     if (prob->sz->rnk != 1 || prob->vecsz->rnk > 1)
          return 0;

     /* DIF destroys the input and we don't like it */
     if (ego->dec != DECDIT
         && prob->ri != prob->ro
         && NO_DESTROY_INPUTP(plnr))
          return 0;

     radix = X(choose_radix)(ego->r, prob->sz->dims[0].n);
     if (radix <= 1)
          return 0;

     return prob->sz->dims[0].n > radix;
}
Esempio n. 9
0
/*
 * Applicability test for an MPI transpose problem that is split by a
 * radix.
 *
 * On the way, stores the radix returned by ego->radix(n_pes) into *r;
 * *r is written only if the first two checks pass.  Returns 1 if all
 * checks pass, 0 otherwise.
 */
static int applicable(const S *ego, const problem *p_,
                      const planner *plnr, int *r)
{
     const problem_mpi_transpose *prob = (const problem_mpi_transpose *) p_;
     int n_pes;

     MPI_Comm_size(prob->comm, &n_pes);

     if (prob->tblock * n_pes != prob->ny)
          return 0;

     if (ego->preserve_input
         && (NO_DESTROY_INPUTP(plnr) || prob->I == prob->O))
          return 0;

     /* the radix must be a nonzero value strictly between 1 and n_pes */
     *r = ego->radix(n_pes);
     if (!*r || *r >= n_pes || *r <= 1)
          return 0;

     if (!enough_space(prob->nx, prob->ny, prob->block, prob->tblock,
                       *r, n_pes))
          return 0;

     if (CONSERVE_MEMORYP(plnr) && *r <= 8
         && X(toobig)((prob->nx * (prob->ny / n_pes) * prob->vn) / *r))
          return 0;

     if (NO_SLOWP(plnr)
         && (prob->nx * (prob->ny / n_pes) * prob->vn) / n_pes > SMALL_MESSAGE)
          return 0;

     if (!ONLY_TRANSPOSEDP(prob->flags))
          return 0;

     return 1;
}
Esempio n. 10
0
/*
 * Applicability test for a rank-1 rdft problem with at most one vector
 * loop.
 *
 * R2HC problems (solved by DIT) are always acceptable; HC2R problems
 * (solved by DIF, which destroys the input) are acceptable only
 * in-place or when the input may be destroyed.  The radix chosen by
 * X(choose_radix) must be > 0 and strictly smaller than n.
 * Returns 1 or 0.
 */
static int applicable0(const hc2hc_solver *ego, const problem *p_, planner *plnr)
{
     const problem_rdft *prob = (const problem_rdft *) p_;
     INT radix;

     if (prob->sz->rnk != 1 || prob->vecsz->rnk > 1)
          return 0;

     if (prob->kind[0] != R2HC) {
          if (prob->kind[0] != HC2R)
               return 0;

          /* DIF destroys the input */
          if (prob->I != prob->O && NO_DESTROY_INPUTP(plnr))
               return 0;
     }

     radix = X(choose_radix)(ego->r, prob->sz->dims[0].n);
     if (radix <= 0)
          return 0;

     return prob->sz->dims[0].n > radix;
}
/*
 * Applicability test for a rank-1 MPI dft problem.
 *
 * Checks, in order: rank == 1, no flags other than RANK1_BIGVEC_ONLY,
 * the preserve_input constraint, vn >= number of processes (unless
 * RANK1_BIGVEC_ONLY is set), XM(rearrange_applicable), and -- when
 * NO_SLOWP is set -- that the serial dft solver is not applicable.
 * Returns 1 or 0.
 */
static int applicable(const S *ego, const problem *p_,
                      const planner *plnr)
{
     const problem_mpi_dft *prob = (const problem_mpi_dft *) p_;
     int n_pes;

     MPI_Comm_size(prob->comm, &n_pes);

     if (prob->sz->rnk != 1)
          return 0;

     if (prob->flags & ~RANK1_BIGVEC_ONLY)
          return 0;

     if (ego->preserve_input
         && (NO_DESTROY_INPUTP(plnr) || prob->I == prob->O))
          return 0;

     /* TODO: relax this, using more memory? */
     if (prob->vn < n_pes && !(prob->flags & RANK1_BIGVEC_ONLY))
          return 0;

     if (!XM(rearrange_applicable)(ego->rearrange,
                                   prob->sz->dims[0], prob->vn, n_pes))
          return 0;

     /* slow if dft-serial is applicable */
     if (NO_SLOWP(plnr) && XM(dft_serial_applicable)(prob))
          return 0;

     return 1;
}
Esempio n. 12
0
/*
 * Applicability test for a dft problem.
 *
 * Returns 1 when the vector rank is finite, the transform rank is
 * positive (i.e. not a plain copy), and one of these holds:
 *   - the problem is in-place, X(tensor_inplace_strides2) rejects its
 *     strides, and X(tensor_strides_decrease) accepts them (INPLACE_IS
 *     for apply_after, INPLACE_OS otherwise);
 *   - the problem is out-of-place, the solver variant is apply_after,
 *     the input may be destroyed, min input stride <= 2 and min output
 *     stride > 2;
 *   - the problem is out-of-place, the solver variant is apply_before,
 *     min output stride <= 2 and min input stride > 2.
 * Returns 0 otherwise.
 */
static int applicable0(const solver *ego_, const problem *p_,
                       const planner *plnr)
{
     const problem_dft *prob = (const problem_dft *) p_;
     const S *slv = (const S *) ego_;

     if (!FINITE_RNK(prob->vecsz->rnk))
          return 0;

     /* a rank-0 "transform" is just a copy */
     if (prob->sz->rnk <= 0)
          return 0;

     if (prob->ri == prob->ro) {
          /* in-place: data must need some rearrangement ... */
          if (X(tensor_inplace_strides2)(prob->sz, prob->vecsz))
               return 0;

          /* ... and, to prevent infinite loops with
             indirect-transpose, at least some transform strides
             must decrease */
          if (!X(tensor_strides_decrease)(
                   prob->sz, prob->vecsz,
                   slv->adt->apply == apply_after ?
                   INPLACE_IS : INPLACE_OS))
               return 0;

          return 1;
     }

     /* out-of-place, stride 1/2 -> bigger stride (apply_after) */
     if (slv->adt->apply == apply_after
         && !NO_DESTROY_INPUTP(plnr)
         && X(tensor_min_istride)(prob->sz) <= 2
         && X(tensor_min_ostride)(prob->sz) > 2)
          return 1;

     /* out-of-place, bigger stride -> stride 1/2 (apply_before) */
     if (slv->adt->apply == apply_before
         && X(tensor_min_ostride)(prob->sz) <= 2
         && X(tensor_min_istride)(prob->sz) > 2)
          return 1;

     return 0;
}
Esempio n. 13
0
/*
 * Applicability test for a multi-dimensional (rank >= 2) rdft2 problem
 * of kind R2HC or HC2R.
 *
 * picksplit(ego, p->sz, rp) is called only after the rank and kind
 * checks pass; it receives rp so it can report the chosen split.
 * In-place problems are accepted unconditionally at that point;
 * out-of-place problems are accepted for R2HC, or for HC2R when the
 * input may be destroyed.  Returns 1 or 0.
 */
static int applicable0(const solver *ego_, const problem *p_, int *rp,
                       const planner *plnr)
{
     const problem_rdft2 *prob = (const problem_rdft2 *) p_;
     const S *slv = (const S *) ego_;

     if (!FINITE_RNK(prob->sz->rnk) || !FINITE_RNK(prob->vecsz->rnk))
          return 0;

     /* FIXME: multidimensional R2HCII ? */
     if (prob->kind != R2HC && prob->kind != HC2R)
          return 0;

     if (prob->sz->rnk < 2)
          return 0;

     if (!picksplit(slv, prob->sz, rp))
          return 0;

     /* FIXME: what are sufficient conditions for inplace? */
     if (prob->r0 == prob->cr)
          return 1;

     /* can work out-of-place, but HC2R destroys input */
     return prob->kind == R2HC || !NO_DESTROY_INPUTP(plnr);
}
Esempio n. 14
0
/*
 * Build a plan for a distributed rdft2 problem out of three child plans:
 *   cld1 - a local rdft2 over the last rnk-1 dimensions,
 *   cldt - a distributed transpose of the first two dimensions,
 *   cld2 - a local 1d dft along the first dimension.
 * Returns a null plan if applicable() rejects the problem or if
 * XM(any_true) reports a failed child plan (presumably on any process
 * of p->comm -- confirm against XM(any_true)).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_rdft2 *p;
     P *pln;
     plan *cld1 = 0, *cldt = 0, *cld2 = 0;
     R *r0, *r1, *cr, *ci, *ri, *ii, *ro, *io, *I, *O;
     tensor *sz;
     int i, my_pe, n_pes;
     INT nrest, n1, b1;
     static const plan_adt padt = {
          XM(rdft2_solve), awake, print, destroy
     };
     block_kind k1, k2;

     /* note: ego is in fact used below */
     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_rdft2 *) p_;

     /* Choose the block kinds (k1 for cld1, k2 for the transposed
	dimension) and the real/complex buffer pointers handed to the
	child plans.  When the input must be preserved, I is redirected
	to O so that the transpose runs in the output buffer. */
     I = p->I; O = p->O;
     if (p->kind == R2HC) {
	  k1 = IB; k2 = OB;
          r1 = (r0 = I) + p->vn;
	  if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) {
	       ci = (cr = O) + 1;
	       I = O; 
	  }
	  else 
	       ci = (cr = I) + 1;
	  io = ii = (ro = ri = O) + 1;
     }
     else {
	  k1 = OB; k2 = IB;
	  r1 = (r0 = O) + p->vn;
	  ci = (cr = O) + 1;
	  if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) {
	       ri = (ii = I) + 1;
	       ro = (io = O) + 1;
	       I = O;
	  }
	  else
	       ro = ri = (io = ii = I) + 1;
     }

     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);

     sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
     i = p->sz->rnk - 2; A(i >= 0);
     /* the last dimension is laid out with n/2+1 elements of stride
	2*vn; outer strides are derived from that length */
     sz->dims[i].n = p->sz->dims[i+1].n / 2 + 1;
     sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
     for (--i; i >= 0; --i) {
	  sz->dims[i].n = p->sz->dims[i+1].n;
	  sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
     }
     /* product of all but the first dimension of sz */
     nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n;
     {
	  INT ivs = 1 + (p->kind == HC2R), ovs = 1 + (p->kind == R2HC);
          INT is = sz->dims[0].n * sz->dims[0].is;
          INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[k1], my_pe);
	  /* now that strides and nrest are computed, put back the full
	     length of the last dimension before creating the problem */
	  sz->dims[p->sz->rnk - 2].n = p->sz->dims[p->sz->rnk - 1].n;
	  cld1 = X(mkplan_d)(plnr,
                             X(mkproblem_rdft2_d)(sz,
						  X(mktensor_2d)(b, is, is,
								p->vn,ivs,ovs),
						  r0, r1, cr, ci, p->kind));
	  if (XM(any_true)(!cld1, p->comm)) goto nada;
     }

     nrest *= p->vn;
     n1 = p->sz->dims[1].n;
     b1 = p->sz->dims[1].b[k2];
     if (p->sz->rnk == 2) { /* n1 dimension is cut in ~half */
	  n1 = n1 / 2 + 1;
	  /* a block covering the whole dimension becomes one covering
	     the whole halved dimension */
	  b1 = b1 == p->sz->dims[1].n ? n1 : b1;
     }

     /* transpose problem: the dimension order and block sizes are
	swapped between the R2HC and HC2R cases */
     if (p->kind == R2HC)
	  cldt = X(mkplan_d)(plnr,
			     XM(mkproblem_transpose)(
				  p->sz->dims[0].n, n1, nrest * 2,
				  I, O,
				  p->sz->dims[0].b[IB], b1,
				  p->comm, 0));
     else
	  cldt = X(mkplan_d)(plnr,
			     XM(mkproblem_transpose)(
				  n1, p->sz->dims[0].n, nrest * 2,
				  I, O,
				  b1, p->sz->dims[0].b[OB], 
				  p->comm, 0));
     if (XM(any_true)(!cldt, p->comm)) goto nada;

     {
	  /* 1d dft of length dims[0].n, repeated over the local block b
	     of the n1 dimension and over nrest interleaved elements */
	  INT is = p->sz->dims[0].n * nrest * 2;
	  INT b = XM(block)(n1, b1, my_pe);
	  cld2 = X(mkplan_d)(plnr,
			     X(mkproblem_dft_d)(X(mktensor_1d)(
						     p->sz->dims[0].n,
						     nrest * 2, nrest * 2),
						X(mktensor_2d)(b, is, is,
							       nrest, 2, 2),
						ri, ii, ro, io));
	  if (XM(any_true)(!cld2, p->comm)) goto nada;
     }

     pln = MKPLAN_MPI_RDFT2(P, &padt, p->kind == R2HC ? apply_r2c : apply_c2r);
     pln->cld1 = cld1;
     pln->cldt = cldt;
     pln->cld2 = cld2;
     /* 2 when the solver itself preserves the input, otherwise the
	planner's NO_DESTROY_INPUT setting */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
     pln->vn = p->vn;

     /* operation count = sum of the three child plans */
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     X(ops_add2)(&cldt->ops, &pln->super.super.ops);

     return &(pln->super.super);

     /* failure path: release whichever child plans were created
	(pointers not yet assigned are still 0) */
 nada:
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cldt);
     X(plan_destroy_internal)(cld1);
     return (plan *) 0;
}
Esempio n. 15
0
/*
 * Build a plan for a distributed rank-1 rdft problem out of three child
 * plans:
 *   cldt_before - a distributed transpose nx x ny -> ny x nx,
 *   cld         - a local 1d rdft of length nx,
 *   cldt_after  - a distributed transpose back, ny x nx -> nx x ny.
 * ny is obtained from XM(rearrange_ny) and must divide p->vn.
 * Returns a null plan if applicable() rejects the problem, if
 * XM(rearrange_ny) returns 0, or if XM(any_true) reports a failed child
 * plan (presumably on any process of p->comm -- confirm).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_rdft *p;
     P *pln;
     plan *cld = 0, *cldt_before = 0, *cldt_after = 0;
     R *I, *O;
     INT yblock, yb, nx, ny, vn;
     int my_pe, n_pes;
     static const plan_adt padt = {
          XM(rdft_solve), awake, print, destroy
     };

     /* note: ego is in fact used below */
     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_rdft *) p_;

     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);
     
     nx = p->sz->dims[0].n;
     /* ny == 0 means no usable rearrangement; no child plans exist yet,
	so a plain return is enough */
     if (!(ny = XM(rearrange_ny)(ego->rearrange, p->sz->dims[0],p->vn,n_pes)))
	  return (plan *) 0;
     /* the vector length is split as p->vn == ny * vn */
     vn = p->vn / ny;
     A(ny * vn == p->vn);

     yblock = XM(default_block)(ny, n_pes);
     /* first transpose goes from the original input to the output */
     cldt_before = X(mkplan_d)(plnr,
			       XM(mkproblem_transpose)(
				    nx, ny, vn,
				    I = p->I, O = p->O,
				    p->sz->dims[0].b[IB], yblock,
				    p->comm, 0));
     if (XM(any_true)(!cldt_before, p->comm)) goto nada;	  
     /* when the input must be preserved, the remaining steps work
	entirely in O */
     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) { I = O; }
     
     yb = XM(block)(ny, yblock, my_pe);
     /* note the buffer order: O is passed first and I second */
     cld = X(mkplan_d)(plnr,
		       X(mkproblem_rdft_1_d)(X(mktensor_1d)(nx, vn, vn),
					     X(mktensor_2d)(yb, vn*nx, vn*nx,
							    vn, 1, 1),
					     O, I, p->kind[0]));
     if (XM(any_true)(!cld, p->comm)) goto nada;	  
     
     cldt_after = X(mkplan_d)(plnr,
			      XM(mkproblem_transpose)(
				   ny, nx, vn,
				   I, O,
				   yblock, p->sz->dims[0].b[OB], 
				   p->comm, 0));
     if (XM(any_true)(!cldt_after, p->comm)) goto nada;	  

     pln = MKPLAN_MPI_RDFT(P, &padt, apply);

     pln->cldt_before = cldt_before;
     pln->cld = cld;
     pln->cldt_after = cldt_after;
     /* 2 when the solver itself preserves the input, otherwise the
	planner's NO_DESTROY_INPUT setting */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
     pln->rearrange = ego->rearrange;

     /* operation count = sum of the three child plans */
     X(ops_add)(&cldt_before->ops, &cld->ops, &pln->super.super.ops);
     X(ops_add2)(&cldt_after->ops, &pln->super.super.ops);

     return &(pln->super.super);

     /* failure path: release whichever child plans were created
	(pointers not yet assigned are still 0) */
 nada:
     X(plan_destroy_internal)(cldt_after);
     X(plan_destroy_internal)(cld);
     X(plan_destroy_internal)(cldt_before);
     return (plan *) 0;
}
Esempio n. 16
0
/*
 * Build a plan for a distributed multi-dimensional rdft problem out of
 * three child plans:
 *   cld1 - a local rdft over the last rnk-1 dimensions (kinds
 *          p->kind + 1), reading from the original input,
 *   cldt - a distributed transpose of the first two dimensions,
 *   cld2 - a local in-place 1d rdft (kind p->kind[0]) along the first
 *          dimension, in O.
 * Returns a null plan if applicable() rejects the problem or if
 * XM(any_true) reports a failed child plan (presumably on any process
 * of p->comm -- confirm).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_rdft *p;
     P *pln;
     plan *cld1 = 0, *cldt = 0, *cld2 = 0;
     R *I, *O, *I2;
     tensor *sz;
     int i, my_pe, n_pes;
     INT nrest;
     static const plan_adt padt = {
          XM(rdft_solve), awake, print, destroy
     };

     /* note: ego is in fact used below */
     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_rdft *) p_;

     /* I2 always refers to the original input; I is redirected to O
	when the input must be preserved */
     I2 = I = p->I;
     O = p->O;
     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) 
	  I = O; 
     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);

     sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
     i = p->sz->rnk - 2; A(i >= 0);
     /* innermost dimension has stride vn; each outer stride is the
	inner length times the inner stride */
     sz->dims[i].n = p->sz->dims[i+1].n;
     sz->dims[i].is = sz->dims[i].os = p->vn;
     for (--i; i >= 0; --i) {
	  sz->dims[i].n = p->sz->dims[i+1].n;
	  sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
     }
     /* product of all but the first dimension of sz */
     nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n;
     {
          INT is = sz->dims[0].n * sz->dims[0].is;
          INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
	  cld1 = X(mkplan_d)(plnr,
                             X(mkproblem_rdft_d)(sz,
						 X(mktensor_2d)(b, is, is,
								p->vn, 1, 1),
						 I2, I, p->kind + 1));
	  if (XM(any_true)(!cld1, p->comm)) goto nada;
     }

     nrest *= p->vn;
     cldt = X(mkplan_d)(plnr,
			XM(mkproblem_transpose)(
			     p->sz->dims[0].n, p->sz->dims[1].n, nrest,
			     I, O,
			     p->sz->dims[0].b[IB], p->sz->dims[1].b[OB], 
			     p->comm, 0));
     if (XM(any_true)(!cldt, p->comm)) goto nada;

     {
	  /* 1d rdft of length dims[0].n, repeated over the local block b
	     of dimension 1 and over nrest contiguous elements */
	  INT is = p->sz->dims[0].n * nrest;
	  INT b = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe);
	  cld2 = X(mkplan_d)(plnr,
			     X(mkproblem_rdft_1_d)(X(mktensor_1d)(
							p->sz->dims[0].n,
							nrest, nrest),
						   X(mktensor_2d)(b, is, is,
								  nrest, 1, 1),
						   O, O, p->kind[0]));
	  if (XM(any_true)(!cld2, p->comm)) goto nada;
     }

     pln = MKPLAN_MPI_RDFT(P, &padt, apply);
     pln->cld1 = cld1;
     pln->cldt = cldt;
     pln->cld2 = cld2;
     /* 2 when the solver itself preserves the input, otherwise the
	planner's NO_DESTROY_INPUT setting */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);

     /* operation count = sum of the three child plans */
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     X(ops_add2)(&cldt->ops, &pln->super.super.ops);

     return &(pln->super.super);

     /* failure path: release whichever child plans were created
	(pointers not yet assigned are still 0) */
 nada:
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cldt);
     X(plan_destroy_internal)(cld1);
     return (plan *) 0;
}
Esempio n. 17
0
/*
 * Build a plan for a distributed transpose that exchanges blocks
 * between processes according to a per-process schedule.
 *
 * Child plans: cld1 (a local rearrangement, created only when the
 * problem is not TRANSPOSED_IN) and cld2/cld2rest/cld3, which are
 * produced by XM(mkplans_posttranspose).  The plan also records a
 * duplicate of the communicator, the send/receive block sizes and
 * offsets for every peer, and the communication schedule.
 * Returns a null plan if applicable() rejects the problem or if
 * XM(any_true) reports a failure (presumably on any process of
 * p->comm -- confirm).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_transpose *p;
     P *pln;
     plan *cld1 = 0, *cld2 = 0, *cld2rest = 0, *cld3 = 0;
     INT b, bt, vn, rest_Ioff, rest_Ooff;
     INT *sbs, *sbo, *rbs, *rbo;
     int pe, my_pe, n_pes, sort_pe = -1, ascending = 1;
     R *I, *O;
     static const plan_adt padt = {
          XM(transpose_solve), awake, print, destroy
     };

     /* note: ego is in fact used below */
     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_transpose *) p_;
     vn = p->vn;
     I = p->I; O = p->O;

     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);

     b = XM(block)(p->nx, p->block, my_pe);
     
     if (!(p->flags & TRANSPOSED_IN)) { /* b x ny x vn -> ny x b x vn */
	  cld1 = X(mkplan_f_d)(plnr, 
			       X(mkproblem_rdft_0_d)(X(mktensor_3d)
						     (b, p->ny * vn, vn,
						      p->ny, vn, b * vn,
						      vn, 1, 1),
						     I, O),
			       0, 0, NO_SLOW);
	  if (XM(any_true)(!cld1, p->comm)) goto nada;
     }
     /* when the input must be preserved, everything after cld1 works
	in O */
     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) I = O;

     /* the helper fills in cld2, cld2rest, cld3 and the two offsets;
	a zero return is treated as failure */
     if (XM(any_true)(!XM(mkplans_posttranspose)(p, plnr, I, O, my_pe,
						 &cld2, &cld2rest, &cld3,
						 &rest_Ioff, &rest_Ooff),
		      p->comm)) goto nada;

     pln = MKPLAN_MPI_TRANSPOSE(P, &padt, apply);

     pln->cld1 = cld1;
     pln->cld2 = cld2;
     pln->cld2rest = cld2rest;
     pln->rest_Ioff = rest_Ioff;
     pln->rest_Ooff = rest_Ooff;
     pln->cld3 = cld3;
     /* 2 when the solver itself preserves the input, otherwise the
	planner's NO_DESTROY_INPUT setting */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);

     MPI_Comm_dup(p->comm, &pln->comm);

     /* from here on n_pes is the larger of the two block counts, not
	the communicator size obtained above */
     n_pes = (int) X(imax)(XM(num_blocks)(p->nx, p->block),
			   XM(num_blocks)(p->ny, p->tblock));

     /* Compute sizes/offsets of blocks to exchange between processors */
     /* one allocation holds all four arrays of n_pes entries each;
	sbs is its base pointer */
     sbs = (INT *) MALLOC(4 * n_pes * sizeof(INT), PLANS);
     sbo = sbs + n_pes;
     rbs = sbo + n_pes;
     rbo = rbs + n_pes;
     b = XM(block)(p->nx, p->block, my_pe);
     bt = XM(block)(p->ny, p->tblock, my_pe);
     for (pe = 0; pe < n_pes; ++pe) {
	  INT db, dbt; /* destination block sizes */
	  db = XM(block)(p->nx, p->block, pe);
	  dbt = XM(block)(p->ny, p->tblock, pe);

	  sbs[pe] = b * dbt * vn;
	  sbo[pe] = pe * (b * p->tblock) * vn;
	  rbs[pe] = db * bt * vn;
	  rbo[pe] = pe * (p->block * bt) * vn;

	  /* a peer whose (nonempty) blocks are not in the nominal
	     block : tblock proportion is remembered in sort_pe */
	  if (db * dbt > 0 && db * p->tblock != p->block * dbt) {
	       A(sort_pe == -1); /* only one process should need sorting */
	       sort_pe = pe;
	       ascending = db * p->tblock > p->block * dbt;
	  }
     }
     pln->n_pes = n_pes;
     pln->my_pe = my_pe;
     pln->send_block_sizes = sbs;
     pln->send_block_offsets = sbo;
     pln->recv_block_sizes = rbs;
     pln->recv_block_offsets = rbo;

     if (my_pe >= n_pes) {
	  pln->sched = 0; /* this process is not doing anything */
     }
     else {
	  pln->sched = (int *) MALLOC(n_pes * sizeof(int), PLANS);
	  fill1_comm_sched(pln->sched, my_pe, n_pes);
	  if (sort_pe >= 0)
	       sort1_comm_sched(pln->sched, n_pes, sort_pe, ascending);
     }

     /* operation count = sum over the child plans that exist */
     X(ops_zero)(&pln->super.super.ops);
     if (cld1) X(ops_add2)(&cld1->ops, &pln->super.super.ops);
     if (cld2) X(ops_add2)(&cld2->ops, &pln->super.super.ops);
     if (cld2rest) X(ops_add2)(&cld2rest->ops, &pln->super.super.ops);
     if (cld3) X(ops_add2)(&cld3->ops, &pln->super.super.ops);
     /* FIXME: should MPI operations be counted in "other" somehow? */

     return &(pln->super.super);

     /* failure path: release whichever child plans were created
	(pointers not yet assigned are still 0) */
 nada:
     X(plan_destroy_internal)(cld3);
     X(plan_destroy_internal)(cld2rest);
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     return (plan *) 0;
}
Esempio n. 18
0
/*
 * Build a plan for a distributed multi-dimensional rdft2 problem out of
 * two child plans:
 *   cld1 - a local rdft2 over the last rnk-1 dimensions,
 *   cld2 - a distributed dft along the first dimension, created with
 *          the RANK1_BIGVEC_ONLY flag.
 * Returns a null plan if applicable() rejects the problem or if
 * XM(any_true) reports a failed child plan (presumably on any process
 * of p->comm -- confirm).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_rdft2 *p;
     P *pln;
     plan *cld1 = 0, *cld2 = 0;
     R *r0, *r1, *cr, *ci, *I, *O;
     tensor *sz;
     dtensor *sz2;
     int i, my_pe, n_pes;
     INT nrest;
     static const plan_adt padt = {
          XM(rdft2_solve), awake, print, destroy
     };

     /* note: ego is in fact used below */
     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_rdft2 *) p_;

     /* Pick the real (r0/r1) and complex (cr/ci) buffers for cld1.
	For R2HC with input preservation the complex data lives in O
	and cld2 then runs in-place in O; for HC2R both the real and
	the complex pointers refer to O. */
     I = p->I; O = p->O;
     if (p->kind == R2HC) {
          r1 = (r0 = p->I) + p->vn;
	  if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) {
	       ci = (cr = p->O) + 1;
	       I = O; 
	  }
	  else 
	       ci = (cr = p->I) + 1;
     }
     else {
          r1 = (r0 = p->O) + p->vn;
          ci = (cr = p->O) + 1;
     }

     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);

     sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
     i = p->sz->rnk - 2; A(i >= 0);
     /* the last dimension is laid out with n/2+1 elements of stride
	2*vn; outer strides are derived from that length */
     sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
     sz->dims[i].n = p->sz->dims[i+1].n / 2 + 1;
     for (--i; i >= 0; --i) {
	  sz->dims[i].n = p->sz->dims[i+1].n;
	  sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
     }
     /* taken while the last dimension still has length n/2+1 */
     nrest = X(tensor_sz)(sz);
     {
	  INT ivs = 1 + (p->kind == HC2R), ovs = 1 + (p->kind == R2HC);
          INT is = sz->dims[0].n * sz->dims[0].is;
          INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
	  /* put back the full length of the last dimension before
	     creating the problem */
	  sz->dims[p->sz->rnk - 2].n = p->sz->dims[p->sz->rnk - 1].n;
	  cld1 = X(mkplan_d)(plnr,
                             X(mkproblem_rdft2_d)(sz,
						  X(mktensor_2d)(b, is, is,
							        p->vn,ivs,ovs),
						  r0, r1, cr, ci, p->kind));
	  if (XM(any_true)(!cld1, p->comm)) goto nada;
     }

     sz2 = XM(mkdtensor)(1); /* tensor for first (distributed) dimension */
     sz2->dims[0] = p->sz->dims[0];
     /* the sign depends on the direction: FFT_SIGN for R2HC,
	-FFT_SIGN for HC2R */
     cld2 = X(mkplan_d)(plnr, XM(mkproblem_dft_d)(sz2, nrest * p->vn,
						  I, O, p->comm, 
						  p->kind == R2HC ?
						  FFT_SIGN : -FFT_SIGN,
						  RANK1_BIGVEC_ONLY));
     if (XM(any_true)(!cld2, p->comm)) goto nada;

     pln = MKPLAN_MPI_RDFT2(P, &padt, p->kind == R2HC ? apply_r2c : apply_c2r);
     pln->cld1 = cld1;
     pln->cld2 = cld2;
     /* 2 when the solver itself preserves the input, otherwise the
	planner's NO_DESTROY_INPUT setting */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
     pln->vn = p->vn;

     /* operation count = sum of the two child plans */
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);

     return &(pln->super.super);

     /* failure path: release whichever child plans were created
	(pointers not yet assigned are still 0) */
 nada:
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     return (plan *) 0;
}
/*
 * Build a plan for a distributed multi-dimensional complex dft problem
 * out of three child plans:
 *   cld1 - a local dft over the last rnk-1 dimensions,
 *   cldt - a distributed transpose of the first two dimensions,
 *   cld2 - a local in-place 1d dft along the first dimension, in O.
 * Returns a null plan if applicable() rejects the problem or if
 * XM(any_true) reports a failed child plan (presumably on any process
 * of p->comm -- confirm).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_dft *p;
     P *pln;
     plan *cld1 = 0, *cldt = 0, *cld2 = 0;
     R *ri, *ii, *ro, *io, *I, *O;
     tensor *sz;
     int i, my_pe, n_pes;
     INT nrest;
     static const plan_adt padt = {
          XM(dft_solve), awake, print, destroy
     };

     /* note: ego is in fact used below */
     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_dft *) p_;

     /* split the interleaved buffers into real/imaginary pointers */
     X(extract_reim)(p->sign, I = p->I, &ri, &ii);
     X(extract_reim)(p->sign, O = p->O, &ro, &io);
     /* preserving the input: cld1 writes to O and the transpose runs
	in O; otherwise cld1 is in-place in the input buffer */
     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) 
	  I = O; 
     else { 
	  ro = ri;
	  io = ii;
     }
     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);

     sz = X(mktensor)(p->sz->rnk - 1); /* tensor of last rnk-1 dimensions */
     i = p->sz->rnk - 2; A(i >= 0);
     /* innermost dimension has stride 2*vn; each outer stride is the
	inner length times the inner stride */
     sz->dims[i].n = p->sz->dims[i+1].n;
     sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
     for (--i; i >= 0; --i) {
	  sz->dims[i].n = p->sz->dims[i+1].n;
	  sz->dims[i].is = sz->dims[i].os = sz->dims[i+1].n * sz->dims[i+1].is;
     }
     /* product of all but the first dimension of sz */
     nrest = 1; for (i = 1; i < sz->rnk; ++i) nrest *= sz->dims[i].n;
     {
          INT is = sz->dims[0].n * sz->dims[0].is;
          INT b = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
	  cld1 = X(mkplan_d)(plnr,
                             X(mkproblem_dft_d)(sz,
                                                X(mktensor_2d)(b, is, is,
                                                               p->vn, 2, 2),
                                                ri, ii, ro, io));
	  if (XM(any_true)(!cld1, p->comm)) goto nada;
     }

     nrest *= p->vn;
     cldt = X(mkplan_d)(plnr,
			XM(mkproblem_transpose)(
			     p->sz->dims[0].n, p->sz->dims[1].n, nrest * 2,
			     I, O,
			     p->sz->dims[0].b[IB], p->sz->dims[1].b[OB], 
			     p->comm, 0));
     if (XM(any_true)(!cldt, p->comm)) goto nada;

     /* ro/io may have been pointed at the input above; re-derive them
	from O for the final in-place dft */
     X(extract_reim)(p->sign, O, &ro, &io);
     {
	  INT is = p->sz->dims[0].n * nrest * 2;
	  INT b = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe);
	  cld2 = X(mkplan_d)(plnr,
			     X(mkproblem_dft_d)(X(mktensor_1d)(
						     p->sz->dims[0].n,
						     nrest * 2, nrest * 2),
						X(mktensor_2d)(b, is, is,
							       nrest, 2, 2),
						ro, io, ro, io));
	  if (XM(any_true)(!cld2, p->comm)) goto nada;
     }

     pln = MKPLAN_MPI_DFT(P, &padt, apply);
     pln->cld1 = cld1;
     pln->cldt = cldt;
     pln->cld2 = cld2;
     /* 2 when the solver itself preserves the input, otherwise the
	planner's NO_DESTROY_INPUT setting */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
     /* offsets of the real and imaginary parts relative to the start
	of the input buffer */
     pln->roff = ri - p->I;
     pln->ioff = ii - p->I;

     /* operation count = sum of the three child plans */
     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
     X(ops_add2)(&cldt->ops, &pln->super.super.ops);

     return &(pln->super.super);

     /* failure path: release whichever child plans were created
	(pointers not yet assigned are still 0) */
 nada:
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cldt);
     X(plan_destroy_internal)(cld1);
     return (plan *) 0;
}
Esempio n. 20
0
/*
 * Build a plan for a distributed transpose that is decomposed by a
 * radix r (chosen by applicable()) with np == r * m processes:
 *   cld1  - a local rearrangement of the data,
 *   cldtr - a transpose within the sub-communicator of processes that
 *           share the same me / r (created only if this process has a
 *           nonempty block there),
 *   cldtm - a transpose within the sub-communicator of processes that
 *           share the same me % r (likewise optional).
 * Returns a null plan if applicable() rejects the problem or if
 * XM(any_true) reports a failed child plan (presumably on any process
 * of p->comm -- confirm).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_transpose *p;
     P *pln;
     plan *cld1 = 0, *cldtr = 0, *cldtm = 0;
     R *I, *O;
     int me, np, r, m;
     INT b;
     MPI_Comm comm2;
     static const plan_adt padt = {
          XM(transpose_solve), awake, print, destroy
     };

     /* note: ego is in fact used below */
     UNUSED(ego);

     /* applicable() also reports the radix through r */
     if (!applicable(ego, p_, plnr, &r))
          return (plan *) 0;

     p = (const problem_mpi_transpose *) p_;

     MPI_Comm_size(p->comm, &np);
     MPI_Comm_rank(p->comm, &me);
     m = np / r;
     A(r * m == np);

     I = p->I; O = p->O;

     b = XM(block)(p->nx, p->block, me);
     A(p->tblock * np == p->ny); /* this is currently required for cld1 */
     /* cld1 is always created; which local permutation it performs
	depends on TRANSPOSED_IN and on whether the problem is in-place */
     if (p->flags & TRANSPOSED_IN) { 
          /* m x r x (bt x b x vn) -> r x m x (bt x b x vn) */
	  INT vn = p->vn * b * p->tblock;
	  cld1 = X(mkplan_f_d)(plnr,
                               X(mkproblem_rdft_0_d)(X(mktensor_3d)
						     (m, r*vn, vn,
						      r, vn, m*vn,
						      vn, 1, 1),
                                                     I, O),
                               0, 0, NO_SLOW);
     }
     else if (I != O) { /* combine cld1 with TRANSPOSED_IN permutation */
          /* b x m x r x bt x vn -> r x m x bt x b x vn */
	  INT vn = p->vn;
	  INT bt = p->tblock;
	  cld1 = X(mkplan_f_d)(plnr,
                               X(mkproblem_rdft_0_d)(X(mktensor_5d)
						     (b, m*r*bt*vn, vn,
						      m, r*bt*vn, bt*b*vn,
						      r, bt*vn, m*bt*b*vn,
						      bt, vn, b*vn,
						      vn, 1, 1),
                                                     I, O),
                               0, 0, NO_SLOW);
     }
     else { /* TRANSPOSED_IN permutation must be separate for in-place */
	  /* b x (m x r) x bt x vn -> b x (r x m) x bt x vn */
	  INT vn = p->vn * p->tblock;
	  cld1 = X(mkplan_f_d)(plnr,
                               X(mkproblem_rdft_0_d)(X(mktensor_4d)
						     (m, r*vn, vn,
						      r, vn, m*vn,
						      vn, 1, 1,
						      b, np*vn, np*vn),
                                                     I, O),
                               0, 0, NO_SLOW);
     }
     if (XM(any_true)(!cld1, p->comm)) goto nada;

     /* when the input must be preserved, the sub-transposes work
	entirely in O */
     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) I = O;

     /* first sub-transpose: group processes by me / r.  The temporary
	communicator is freed right after planning, before the
	collective failure check.  Note the buffer order O, I. */
     b = XM(block)(p->nx, r * p->block, me / r);
     MPI_Comm_split(p->comm, me / r, me, &comm2);
     if (b)
	  cldtr = X(mkplan_d)(plnr, XM(mkproblem_transpose)
			      (b, p->ny, p->vn,
			       O, I, p->block, m * p->tblock, comm2, 
			       p->I != p->O
			       ? TRANSPOSED_IN : (p->flags & TRANSPOSED_IN)));
     MPI_Comm_free(&comm2);
     /* a missing plan only counts as failure if this process has data */
     if (XM(any_true)(b && !cldtr, p->comm)) goto nada;
     
     /* second sub-transpose: group processes by me % r */
     b = XM(block)(p->ny, m * p->tblock, me % r);
     MPI_Comm_split(p->comm, me % r, me, &comm2);
     if (b)
	  cldtm = X(mkplan_d)(plnr, XM(mkproblem_transpose)
			      (p->nx, b, p->vn,
			       I, O, r * p->block, p->tblock, comm2, 
			       TRANSPOSED_IN | (p->flags & TRANSPOSED_OUT)));
     MPI_Comm_free(&comm2);
     if (XM(any_true)(b && !cldtm, p->comm)) goto nada;

     pln = MKPLAN_MPI_TRANSPOSE(P, &padt, apply);

     pln->cld1 = cld1;
     pln->cldtr = cldtr;
     pln->cldtm = cldtm;
     /* 2 when the solver itself preserves the input, otherwise the
	planner's NO_DESTROY_INPUT setting */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);
     pln->r = r;
     pln->nam = ego->nam;

     /* operation count: cld1 always exists here; the sub-transposes
	are added only if present on this process */
     pln->super.super.ops = cld1->ops;
     if (cldtr) X(ops_add2)(&cldtr->ops, &pln->super.super.ops);
     if (cldtm) X(ops_add2)(&cldtm->ops, &pln->super.super.ops);

     return &(pln->super.super);

     /* failure path: release whichever child plans were created
	(pointers not yet assigned are still 0) */
 nada:
     X(plan_destroy_internal)(cldtm);
     X(plan_destroy_internal)(cldtr);
     X(plan_destroy_internal)(cld1);
     return (plan *) 0;
}