Пример #1
0
/*
 * Applicability test for the buffered RDFT solver `ego' on problem `p_'.
 *
 * Returns nonzero if the solver may be used, 0 otherwise.  Only
 * rank-1 transforms with a vector rank of at most 1 are considered.
 * Out-of-place and in-place problems are then judged by different
 * rules (see below).
 */
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_rdft *p = (const problem_rdft *) p_;
     const iodim *d = p->sz->dims;
     INT n0;
     INT vl, ivs, ovs;

     /* reject anything but a rank-1 transform with vector rank <= 1 */
     if (p->vecsz->rnk > 1 || p->sz->rnk != 1)
          return 0;

     n0 = d[0].n;

     /* only vl is consulted below; ivs/ovs are needed by the call */
     fftwf_tensor_tornk1(p->vecsz, &vl, &ivs, &ovs);

     if (fftwf_toobig(n0) && CONSERVE_MEMORYP(plnr))
          return 0;

     /* prune this solver when one of lower index would generate the
        same plan */
     if (fftwf_nbuf_redundant(n0, vl, ego->maxnbuf_ndx,
                              maxnbufs, NELEM(maxnbufs)))
          return 0;

     if (p->I != p->O) {
          /* out of place */
          if (p->kind[0] != HC2R) {
               /* Buffering could in principle help out of place too,
                  but to keep the planner from looping forever the
                  output stride is required to exceed 1. */
               return (d[0].os > 1);
          }

          /* HC2R is accepted only when the input must be preserved.
             This solver sets NO_DESTROY_INPUT, which prevents
             infinite loops. */
          return (NO_DESTROY_INPUTP(plnr));
     }

     /* In place: either the input/output strides agree ... */
     if (fftwf_tensor_inplace_strides2(p->sz, p->vecsz))
          return 1;

     /* ... or the whole problem has to fit into the buffer. */
     return (p->vecsz->rnk == 0
             || (fftwf_nbuf(n0, p->vecsz->dims[0].n,
                            maxnbufs[ego->maxnbuf_ndx])
                 == p->vecsz->dims[0].n));
}
static int applicable0(const problem *p_, const planner *plnr)
{
     const problem_rdft2 *p = (const problem_rdft2 *) p_;
     iodim *d = p->sz->dims;

     if (1
	 && p->vecsz->rnk <= 1
	 && p->sz->rnk == 1

	 /* we assume even n throughout */
	 && (p->sz->dims[0].n % 2) == 0

	 /* and we only consider these two cases */
	 && (p->kind == R2HC || p->kind == HC2R)

	  ) {

	  if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
	       return 0;

	  if (p->r0 != p->cr) {
	       if (p->kind == HC2R) {
		    /* Allow HC2R problems only if the input is to be
		       preserved.  This solver sets NO_DESTROY_INPUT,
		       which prevents infinite loops */
		    return (NO_DESTROY_INPUTP(plnr));
	       } else {
		    /*
		      In principle, the buffered transforms might be useful
		      when working out of place.  However, in order to
		      prevent infinite loops in the planner, we require
		      that the output stride of the buffered transforms be
		      greater than 2.
		    */
		    return (d[0].os > 2);
	       }
	  }

	  /*
	   * If the problem is in place, the input/output strides must
	   * be the same or the whole thing must fit in the buffer.
	   */
	  if (X(rdft2_inplace_strides(p, RNK_MINFTY)))
	       return 1;

	  if (/* fits into buffer: */
	       ((p->vecsz->rnk == 0)
		||
		(X(nbuf)(d[0].n, p->vecsz->dims[0].n) == p->vecsz->dims[0].n)))
	       return 1;
     }

     return 0;
}
Пример #3
0
/*
 * Applicability test for the buffered DFT solver `ego' on problem `p_'.
 *
 * Returns nonzero if the solver may be used, 0 otherwise.  Only
 * rank-1 transforms with a vector rank of at most 1 are considered.
 */
static int applicable0(const S *ego, const problem *p_, const planner *plnr)
{
     const problem_dft *p = (const problem_dft *) p_;
     const iodim *d = p->sz->dims;
     INT vl, ivs, ovs;

     /* reject anything but a rank-1 transform with vector rank <= 1 */
     if (p->vecsz->rnk > 1 || p->sz->rnk != 1)
          return 0;

     /* only vl is consulted below; ivs/ovs are needed by the call */
     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);

     if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
          return 0;

     /* prune this solver when one of lower index would generate the
        same plan */
     if (X(nbuf_redundant)(d[0].n, vl, ego->maxnbuf_ndx,
                           maxnbufs, NELEM(maxnbufs)))
          return 0;

     /* Out of place: buffering could in principle be useful, but to
        keep the planner from looping forever the output stride is
        required to exceed 2. */
     if (p->ri != p->ro)
          return (d[0].os > 2);

     /* In place: either the input/output strides agree ... */
     if (X(tensor_inplace_strides2)(p->sz, p->vecsz))
          return 1;

     /* ... or the whole problem has to fit into the buffer. */
     return (p->vecsz->rnk == 0
             || (X(nbuf)(d[0].n, p->vecsz->dims[0].n,
                         maxnbufs[ego->maxnbuf_ndx])
                 == p->vecsz->dims[0].n));
}
Пример #4
0
/*
 * Applicability test for an RDFT2 problem `p_'.
 *
 * Returns 1 when every condition below holds, 0 as soon as one fails.
 * `ego' is not consulted.
 */
static int applicable0(const problem *p_, const S *ego, const planner *plnr)
{
     const problem_rdft2 *p = (const problem_rdft2 *) p_;
     UNUSED(ego);

     /* rank-1 transform, vector rank at most 1 */
     if (p->vecsz->rnk > 1)
          return 0;
     if (p->sz->rnk != 1)
          return 0;

     /* FIXME: does it make sense to do R2HCII ? */
     if (p->kind != R2HC && p->kind != HC2R)
          return 0;

     /* real strides must allow for reduction to rdft: twice the
        r1 - r0 distance has to equal the real-side stride (input
        stride for R2HC, output stride otherwise) */
     if (2 * (p->r1 - p->r0) !=
         ((p->kind == R2HC) ? p->sz->dims[0].is : p->sz->dims[0].os))
          return 0;

     if (X(toobig)(p->sz->dims[0].n) && CONSERVE_MEMORYP(plnr))
          return 0;

     return 1;
}
Пример #5
0
/*
 * Applicability test for an MPI transpose problem `p_' with solver `ego'.
 *
 * Returns 1 when every condition below holds, 0 as soon as one fails.
 * On the way, *r receives the value returned by ego->radix(n_pes);
 * it is written only after the first two checks have passed, and it
 * keeps that value even when a later check makes the function return 0.
 *
 * The return code of MPI_Comm_size is not inspected.
 */
static int applicable(const S *ego, const problem *p_,
		      const planner *plnr, int *r)
{
     const problem_mpi_transpose *p = (const problem_mpi_transpose *) p_;
     int n_pes;

     MPI_Comm_size(p->comm, &n_pes);

     /* ny must be exactly tblock * (number of processes) */
     if (p->tblock * n_pes != p->ny)
          return 0;

     /* a solver with preserve_input set is accepted only for
        out-of-place problems where NO_DESTROY_INPUT is not set */
     if (ego->preserve_input
         && (NO_DESTROY_INPUTP(plnr) || p->I == p->O))
          return 0;

     /* the radix must be nonzero and lie strictly between 1 and n_pes */
     *r = ego->radix(n_pes);
     if (!*r)
          return 0;
     if (*r >= n_pes)
          return 0;
     if (*r <= 1)
          return 0;

     if (!enough_space(p->nx, p->ny, p->block, p->tblock, *r, n_pes))
          return 0;

     /* under CONSERVE_MEMORY, a radix of at most 8 is rejected when
        the per-radix size is too big */
     if (CONSERVE_MEMORYP(plnr) && *r <= 8
         && X(toobig)((p->nx * (p->ny / n_pes) * p->vn) / *r))
          return 0;

     /* under NO_SLOW, the per-process size must not exceed
        SMALL_MESSAGE */
     if (NO_SLOWP(plnr)
         && (p->nx * (p->ny / n_pes) * p->vn) / n_pes > SMALL_MESSAGE)
          return 0;

     /* normalise to 0/1, as the original && chain did */
     return ONLY_TRANSPOSEDP(p->flags) ? 1 : 0;
}