static int applicable(const problem *p_, const planner *plnr) { if (!applicable0(p_)) return 0; { const problem_dft *p = (const problem_dft *) p_; if (NO_UGLYP(plnr) && DFT_R2HC_ICKYP(plnr)) return 0; if (p->sz->rnk == 1 && split(p->ri, p->ii, p->sz->dims[0].n, p->sz->dims[0].is) && split(p->ro, p->io, p->sz->dims[0].n, p->sz->dims[0].os)) return 1; return !(NO_UGLYP(plnr)); } }
static int applicable(const S *ego, INT r, INT irs, INT ors, INT m, INT ms, INT v, INT ivs, INT ovs, INT mb, INT me, R *rio, R *iio, const planner *plnr, INT *extra_iter) { if (ego->bufferedp) { *extra_iter = 0; if (!applicable0_buf(ego, r, irs, ors, m, ms, v, ivs, ovs, mb, me, rio, iio, plnr)) return 0; } else { if (!applicable0(ego, r, irs, ors, m, ms, v, ivs, ovs, mb, me, rio, iio, plnr, extra_iter)) return 0; } if (NO_UGLYP(plnr) && X(ct_uglyp)((ego->bufferedp? (INT)512 : (INT)16), v, m * r, r)) return 0; if (m * r > 262144 && NO_FIXED_RADIX_LARGE_NP(plnr)) return 0; return 1; }
static int applicable(const solver *ego_, const problem *p_, const planner *plnr, int *dp) { const S *ego = (const S *)ego_; if (!applicable0(ego_, p_, dp)) return 0; /* fftw2 behavior */ if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) return 0; if (NO_UGLYP(plnr)) { const problem_rdft2 *p = (const problem_rdft2 *) p_; iodim *d = p->vecsz->dims + *dp; /* Heuristic: if the transform is multi-dimensional, and the vector stride is less than the transform size, then we probably want to use a rank>=2 plan first in order to combine this vector with the transform-dimension vectors. */ if (p->sz->rnk > 1 && X(imin)(X(iabs)(d->is), X(iabs)(d->os)) < X(rdft2_tensor_max_index)(p->sz, p->kind) ) return 0; /* Heuristic: don't use a vrank-geq1 for rank-0 vrank-1 transforms, since this case is better handled by rank-0 solvers. */ if (p->sz->rnk == 0 && p->vecsz->rnk == 1) return 0; if (NO_NONTHREADEDP(plnr)) return 0; /* prefer threaded version */ } return 1; }
static int applicable(const solver *ego_, const problem *p_, const planner *plnr, int *dp) { const S *ego = (const S *)ego_; const problem_dft *p; if (!applicable0(ego_, p_, dp)) return 0; /* fftw2 behavior */ if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) return 0; p = (const problem_dft *) p_; if (NO_UGLYP(plnr)) { /* Heuristic: if the transform is multi-dimensional, and the vector stride is less than the transform size, then we probably want to use a rank>=2 plan first in order to combine this vector with the transform-dimension vectors. */ { iodim *d = p->vecsz->dims + *dp; if (1 && p->sz->rnk > 1 && X(imin)(X(iabs)(d->is), X(iabs)(d->os)) < X(tensor_max_index)(p->sz) ) return 0; } if (NO_NONTHREADEDP(plnr)) return 0; /* prefer threaded version */ } return 1; }
/*
 * Applicability test for a dft problem.  Refused when the planner
 * sets NO_BUFFERINGP or applicable0() fails.  Under NO_UGLYP the
 * problem must additionally have ri == ro and a first-dimension size
 * that toobig() does not flag.  Returns 1 if usable, 0 otherwise.
 */
static int applicable(const S *ego, const problem *p_, const planner *plnr)
{
     if (NO_BUFFERINGP(plnr) || !applicable0(ego, p_, plnr))
          return 0;

     if (NO_UGLYP(plnr)) {
          const problem_dft *dft = (const problem_dft *) p_;

          if (dft->ri != dft->ro || X(toobig)(dft->sz->dims[0].n))
               return 0;
     }

     return 1;
}
/*
 * Applicability test: the basic check applicable0(ego, kind, r) must
 * pass, and under NO_UGLYP the ct_uglyp() predicate must not fire.
 * Returns 1 if usable, 0 otherwise.
 */
static int applicable(const S *ego, rdft_kind kind, INT r, INT m, INT v,
                      const planner *plnr)
{
     INT limit;

     if (!applicable0(ego, kind, r))
          return 0;

     if (!NO_UGLYP(plnr))
          return 1;

     /* the first ct_uglyp argument depends on buffering */
     limit = ego->bufferedp ? (INT)512 : (INT)16;

     return X(ct_uglyp)(limit, v, m * r, r) ? 0 : 1;
}
/*
 * Applicability test: the basic check applicable0() must pass, and
 * under NO_UGLYP the ct_uglyp(16, m * r, r) predicate must not fire.
 * Returns 1 if usable, 0 otherwise.
 */
static int applicable(const S *ego, rdft_kind kind,
                      INT r, INT m, INT s,
                      INT vl, INT vs,
                      INT mstart1, INT mcount2,
                      R *IO, const planner *plnr)
{
     if (!applicable0(ego, kind, r, m, s, vl, vs, mstart1, mcount2, IO))
          return 0;

     if (NO_UGLYP(plnr)) {
          if (X(ct_uglyp)(16, m * r, r))
               return 0;
     }

     return 1;
}
/*
 * Applicability test for an rdft2 problem.  Refused when the planner
 * sets NO_BUFFERINGP or applicable0() fails.  Under NO_UGLYP the
 * problem must additionally have r0 == cr and a first-dimension size
 * that toobig() does not flag.  Returns 1 if usable, 0 otherwise.
 */
static int applicable(const problem *p_, const S *ego, const planner *plnr)
{
     if (NO_BUFFERINGP(plnr) || !applicable0(p_, ego, plnr))
          return 0;

     if (NO_UGLYP(plnr)) {
          const problem_rdft2 *rp = (const problem_rdft2 *) p_;

          if (rp->r0 != rp->cr || X(toobig)(rp->sz->dims[0].n))
               return 0;
     }

     return 1;
}
static int applicable(const S *ego, const problem *p_, const planner *plnr) { const problem_rdft *p; if (NO_BUFFERINGP(plnr)) return 0; if (!applicable0(ego, p_, plnr)) return 0; p = (const problem_rdft *) p_; if (p->kind[0] == HC2R) { if (NO_UGLYP(plnr)) { /* UGLY if in-place and too big, since the problem could be solved via transpositions */ if (p->I == p->O && fftwf_toobig(p->sz->dims[0].n)) return 0; } } else { if (NO_UGLYP(plnr)) { if (p->I != p->O) return 0; if (fftwf_toobig(p->sz->dims[0].n)) return 0; } } return 1; }
static int applicable(const problem *p_, const planner *plnr) { const problem_rdft2 *p; if (NO_BUFFERINGP(plnr)) return 0; if (!applicable0(p_, plnr)) return 0; p = (const problem_rdft2 *) p_; if (p->kind == HC2R) { if (NO_UGLYP(plnr)) { /* UGLY if in-place and too big, since the problem could be solved via transpositions */ if (p->r0 == p->cr && X(toobig)(p->sz->dims[0].n)) return 0; } } else { if (NO_UGLYP(plnr)) { if (p->r0 != p->cr || X(toobig)(p->sz->dims[0].n)) return 0; } } return 1; }
static int applicable(const problem *p_, const planner *plnr) { if (DFTP(p_)) { const problem_dft *p = (const problem_dft *)p_; const iodim *d = p->vecsz->dims; return (1 && p->ri == p->ro && p->sz->rnk == 0 && p->vecsz->rnk == 2 && X(transposable)(d, d+1, 1, X(imin)(d[0].is,d[0].os), p->ri, p->ii) && (!NO_UGLYP(plnr) || d[0].n == d[1].n) ); } return 0; }
static int applicable_buf(const solver *ego_, const problem *p_, const planner *plnr) { const S *ego = (const S *) ego_; const problem_dft *p = (const problem_dft *) p_; const kdft_desc *d = ego->desc; INT vl; INT ivs, ovs; INT batchsz; return ( 1 && p->sz->rnk == 1 && p->vecsz->rnk == 1 && p->sz->dims[0].n == d->sz /* check strides etc */ && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs) /* UGLY if IS <= IVS */ && !(NO_UGLYP(plnr) && X(iabs)(p->sz->dims[0].is) <= X(iabs)(ivs)) && (batchsz = compute_batchsize(d->sz), 1) && (d->genus->okp(d, 0, ((const R *)0) + 1, p->ro, p->io, 2 * batchsz, p->sz->dims[0].os, batchsz, 2, ovs, plnr)) && (d->genus->okp(d, 0, ((const R *)0) + 1, p->ro, p->io, 2 * batchsz, p->sz->dims[0].os, vl % batchsz, 2, ovs, plnr)) && (0 /* can operate out-of-place */ || p->ri != p->ro /* can operate in-place as long as strides are the same */ || X(tensor_inplace_strides2)(p->sz, p->vecsz) /* can do it if the problem fits in the buffer, no matter what the strides are */ || vl <= batchsz ) ); }
static int applicable(const solver_hc2hc *ego, const problem *p_, const planner *plnr) { const problem_rdft *p; if (!applicable0(ego, p_, plnr)) return 0; p = (const problem_rdft *) p_; /* emulate fftw2 behavior */ if (NO_VRECURSEP(plnr) && (p->vecsz->rnk > 0)) return 0; if (NO_UGLYP(plnr)) { if (X(ct_uglyp)(16, p->sz->dims[0].n, ego->desc->radix)) return 0; if (NONTHREADED_ICKYP(plnr)) return 0; /* prefer threaded version */ } return 1; }
/* TODO: revise this. */ static int applicable(const solver *ego_, const problem *p_, const planner *plnr, int *rp) { const S *ego = (const S *)ego_; const problem_dft *p = (const problem_dft *) p_; if (!applicable0(ego_, p_, rp)) return 0; /* fixed spltrnk (unlike fftw2's spltrnk=1, default buddies[0] is spltrnk=0, which is an asymptotic "theoretical optimum" for an ideal cache; it's equivalent to spltrnk=1 for rnk < 4). */ if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0])) return 0; /* Heuristic: if the vector stride is greater than the transform sz, don't use (prefer to do the vector loop first with a vrank-geq1 plan). */ if (NO_UGLYP(plnr)) if (p->vecsz->rnk > 0 && X(tensor_min_stride)(p->vecsz) > X(tensor_max_index)(p->sz)) return 0; return 1; }
/* TODO: revise this. */ static int applicable(const solver *ego_, const problem *p_, const planner *plnr, int *rp) { const S *ego = (const S *)ego_; if (!applicable0(ego_, p_, rp)) return 0; if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0])) return 0; if (NO_UGLYP(plnr)) { /* Heuristic: if the vector stride is greater than the transform sz, don't use (prefer to do the vector loop first with a vrank-geq1 plan). */ const problem_rdft *p = (const problem_rdft *) p_; if (p->vecsz->rnk > 0 && X(tensor_min_stride)(p->vecsz) > X(tensor_max_index)(p->sz)) return 0; } return 1; }
/*
 * Applicability test that dispatches on ego->bufferedp to the buffered
 * or unbuffered basic check (both receive extra_iter), then applies
 * the NO_UGLYP restriction via ct_uglyp().
 * Returns 1 if usable, 0 otherwise.
 */
static int applicable(const S *ego, rdft_kind kind,
                      INT r, INT rs,
                      INT m, INT ms,
                      INT v, INT vs,
                      R *cr, R *ci,
                      const planner *plnr, INT *extra_iter)
{
     if (!ego->bufferedp) {
          if (!applicable0(ego, kind, r, rs, m, ms, v, vs, cr, ci,
                           plnr, extra_iter))
               return 0;
     } else {
          if (!applicable0_buf(ego, kind, r, rs, m, ms, v, vs, cr, ci,
                               plnr, extra_iter))
               return 0;
     }

     if (NO_UGLYP(plnr)) {
          /* the first ct_uglyp argument depends on buffering */
          INT limit = ego->bufferedp ? (INT)512 : (INT)16;

          if (X(ct_uglyp)(limit, m * r, r))
               return 0;
     }

     return 1;
}
static int applicable(const solver *ego_, const problem *p_, const planner *plnr, int *pdim0, int *pdim1) { if (!applicable0(ego_, p_, plnr, pdim0, pdim1)) return 0; { const problem_dft *p = (const problem_dft *) p_; INT u = p->ri == p->ii + 1 || p->ii == p->ri + 1 ? (INT)2 : (INT)1; /* UGLY if does not result in contiguous transforms or transforms of contiguous vectors (since the latter at least have efficient transpositions) */ if (NO_UGLYP(plnr) && p->vecsz->dims[*pdim0].is != u && !(p->vecsz->rnk == 2 && p->vecsz->dims[1-*pdim0].is == u && p->vecsz->dims[*pdim0].is == u * p->vecsz->dims[1-*pdim0].n)) return 0; if (NO_INDIRECT_OP_P(plnr) && p->ri != p->ro) return 0; } return 1; }
/*
 * Applicability test: never usable when the planner sets NO_UGLYP;
 * otherwise defers to applicable0(ego, p).  Returns 1 or 0.
 */
static int applicable(const solver *ego, const problem *p,
                      const planner *plnr)
{
     if (NO_UGLYP(plnr))
          return 0;

     return applicable0(ego, p) ? 1 : 0;
}
/*
 * Applicability test: never usable when the planner sets NO_UGLYP;
 * otherwise defers to applicable0_dit(ego_, p_).  Returns 1 or 0.
 */
static int applicable_dit(const solver *ego_, const problem *p_,
                          const planner *plnr)
{
     if (NO_UGLYP(plnr))
          return 0;

     return applicable0_dit(ego_, p_) ? 1 : 0;
}