예제 #1
0
/* Decide whether the strides in p->sz and p->vecsz allow the problem
   to be treated as in-place along vecsz dimension vdim, or along all
   vector dimensions when vdim is not a finite rank (RNK_MINFTY).
   tensor_inplace_strides cannot simply be reused here because the
   input and output of an rdft2 transform differ in size.  The test
   is deliberately incomplete: 1 is returned only for the most common
   case, 0 otherwise. */
int X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim)
{
     INT nreal, ncplx, nlast;
     INT rs, cs;
     INT vis, vos;
     int d;

     /* all transform dimensions except the last need is == os */
     d = 0;
     while (d + 1 < p->sz->rnk) {
          if (p->sz->dims[d].is != p->sz->dims[d].os)
               return 0;
          ++d;
     }

     /* a rank-0 or non-finite vector adds no further constraint */
     if (p->vecsz->rnk == 0 || !FINITE_RNK(p->vecsz->rnk))
          return 1;

     if (!FINITE_RNK(vdim)) {
          /* no single dimension requested: every one must pass */
          for (d = 0; d < p->vecsz->rnk; ++d) {
               if (!X(rdft2_inplace_strides)(p, d))
                    return 0;
          }
          return 1;
     }

     A(vdim < p->vecsz->rnk);
     vis = p->vecsz->dims[vdim].is;
     vos = p->vecsz->dims[vdim].os;

     if (p->sz->rnk == 0)
          return vis == vos;

     /* nreal is the full tensor size; ncplx is the same product with
        the last dimension n replaced by n/2 + 1 */
     nlast = p->sz->dims[p->sz->rnk - 1].n;
     nreal = X(tensor_sz)(p->sz);
     ncplx = (nreal / nlast) * (nlast / 2 + 1);
     X(rdft2_strides)(p->kind, p->sz->dims + p->sz->rnk - 1, &rs, &cs);

     if (vis != vos)
          return 0;

     /* the factor of 2 comes from the fact that RS is the stride
        of p->r0 and p->r1, which is twice as large as the strides
        in the r2r case */
     return X(iabs)(2 * vos)
          >= X(imax)(2 * ncplx * X(iabs)(cs), nreal * X(iabs)(rs));
}
/* Check if the vecsz/sz strides are consistent with the problem
   being in-place for vecsz.dim[vdim], or for all dimensions
   if vdim == RNK_MINFTY.  We can't just use tensor_inplace_strides
   because rdft transforms have the unfortunate property of
   differing input and output sizes.   This routine is not
   exhaustive; we only return 1 for the most common case.  */
int X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim)
{
     int N, Nc;
     int is, os;
     int i;
     
     for (i = 0; i + 1 < p->sz->rnk; ++i)
	  if (p->sz->dims[i].is != p->sz->dims[i].os)
	       return 0;

     if (!FINITE_RNK(p->vecsz->rnk) || p->vecsz->rnk == 0)
	  return 1;
     if (!FINITE_RNK(vdim)) { /* check all vector dimensions */
	  for (vdim = 0; vdim < p->vecsz->rnk; ++vdim)
	       if (!X(rdft2_inplace_strides)(p, vdim))
		    return 0;
	  return 1;
     }

     A(vdim < p->vecsz->rnk);
     if (p->sz->rnk == 0)
	  return(p->vecsz->dims[vdim].is == p->vecsz->dims[vdim].os);

     N = X(tensor_sz)(p->sz);
     Nc = (N / p->sz->dims[p->sz->rnk-1].n) *
	  (p->sz->dims[p->sz->rnk-1].n/2 + 1);
     X(rdft2_strides)(p->kind, p->sz->dims + p->sz->rnk - 1, &is, &os);
     return(p->vecsz->dims[vdim].is == p->vecsz->dims[vdim].os
	    && X(iabs)(p->vecsz->dims[vdim].os)
	    >= X(imax)(Nc * X(iabs)(os), N * X(iabs)(is)));
}
예제 #3
0
파일: tensor7.c 프로젝트: bambang/vsipl
/* Undo X(tensor_append): *a receives a copy of the first arnk
   dimensions of sz and *b a copy of the remaining sz->rnk - arnk
   dimensions.  Both ranks must be finite. */
void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
{
     int brnk;

     A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));

     brnk = sz->rnk - arnk;     /* rank left over for the tail */
     *a = X(tensor_copy_sub)(sz, 0, arnk);
     *b = X(tensor_copy_sub)(sz, arnk, brnk);
}
예제 #4
0
파일: tensor.c 프로젝트: Aegisub/fftw3
/* Allocate a tensor of rank rnk (asserted non-negative) and return it
   with its rnk field set; this routine does not fill in any dims
   entries.  Room for dims is obtained according to the build
   configuration:
     STRUCT_HACK_KR  - rnk - 1 extra iodim slots follow the struct
                       (only when rnk is finite and > 1);
     STRUCT_HACK_C99 - rnk iodim slots follow the struct (only when
                       rnk is finite);
     otherwise       - dims is a separate allocation, or 0 when rnk
                       is zero or not finite. */
tensor *X(mktensor)(int rnk)
{
     tensor *x;

     A(rnk >= 0);

#if defined(STRUCT_HACK_KR)
     if (!FINITE_RNK(rnk) || rnk <= 1)
          x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
     else
          x = (tensor *)MALLOC(sizeof(tensor) + (rnk - 1) * sizeof(iodim),
                               TENSORS);
#elif defined(STRUCT_HACK_C99)
     if (!FINITE_RNK(rnk))
          x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
     else
          x = (tensor *)MALLOC(sizeof(tensor) + rnk * sizeof(iodim),
                               TENSORS);
#else
     x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
     x->dims = (FINITE_RNK(rnk) && rnk > 0)
          ? (iodim *)MALLOC(sizeof(iodim) * rnk, TENSORS)
          : 0;
#endif

     x->rnk = rnk;
     return x;
}
예제 #5
0
/* Run the verification checks on the real-data problem p and store
   the results in e->i (impulse), e->l (linear) and e->s (shift).
   rounds == 0 selects the default of 20 rounds.  If the rank of
   p->sz or p->vecsz is not finite the routine returns immediately
   without writing to e.  When the problem is neither in-place nor
   allowed to destroy its input, preserves_input is run as well. */
void verify_rdft2(bench_problem *p, int rounds, double tol, errors *e)
{
     C *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
     int n, vecn, N;
     int shift_kind;
     dofft_rdft2_closure k;

     BENCH_ASSERT(p->kind == PROBLEM_REAL);

     if (!(FINITE_RNK(p->sz->rnk) && FINITE_RNK(p->vecsz->rnk)))
          return;      /* give up */

     k.k.apply = rdft2_apply;
     k.k.recopy_input = 0;
     k.p = p;

     if (!rounds)
          rounds = 20;  /* default value */

     /* N = transform size times number of transforms */
     n = tensor_sz(p->sz);
     vecn = tensor_sz(p->vecsz);
     N = n * vecn;

     /* seven scratch arrays of N elements each */
     inA = (C *) bench_malloc(N * sizeof(C));
     inB = (C *) bench_malloc(N * sizeof(C));
     inC = (C *) bench_malloc(N * sizeof(C));
     outA = (C *) bench_malloc(N * sizeof(C));
     outB = (C *) bench_malloc(N * sizeof(C));
     outC = (C *) bench_malloc(N * sizeof(C));
     tmp = (C *) bench_malloc(N * sizeof(C));

     e->i = impulse(&k.k, n, vecn, inA, inB, inC, outA, outB, outC,
                    tmp, rounds, tol);
     e->l = linear(&k.k, 1, N, inA, inB, inC, outA, outB, outC,
                   tmp, rounds, tol);

     /* negative sign selects the time-shift test, otherwise the
        frequency-shift test */
     shift_kind = (p->sign < 0) ? TIME_SHIFT : FREQ_SHIFT;
     e->s = 0.0;
     e->s = dmax(e->s, tf_shift(&k.k, 1, p->sz, n, vecn, p->sign,
                                inA, inB, outA, outB,
                                tmp, rounds, tol, shift_kind));

     if (!(p->in_place || p->destroy_input))
          preserves_input(&k.k, p->sign < 0 ? mkreal : mkhermitian1,
                          N, inA, inB, outB, rounds);

     /* release in reverse order of allocation */
     bench_free(tmp);
     bench_free(outC);
     bench_free(outB);
     bench_free(outA);
     bench_free(inC);
     bench_free(inB);
     bench_free(inA);
}
예제 #6
0
/* Return 1 when the solver can handle problem p_: both tensor ranks
   must be finite, the transform rank must be at least 2, and
   picksplit must succeed (it receives rp to fill in).  Returns 0
   otherwise. */
static int applicable0(const solver *ego_, const problem *p_, int *rp)
{
     const S *ego = (const S *)ego_;
     const problem_rdft *p = (const problem_rdft *) p_;

     if (!FINITE_RNK(p->sz->rnk) || !FINITE_RNK(p->vecsz->rnk))
          return 0;
     if (p->sz->rnk < 2)
          return 0;

     return picksplit(ego, p->sz, rp) != 0;
}
예제 #7
0
파일: tensor5.c 프로젝트: Aegisub/fftw3
/* Build a new tensor holding the dimensions of a followed by those of
   b.  If either rank is not finite the result is a tensor of rank
   RNK_MINFTY. */
tensor *X(tensor_append)(const tensor *a, const tensor *b)
{
     tensor *joined;

     if (!(FINITE_RNK(a->rnk) && FINITE_RNK(b->rnk)))
          return X(mktensor)(RNK_MINFTY);

     joined = X(mktensor)(a->rnk + b->rnk);
     dimcpy(joined->dims, a->dims, a->rnk);
     dimcpy(joined->dims + a->rnk, b->dims, b->rnk);
     return joined;
}
예제 #8
0
파일: problem.c 프로젝트: 376473984/fftw3
/* "Do what I mean": fill in every zero is/os stride of t, walking
   from the last dimension to the first.  A missing stride becomes
   the stride of the previously visited dimension times
   transform_n() of that dimension's n; nti/nto are used when that
   previous dimension is dt, SAME otherwise.  *last_iodim seeds the
   walk and is updated to the last dimension visited (t->dims[0]).
   Tensors of non-finite rank or rank < 1 are returned untouched. */
static bench_tensor *dwim(bench_tensor *t, bench_iodim **last_iodim,
                          n_transform nti, n_transform nto,
                          bench_iodim *dt)
{
     bench_iodim *cur, *prev;
     int idx, prev_is_dt;

     if (!FINITE_RNK(t->rnk) || t->rnk < 1)
          return t;

     prev = *last_iodim;
     for (idx = t->rnk - 1; idx >= 0; --idx) {
          cur = t->dims + idx;
          prev_is_dt = (prev == dt);

          if (cur->is == 0)
               cur->is = prev->is
                    * transform_n(prev->n, prev_is_dt ? nti : SAME);
          if (cur->os == 0)
               cur->os = prev->os
                    * transform_n(prev->n, prev_is_dt ? nto : SAME);

          prev = cur;
     }

     *last_iodim = prev;
     return t;
}
예제 #9
0
파일: tensor5.c 프로젝트: Aegisub/fftw3
/* Copy rnk iodim entries from src to dst.  Nothing is copied when
   rnk is not a finite rank or is not positive. */
static void dimcpy(iodim *dst, const iodim *src, int rnk)
{
     int k;

     if (!FINITE_RNK(rnk))
          return;

     k = 0;
     while (k < rnk) {
          dst[k] = src[k];
          ++k;
     }
}
예제 #10
0
파일: tensor7.c 프로젝트: bambang/vsipl
/* Same as tensor_compress, except that every run of dimensions that
   strides_contig reports as contiguous is additionally merged into a
   single dimension.  (This can safely be done for transform vector
   sizes.)  A tensor whose X(tensor_sz) is 0 yields a tensor of rank
   RNK_MINFTY.  The result is always newly allocated. */
tensor *X(tensor_compress_contiguous)(const tensor *sz)
{
     int j, rnk;
     tensor *sz2, *x;

     if (X(tensor_sz)(sz) == 0)
          return X(mktensor)(RNK_MINFTY);

     sz2 = X(tensor_compress)(sz);
     A(FINITE_RNK(sz2->rnk));

     if (sz2->rnk < 2)          /* nothing to compress */
          return sz2;

     /* first pass: count the dimensions that survive merging */
     rnk = 1;
     for (j = 1; j < sz2->rnk; ++j) {
          if (strides_contig(sz2->dims + j - 1, sz2->dims + j))
               continue;
          ++rnk;
     }

     /* second pass: build the merged tensor */
     x = X(mktensor)(rnk);
     x->dims[0] = sz2->dims[0];
     rnk = 1;
     for (j = 1; j < sz2->rnk; ++j) {
          if (!strides_contig(sz2->dims + j - 1, sz2->dims + j)) {
               /* start a new output dimension */
               A(rnk < x->rnk);
               x->dims[rnk++] = sz2->dims[j];
               continue;
          }
          /* fold dimension j into the current output dimension */
          x->dims[rnk - 1].n *= sz2->dims[j].n;
          x->dims[rnk - 1].is = sz2->dims[j].is;
          x->dims[rnk - 1].os = sz2->dims[j].os;
     }

     X(tensor_destroy)(sz2);
     return x;
}
예제 #11
0
파일: tensor7.c 프로젝트: bambang/vsipl
/* Copy sz while dropping all n == 1 dimensions, which never affect
   any transform or transform vector.

   When more than one dimension remains, the result is sorted with
   qsort using X(dimcmp) to put it into a canonical order.  The
   original authors describe that order as decreasing is, chosen
   because processing a loop/array in order of decreasing stride
   tends to improve locality and because it simplifies the later
   contiguity analysis.  Picking is rather than os is mostly
   arbitrary: normally os is either in the same order as is (e.g.
   multi-dimensional transforms) or in the opposite order (e.g.
   Cooley-Tukey recursion), and both traversal directions are
   considered elsewhere (e.g. by vrank-geq1), so increasing
   vs. decreasing order is not really important. */
tensor *X(tensor_compress)(const tensor *sz)
{
     int i, rnk;
     tensor *x;

     A(FINITE_RNK(sz->rnk));

     /* count the dimensions that will be kept */
     rnk = 0;
     i = 0;
     while (i < sz->rnk) {
          A(sz->dims[i].n > 0);
          if (sz->dims[i].n != 1)
               ++rnk;
          ++i;
     }

     /* copy them over in their original relative order */
     x = X(mktensor)(rnk);
     rnk = 0;
     for (i = 0; i < sz->rnk; ++i) {
          if (sz->dims[i].n == 1)
               continue;
          x->dims[rnk] = sz->dims[i];
          ++rnk;
     }

     if (rnk <= 1)
          return x;             /* nothing to sort */

     qsort(x->dims, (size_t)x->rnk, sizeof(iodim),
           (int (*)(const void *, const void *))X(dimcmp));
     return x;
}
예제 #12
0
/* Create a dft problem from the transform size sz, the vector size
   vecsz and the four data pointers.  The problem stores a compressed
   copy of sz and a contiguously-compressed copy of vecsz.  The real
   and imaginary parts must be both in place or both out of place. */
problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz,
                          R *ri, R *ii, R *ro, R *io)
{
     problem_dft *ego;

     ego = (problem_dft *)X(mkproblem)(sizeof(problem_dft), &padt);

     A((ri == ro) == (ii == io)); /* both in place or both out of place */
     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));

     /* enforce pointer equality if untainted pointers are equal */
     if (UNTAINT(ri) == UNTAINT(ro)) {
          R *re = JOIN_TAINT(ri, ro);
          ro = re;
          ri = re;
     }
     if (UNTAINT(ii) == UNTAINT(io)) {
          R *im = JOIN_TAINT(ii, io);
          io = im;
          ii = im;
     }

     /* more correctness conditions: */
     A(TAINTOF(ri) == TAINTOF(ii));
     A(TAINTOF(ro) == TAINTOF(io));

     ego->ri = ri;
     ego->ii = ii;
     ego->ro = ro;
     ego->io = io;
     ego->sz = X(tensor_compress)(sz);
     ego->vecsz = X(tensor_compress_contiguous)(vecsz);

     A(FINITE_RNK(ego->sz->rnk));
     return &(ego->super);
}
예제 #13
0
파일: indirect.c 프로젝트: 376473984/fftw3
/* Return 1 when this solver applies to problem p_, 0 otherwise.
   Requirements: the vector rank is finite, the problem is a
   nontrivial transform (sz rank > 0), and one of these holds:
     - the problem is in-place and needs some rearrangement of the
       data (tensor_inplace_strides2 fails);
     - the problem is out of place, this is the apply_after variant,
       the input may be destroyed, and the transform goes from
       stride 1/2 to a bigger stride;
     - the problem is out of place, this is the apply_before variant,
       and the transform goes to stride 1/2 from a bigger stride. */
static int applicable0(const solver *ego_, const problem *p_,
                       const planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_rdft *p = (const problem_rdft *) p_;

     if (!FINITE_RNK(p->vecsz->rnk))
          return 0;

     /* problem must be a nontrivial transform, not just a copy */
     if (p->sz->rnk <= 0)
          return 0;

     /* in-place: only useful if the data must be rearranged */
     if (p->I == p->O)
          return !(X(tensor_inplace_strides2)(p->sz, p->vecsz));

     /* out of place, small input stride -> bigger output stride */
     if (ego->adt->apply == apply_after
         && !NO_DESTROY_INPUTP(plnr)
         && X(tensor_min_istride)(p->sz) <= 2
         && X(tensor_min_ostride)(p->sz) > 2)
          return 1;

     /* out of place, bigger input stride -> small output stride */
     return (ego->adt->apply == apply_before
             && X(tensor_min_ostride)(p->sz) <= 2
             && X(tensor_min_istride)(p->sz) > 2);
}
예제 #14
0
파일: dft-r2hc.c 프로젝트: 376473984/fftw3
/* Return 1 for a rank-1 transform with a rank-0 vector, or for a
   rank-0 transform with any finite-rank vector; 0 otherwise. */
static int applicable0(const problem *p_)
{
     const problem_dft *p = (const problem_dft *) p_;

     if (p->sz->rnk == 1)
          return p->vecsz->rnk == 0;
     if (p->sz->rnk == 0)
          return FINITE_RNK(p->vecsz->rnk) ? 1 : 0;
     return 0;
}
예제 #15
0
/* Return 1 when the strides of t satisfy all of the relations tested
   below, 0 otherwise (always 0 for rank < 2):
     - dims 0 and 1: is[0] == is[1]*n[1], os[0] == is[1] and
       os[1] == os[0]*n[0];
     - for rank > 2: is[1] == is[2]*n[2], each dimension i >= 2 that
       has a successor obeys is[i] == is[i+1]*n[i+1] and
       os[i] == os[i+1]*n[i+1], and the last dimension has is == os.
   The rank of t must be finite. */
static int tensor_rowmajor_transposedp(bench_tensor *t)
{
     bench_iodim *d;
     int i, last;

     BENCH_ASSERT(FINITE_RNK(t->rnk));
     if (t->rnk < 2)
          return 0;

     /* relations between the first two dimensions */
     d = t->dims;
     if (!(d[0].is == d[1].is * d[1].n
           && d[0].os == d[1].is
           && d[1].os == d[0].os * d[0].n))
          return 0;

     if (t->rnk == 2)
          return 1;             /* no further dimensions to check */

     if (d[1].is != d[2].is * d[2].n)
          return 0;

     /* remaining dimensions, pairwise with their successor */
     for (i = 2; i + 1 < t->rnk; ++i) {
          d = t->dims + i;
          if (d[0].is != d[1].is * d[1].n)
               return 0;
          if (d[0].os != d[1].os * d[1].n)
               return 0;
     }

     last = t->rnk - 1;
     return t->dims[last].is == t->dims[last].os;
}
예제 #16
0
파일: problem2.c 프로젝트: Aegisub/fftw3
/* Create an rdft2 problem of the given kind (R2HC, R2HCII, HC2R or
   HC2RIII) from the transform size sz, vector size vecsz and the
   data pointers r0/r1/cr/ci.  Returns an unsolvable problem when r0
   and ci refer to the same untainted address.  The stored sz keeps
   the last dimension of the input in last position; the other
   dimensions are compressed separately. */
problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz,
                            R *r0, R *r1, R *cr, R *ci,
                            rdft_kind kind)
{
     problem_rdft2 *ego;

     A(kind == R2HC || kind == R2HCII || kind == HC2R || kind == HC2RIII);
     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));
     A(FINITE_RNK(sz->rnk));

     /* require in-place problems to use r0 == cr */
     if (UNTAINT(r0) == UNTAINT(ci))
          return X(mkproblem_unsolvable)();

     /* FIXME: should check UNTAINT(r1) == UNTAINT(cr) but
        only if odd elements exist, which requires compressing the
        tensors first */

     if (UNTAINT(r0) == UNTAINT(cr)) {
          R *joined = JOIN_TAINT(r0, cr);
          cr = joined;
          r0 = joined;
     }

     ego = (problem_rdft2 *)X(mkproblem)(sizeof(problem_rdft2), &padt);

     if (sz->rnk <= 1) {
          ego->sz = X(tensor_compress)(sz);
     } else {
          /* have to compress rnk-1 dims separately, ugh */
          tensor *lead = X(tensor_copy_except)(sz, sz->rnk - 1);
          tensor *last = X(tensor_copy_sub)(sz, sz->rnk - 1, 1);
          tensor *lead_c = X(tensor_compress)(lead);

          /* if all leading dimensions vanished, only the last one
             remains and it is compressed on its own */
          ego->sz = (lead_c->rnk > 0)
               ? X(tensor_append)(lead_c, last)
               : X(tensor_compress)(last);

          X(tensor_destroy2)(lead, last);
          X(tensor_destroy)(lead_c);
     }

     ego->vecsz = X(tensor_compress_contiguous)(vecsz);
     ego->kind = kind;
     ego->r0 = r0;
     ego->r1 = r1;
     ego->cr = cr;
     ego->ci = ci;

     A(FINITE_RNK(ego->sz->rnk));
     return &(ego->super);
}
예제 #17
0
파일: problem.c 프로젝트: 376473984/fftw3
/* Rewrite the output strides of the first two dimensions of t:
   dims[0].os takes the value of dims[1].os, and dims[1].os then
   becomes the new dims[0].os times dims[0].n.  Tensors of
   non-finite rank or rank < 2 are left unchanged. */
static void transpose_tensor(bench_tensor *t)
{
     if (FINITE_RNK(t->rnk) && t->rnk >= 2) {
          /* order matters: the second line reads the value just
             written by the first */
          t->dims[0].os = t->dims[1].os;
          t->dims[1].os = t->dims[0].n * t->dims[0].os;
     }
}
예제 #18
0
파일: block.c 프로젝트: Aegisub/fftw3
/* Tell whether sz is distributed for k as a simple 1d block
   distribution in the first or second dimension: the first
   dimension with more than one block must exist, must be dimension
   0 or 1, and everything after it must be local.  Non-finite rank
   yields 0. */
int XM(is_block1d)(const dtensor *sz, block_kind k)
{
     int d;

     if (!FINITE_RNK(sz->rnk))
          return 0;

     /* skip leading dimensions that consist of a single block */
     d = 0;
     while (d < sz->rnk && num_blocks_kind(sz->dims + d, k) == 1)
          ++d;

     if (d >= sz->rnk || d >= 2)
          return 0;
     return XM(is_local_after)(d + 1, sz, k) != 0;
}
예제 #19
0
파일: block.c 프로젝트: Aegisub/fftw3
/* Return 1 when sz is local for every dimension >= dim, i.e. none of
   those dimensions has more than one block for kind k; 0 otherwise.
   A non-finite rank counts as local. */
int XM(is_local_after)(int dim, const dtensor *sz, block_kind k)
{
     if (!FINITE_RNK(sz->rnk))
          return 1;

     while (dim < sz->rnk) {
          if (XM(num_blocks)(sz->dims[dim].n, sz->dims[dim].b[k]) > 1)
               return 0;
          ++dim;
     }
     return 1;
}
예제 #20
0
/* Return 1 in either of two cases, 0 otherwise:
     case 1: the vector rank is not finite (-infty);
     case 2: a rank-0 dft that is in place (ro == ri) and whose
             vector passes tensor_inplace_strides. */
static int applicable(const solver *ego_, const problem *p_)
{
     const problem_dft *p = (const problem_dft *) p_;

     UNUSED(ego_);

     /* case 1 : -infty vector rank */
     if (!FINITE_RNK(p->vecsz->rnk))
          return 1;

     /* case 2 : rank-0 in-place dft (vector rank is finite here) */
     if (p->sz->rnk != 0)
          return 0;
     if (p->ro != p->ri)
          return 0;
     return X(tensor_inplace_strides)(p->vecsz) != 0;
}
예제 #21
0
파일: tensor5.c 프로젝트: Aegisub/fftw3
/* Variant of X(tensor_copy) that copies only rnk dimensions of sz,
   beginning at start_dim, into a newly made tensor of rank rnk. */
tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk)
{
     tensor *sub;

     A(FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk);

     sub = X(mktensor)(rnk);
     dimcpy(sub->dims, sz->dims + start_dim, rnk);
     return sub;
}
/* Create an MPI rdft problem for the distributed tensor sz, vector
   length vn, buffers I/O, communicator comm, per-dimension kinds and
   flags.  The problem stores XM(dtensor_canonical)(sz, 0), a copy of
   the kind array and a duplicate of comm.  A TRANSPOSED_IN flag on a
   problem of rank > 1 is canonicalized away by swapping the first
   two dimensions and toggling TRANSPOSED_OUT. */
problem *XM(mkproblem_rdft)(const dtensor *sz, INT vn,
                            R *I, R *O,
                            MPI_Comm comm,
                            const rdft_kind *kind, unsigned flags)
{
     problem_mpi_rdft *ego;
     int i, rnk = sz->rnk;
     int n_pes;

     A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk));
     MPI_Comm_size(comm, &n_pes);
     /* there must be at least as many processes as blocks */
     A(n_pes >= XM(num_blocks_total)(sz, IB)
       && n_pes >= XM(num_blocks_total)(sz, OB));
     A(vn >= 0);

     /* storage for the kind array depends on the build configuration */
#if defined(STRUCT_HACK_KR)
     ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft)
                                             + sizeof(rdft_kind)
                                             * (rnk > 0 ? rnk - 1 : 0), &padt);
#elif defined(STRUCT_HACK_C99)
     ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft)
                                             + sizeof(rdft_kind) * rnk, &padt);
#else
     ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft), &padt);
     ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * rnk, PROBLEMS);
#endif

     /* enforce pointer equality if untainted pointers are equal */
     if (UNTAINT(I) == UNTAINT(O)) {
          R *joined = JOIN_TAINT(I, O);
          O = joined;
          I = joined;
     }

     ego->sz = XM(dtensor_canonical)(sz, 0);
     ego->vn = vn;
     ego->I = I;
     ego->O = O;

     i = 0;
     while (i < ego->sz->rnk) {
          ego->kind[i] = kind[i];
          ++i;
     }

     /* canonicalize: replace TRANSPOSED_IN with TRANSPOSED_OUT by
        swapping the first two dimensions (for rnk > 1) */
     if ((flags & TRANSPOSED_IN) && ego->sz->rnk > 1) {
          rdft_kind kind0 = ego->kind[0];
          ddim first = ego->sz->dims[0];

          ego->kind[0] = ego->kind[1];
          ego->kind[1] = kind0;
          ego->sz->dims[0] = ego->sz->dims[1];
          ego->sz->dims[1] = first;

          flags = (flags & ~TRANSPOSED_IN) ^ TRANSPOSED_OUT;
     }
     ego->flags = flags;

     MPI_Comm_dup(comm, &ego->comm);

     return &(ego->super);
}
예제 #23
0
/* Return 1 when the solver applies to problem p_, 0 otherwise.  Both
   ranks must be finite, vecsz and sz together must pass
   tensor_inplace_strides2, pickdim must succeed (it receives pdim0
   and pdim1 to fill in), and the output must not already include
   the transpose. */
static int applicable0(const solver *ego_, const problem *p_,
                       const planner *plnr,
                       int *pdim0, int *pdim1)
{
     const problem_dft *p = (const problem_dft *) p_;

     UNUSED(ego_);
     UNUSED(plnr);

     if (!FINITE_RNK(p->vecsz->rnk) || !FINITE_RNK(p->sz->rnk))
          return 0;

     /* FIXME: can/should we relax this constraint? */
     if (!X(tensor_inplace_strides2)(p->vecsz, p->sz))
          return 0;

     if (!pickdim(p->vecsz, p->sz, pdim0, pdim1))
          return 0;

     /* output should not *already* include the transpose
        (in which case we duplicate the regular indirect.c) */
     return p->sz->dims[*pdim1].os != p->vecsz->dims[*pdim0].is;
}
예제 #24
0
파일: rank0.c 프로젝트: 376473984/fftw3
/* Return 1 when p_ is a rank-0 problem with a finite-rank vector for
   which fill_iodim succeeds on a scratch plan and the solver's own
   applicable hook accepts that plan; 0 otherwise. */
static int applicable(const S *ego, const problem *p_)
{
     const problem_rdft *p = (const problem_rdft *) p_;
     P pln;     /* scratch plan, only used for the checks below */

     if (p->sz->rnk != 0)
          return 0;
     if (!FINITE_RNK(p->vecsz->rnk))
          return 0;
     if (!fill_iodim(&pln, p))
          return 0;

     return ego->applicable(&pln, p) != 0;
}
예제 #25
0
/* Return 1 iff *any* stride of sz would decrease under
   tensor_inplace_copy(sz, k): for k == INPLACE_OS that means some
   dimension has os < is, for any other k some dimension has
   is < os.  Returns 0 for a tensor of non-finite rank. */
static int tensor_strides_decrease(const tensor *sz, inplace_kind k)
{
     INT sign;
     int d;

     if (!FINITE_RNK(sz->rnk))
          return 0;

     /* +1 compares os against is, -1 compares is against os */
     sign = (k == INPLACE_OS) ? (INT)1 : (INT)-1;

     for (d = 0; d < sz->rnk; ++d) {
          if ((sz->dims[d].os - sz->dims[d].is) * sign < 0)
               return 1;
     }
     return 0;
}
예제 #26
0
/* Return 1 when every dimension of sz has identical input and output
   strides, 0 as soon as one differs.  The rank must be finite. */
int X(tensor_inplace_strides)(const tensor *sz)
{
     int d;

     A(FINITE_RNK(sz->rnk));

     d = 0;
     while (d < sz->rnk) {
          if (sz->dims[d].is != sz->dims[d].os)
               return 0;
          ++d;
     }
     return 1;
}
예제 #27
0
/* Recompute all output strides of t: dims[0].os takes the output
   stride of the last dimension, and each following dimension gets
   the previous dimension's (new) os times that dimension's n.
   Tensors of non-finite rank or rank < 1 are left unchanged. */
static void transpose_tensor(bench_tensor *t)
{
     int d;

     if (!FINITE_RNK(t->rnk) || t->rnk < 1)
          return;

     /* read the last stride before the loop can overwrite it */
     t->dims[0].os = t->dims[t->rnk - 1].os;

     d = 1;
     while (d < t->rnk) {
          t->dims[d].os = t->dims[d - 1].os * t->dims[d - 1].n;
          ++d;
     }
}
예제 #28
0
파일: block.c 프로젝트: Aegisub/fftw3
/* For a non-idle process which_pe, compute coords[rnk], the
   coordinates of its block in the matrix of blocks.  k selects the
   input or output data distribution.  which_pe is decomposed like a
   mixed-radix number whose last dimension varies fastest. */
void XM(block_coords)(const dtensor *sz, block_kind k, int which_pe,
                      INT *coords)
{
     int i;

     A(!XM(idle_process)(sz, k, which_pe) && FINITE_RNK(sz->rnk));

     i = sz->rnk;
     while (i-- > 0) {
          INT nb = num_blocks_kind(sz->dims + i, k);

          coords[i] = which_pe % nb;    /* digit for dimension i */
          which_pe /= nb;               /* carry on to dimension i-1 */
     }
}
예제 #29
0
파일: tensor5.c 프로젝트: Aegisub/fftw3
/* Variant of X(tensor_copy) that copies every dimension of sz
   *except* except_dim, giving a new tensor of rank sz->rnk - 1.
   sz must have finite rank >= 1. */
tensor *X(tensor_copy_except)(const tensor *sz, int except_dim)
{
     tensor *out;
     int tail;

     A(FINITE_RNK(sz->rnk) && sz->rnk >= 1 && except_dim < sz->rnk);

     out = X(mktensor)(sz->rnk - 1);
     tail = out->rnk - except_dim;      /* dims after the skipped one */

     dimcpy(out->dims, sz->dims, except_dim);
     dimcpy(out->dims + except_dim, sz->dims + except_dim + 1, tail);
     return out;
}
예제 #30
0
/* Return 1 when rnk is a finite, non-negative rank and each of the
   rnk sizes in n[] is strictly positive; 0 otherwise. */
static int rowmajor_kosherp(int rnk, const int *n)
{
     int d;

     if (!FINITE_RNK(rnk) || rnk < 0)
          return 0;

     d = 0;
     while (d < rnk) {
          if (n[d] <= 0)
               return 0;
          ++d;
     }
     return 1;
}