Exemple #1
0
/* zero() hook for DFT problems: append the transform tensor to the
   vector tensor, hand the combined tensor together with the untainted
   ri/ii pointers to X(dft_zerotens), then release the temporary. */
static void zero(const problem *ego_)
{
     const problem_dft *p = (const problem_dft *) ego_;
     tensor *whole;

     whole = X(tensor_append)(p->vecsz, p->sz);
     X(dft_zerotens)(whole, UNTAINT(p->ri), UNTAINT(p->ii));
     X(tensor_destroy)(whole);
}
/* Allocate a problem_dft and fill it in.
 *
 *   sz, vecsz      - tensors; both must pass X(tensor_kosherp).  The
 *                    problem stores X(tensor_compress)(sz) and
 *                    X(tensor_compress_contiguous)(vecsz), not the
 *                    arguments themselves.
 *   ri, ii, ro, io - real/imaginary input and output pointers,
 *                    possibly tainted.
 *
 * Returns a pointer to the embedded generic problem header. */
problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz,
                          R *ri, R *ii, R *ro, R *io)
{
     problem_dft *prob;

     prob = (problem_dft *)X(mkproblem)(sizeof(problem_dft), &padt);

     /* real and imaginary parts must agree on being in place */
     A((ri == ro) == (ii == io));
     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));

     /* pointers that coincide once untainted are made identical,
        both carrying the joined taint */
     if (UNTAINT(ri) == UNTAINT(ro)) {
	  R *joined_r = JOIN_TAINT(ri, ro);
	  ri = joined_r;
	  ro = joined_r;
     }
     if (UNTAINT(ii) == UNTAINT(io)) {
	  R *joined_i = JOIN_TAINT(ii, io);
	  ii = joined_i;
	  io = joined_i;
     }

     /* real and imaginary halves must share the same taint */
     A(TAINTOF(ri) == TAINTOF(ii));
     A(TAINTOF(ro) == TAINTOF(io));

     prob->ri = ri;
     prob->ii = ii;
     prob->ro = ro;
     prob->io = io;
     prob->sz = X(tensor_compress)(sz);
     prob->vecsz = X(tensor_compress_contiguous)(vecsz);

     A(FINITE_RNK(prob->sz->rnk));
     return &(prob->super);
}
Exemple #3
0
/* use the apply() operation for DFT problems */
void X(dft_solve)(const plan *ego_, const problem *p_)
{
     const plan_dft *ego = (const plan_dft *) ego_;
     const problem_dft *p = (const problem_dft *) p_;
     ego->apply(ego_, 
		UNTAINT(p->ri), UNTAINT(p->ii), 
		UNTAINT(p->ro), UNTAINT(p->io));
}
/* Allocate and fill in a problem_mpi_rdft.
 *
 *   sz    - distributed tensor; must pass XM(dtensor_validp) and have
 *           finite rank.  The problem stores XM(dtensor_canonical)(sz, 0).
 *   vn    - stored as-is; asserted to be >= 0.
 *   I, O  - input/output pointers (possibly tainted).
 *   comm  - communicator; its size must be at least
 *           XM(num_blocks_total) for both IB and OB.  The problem keeps
 *           its own duplicate obtained from MPI_Comm_dup.
 *   kind  - array of transform kinds, read for indices
 *           0 .. ego->sz->rnk - 1.
 *   flags - TRANSPOSED_IN is canonicalized away when rank > 1 (see below).
 *
 * Returns a pointer to the embedded generic problem header. */
problem *XM(mkproblem_rdft)(const dtensor *sz, INT vn,
			    R *I, R *O,
			    MPI_Comm comm,
			    const rdft_kind *kind, unsigned flags)
{
     problem_mpi_rdft *ego;
     int i, rnk = sz->rnk;
     int n_pes;

     A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk));
     MPI_Comm_size(comm, &n_pes);
     A(n_pes >= XM(num_blocks_total)(sz, IB)
       && n_pes >= XM(num_blocks_total)(sz, OB));
     A(vn >= 0);

     /* Storage for ego->kind depends on the build configuration:
	with STRUCT_HACK_KR the allocation is extended by rnk - 1
	entries (presumably the struct already declares one element --
	confirm against the struct definition), with STRUCT_HACK_C99 by
	rnk entries, and otherwise the array is MALLOC'ed separately. */
#if defined(STRUCT_HACK_KR)
     ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft)
					     + sizeof(rdft_kind)
					     * (rnk > 0 ? rnk - 1 : 0), &padt);
#elif defined(STRUCT_HACK_C99)
     ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft)
					     + sizeof(rdft_kind) * rnk, &padt);
#else
     ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft), &padt);
     ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * rnk, PROBLEMS);
#endif

     /* enforce pointer equality if untainted pointers are equal */
     if (UNTAINT(I) == UNTAINT(O))
	  I = O = JOIN_TAINT(I, O);

     ego->sz = XM(dtensor_canonical)(sz, 0);
     ego->vn = vn;
     ego->I = I;
     ego->O = O;
     /* NOTE(review): the kind array was sized from sz->rnk but this loop
	runs to ego->sz->rnk; this relies on XM(dtensor_canonical)(sz, 0)
	not returning a larger rank and keeping dimension order -- confirm */
     for (i = 0; i< ego->sz->rnk; ++i)
	  ego->kind[i] = kind[i];

     /* canonicalize: replace TRANSPOSED_IN with TRANSPOSED_OUT by
        swapping the first two dimensions (for rnk > 1) */
     if ((flags & TRANSPOSED_IN) && ego->sz->rnk > 1) {
	  rdft_kind k = ego->kind[0];
	  ddim dim0 = ego->sz->dims[0];
	  ego->sz->dims[0] = ego->sz->dims[1];
	  ego->sz->dims[1] = dim0;
	  /* kinds are swapped along with their dimensions */
	  ego->kind[0] = ego->kind[1];
	  ego->kind[1] = k;
	  flags &= ~TRANSPOSED_IN;
	  /* toggle rather than set: a TRANSPOSED_OUT that was already
	     present is cleared by the swap */
	  flags ^= TRANSPOSED_OUT;
     }
     ego->flags = flags;

     /* the problem stores a duplicate of the caller's communicator */
     MPI_Comm_dup(comm, &ego->comm);

     return &(ego->super);
}
Exemple #5
0
/* Allocate and fill in a problem_rdft2.
 *
 *   sz, vecsz - tensors; both must pass X(tensor_kosherp) and sz must
 *               have finite rank.
 *   r0, r1    - real-array pointers; cr, ci - complex-array pointers
 *               (all possibly tainted).
 *   kind      - one of R2HC, R2HCII, HC2R, HC2RIII.
 *
 * Returns X(mkproblem_unsolvable)() when r0 and ci coincide once
 * untainted; otherwise a pointer to the embedded problem header. */
problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz,
			    R *r0, R *r1, R *cr, R *ci,
			    rdft_kind kind)
{
     problem_rdft2 *prob;

     A(kind == R2HC || kind == R2HCII || kind == HC2R || kind == HC2RIII);
     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));
     A(FINITE_RNK(sz->rnk));

     /* in-place problems are required to alias r0 with cr, not ci */
     if (UNTAINT(r0) == UNTAINT(ci))
	  return X(mkproblem_unsolvable)();

     /* FIXME: should check UNTAINT(r1) == UNTAINT(cr) but
	only if odd elements exist, which requires compressing the
	tensors first */

     if (UNTAINT(r0) == UNTAINT(cr)) {
	  R *joined = JOIN_TAINT(r0, cr);
	  r0 = joined;
	  cr = joined;
     }

     prob = (problem_rdft2 *)X(mkproblem)(sizeof(problem_rdft2), &padt);

     if (sz->rnk <= 1) {
	  prob->sz = X(tensor_compress)(sz);
     } else {
	  /* the last dimension is kept apart: only the leading
	     rnk-1 dimensions are compressed together */
	  tensor *lead = X(tensor_copy_except)(sz, sz->rnk - 1);
	  tensor *last = X(tensor_copy_sub)(sz, sz->rnk - 1, 1);
	  tensor *lead_c = X(tensor_compress)(lead);

	  prob->sz = (lead_c->rnk > 0)
	       ? X(tensor_append)(lead_c, last)
	       : X(tensor_compress)(last);

	  X(tensor_destroy2)(lead, last);
	  X(tensor_destroy)(lead_c);
     }

     prob->vecsz = X(tensor_compress_contiguous)(vecsz);
     prob->kind = kind;
     prob->r0 = r0;
     prob->r1 = r1;
     prob->cr = cr;
     prob->ci = ci;

     A(FINITE_RNK(prob->sz->rnk));
     return &(prob->super);
}
Exemple #6
0
/* Allocate and fill in a problem_mpi_dft.
 *
 *   sz    - distributed tensor; must pass XM(dtensor_validp) and have
 *           finite rank.  The problem stores XM(dtensor_canonical)(sz, 1).
 *   vn    - stored as-is; asserted to be >= 0.
 *   I, O  - input/output pointers (possibly tainted).
 *   comm  - communicator; its size must be at least
 *           XM(num_blocks_total) for both IB and OB.  A duplicate
 *           obtained from MPI_Comm_dup is stored in the problem.
 *   sign  - must be -1 or 1.
 *   flags - TRANSPOSED_IN is canonicalized away when rank > 1.
 *
 * Returns a pointer to the embedded generic problem header. */
problem *XM(mkproblem_dft)(const dtensor *sz, INT vn,
                           R *I, R *O,
                           MPI_Comm comm,
                           int sign,
                           unsigned flags)
{
    problem_mpi_dft *prob;
    int n_pes;

    prob = (problem_mpi_dft *)X(mkproblem)(sizeof(problem_mpi_dft), &padt);

    A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk));
    MPI_Comm_size(comm, &n_pes);
    A(n_pes >= XM(num_blocks_total)(sz, IB)
      && n_pes >= XM(num_blocks_total)(sz, OB));
    A(vn >= 0);
    A(sign == -1 || sign == 1);

    /* pointers that coincide once untainted are made identical */
    if (UNTAINT(I) == UNTAINT(O)) {
        R *joined = JOIN_TAINT(I, O);
        I = joined;
        O = joined;
    }

    prob->sz = XM(dtensor_canonical)(sz, 1);
    prob->vn = vn;
    prob->I = I;
    prob->O = O;
    prob->sign = sign;

    /* canonical form never carries TRANSPOSED_IN for rnk > 1: exchange
       the first two dimensions, drop TRANSPOSED_IN and toggle
       TRANSPOSED_OUT */
    if ((flags & TRANSPOSED_IN) && prob->sz->rnk > 1) {
        ddim first = prob->sz->dims[0];

        prob->sz->dims[0] = prob->sz->dims[1];
        prob->sz->dims[1] = first;
        flags = (flags & ~TRANSPOSED_IN) ^ TRANSPOSED_OUT;
    }
    prob->flags = flags;

    MPI_Comm_dup(comm, &prob->comm);

    return &(prob->super);
}
Exemple #7
0
static void zero(const problem *ego_)
{
     const problem_rdft2 *ego = (const problem_rdft2 *) ego_;
     if (R2HC_KINDP(ego->kind)) {
	  /* FIXME: can we avoid the double recursion somehow? */
	  vrecur(ego->vecsz->dims, ego->vecsz->rnk, 
		 ego->sz->dims, ego->sz->rnk, 
		 UNTAINT(ego->r0), UNTAINT(ego->r1));
     } else {
	  tensor *sz;
	  tensor *sz2 = X(tensor_copy)(ego->sz);
	  int rnk = sz2->rnk;
	  if (rnk > 0) /* ~half as many complex outputs */
	       sz2->dims[rnk-1].n = 
		    X(rdft2_complex_n)(sz2->dims[rnk-1].n, ego->kind);
	  sz = X(tensor_append)(ego->vecsz, sz2);
	  X(tensor_destroy)(sz2);
	  X(dft_zerotens)(sz, UNTAINT(ego->cr), UNTAINT(ego->ci));
	  X(tensor_destroy)(sz);
     }
}
Exemple #8
0
/* Return 1 when every applicability condition for the cell copy holds
   for arrays I/O with dimensions n and v, and 0 otherwise.  The
   conditions are tested one by one, stopping at the first failure. */
int X(cell_copy_applicable)(R *I, R *O, const iodim *n, const iodim *v)
{
     /* X(cell_nspe)() must report a positive count */
     if (!(X(cell_nspe)() > 0))
	  return 0;

     /* must be out of place */
     if (UNTAINT(I) == UNTAINT(O))
	  return 0;

     /* both arrays must satisfy ALIGNEDA */
     if (!ALIGNEDA(I))
	  return 0;
     if (!ALIGNEDA(O))
	  return 0;

     /* n->n must be a multiple of VL; v->n too, unless n has
	stride 2 on both input and output */
     if ((n->n % VL) != 0)
	  return 0;
     if ((v->n % VL) != 0 && !(n->is == 2 && n->os == 2))
	  return 0;

     /* on input, one dimension has stride 2 and the other a stride
	accepted by SIMD_STRIDE_OKA */
     if (!((n->is == 2 && SIMD_STRIDE_OKA(v->is))
	   || (v->is == 2 && SIMD_STRIDE_OKA(n->is))))
	  return 0;

     /* same requirement on output */
     if (!((n->os == 2 && SIMD_STRIDE_OKA(v->os))
	   || (v->os == 2 && SIMD_STRIDE_OKA(n->os))))
	  return 0;

     /* lengths and byte strides must pass FITS_IN_INT */
     return (FITS_IN_INT(n->n)
	     && FITS_IN_INT(n->is * sizeof(R))
	     && FITS_IN_INT(n->os * sizeof(R))
	     && FITS_IN_INT(v->n)
	     && FITS_IN_INT(v->is * sizeof(R))
	     && FITS_IN_INT(v->os * sizeof(R)));
}
Exemple #9
0
/* Allocate and fill in a problem_mpi_transpose.
 *
 *   nx, ny, vn    - all asserted to be positive; stored as-is.
 *   I, O          - input/output pointers (possibly tainted).
 *   block, tblock - asserted positive and accepted by
 *                   XM(num_blocks_ok) for nx resp. ny; stored clamped
 *                   to at most nx resp. ny.
 *   comm          - communicator; a duplicate from MPI_Comm_dup is
 *                   stored in the problem.
 *   flags         - TRANSPOSED_IN / TRANSPOSED_OUT are forced on when
 *                   the corresponding stored block size is 1.
 *
 * Returns a pointer to the embedded generic problem header. */
problem *XM(mkproblem_transpose)(INT nx, INT ny, INT vn,
                                 R *I, R *O,
                                 INT block, INT tblock,
                                 MPI_Comm comm,
                                 unsigned flags)
{
    problem_mpi_transpose *prob;

    prob = (problem_mpi_transpose *)X(mkproblem)(sizeof(problem_mpi_transpose), &padt);

    A(nx > 0 && ny > 0 && vn > 0);
    A(block > 0 && XM(num_blocks_ok)(nx, block, comm)
      && tblock > 0 && XM(num_blocks_ok)(ny, tblock, comm));

    /* pointers that coincide once untainted are made identical */
    if (UNTAINT(I) == UNTAINT(O)) {
        R *joined = JOIN_TAINT(I, O);
        I = joined;
        O = joined;
    }

    prob->nx = nx;
    prob->ny = ny;
    prob->vn = vn;
    prob->I = I;
    prob->O = O;

    /* a block never exceeds the dimension it partitions */
    if (block > nx)
        prob->block = nx;
    else
        prob->block = block;

    if (tblock > ny)
        prob->tblock = ny;
    else
        prob->tblock = tblock;

    /* canonicalize flags: we can freely assume that the data is
       "transposed" if one of the dimensions is 1. */
    if (prob->block == 1)
        flags |= TRANSPOSED_IN;
    if (prob->tblock == 1)
        flags |= TRANSPOSED_OUT;
    prob->flags = flags;

    MPI_Comm_dup(comm, &prob->comm);

    return &(prob->super);
}
Exemple #10
0
/* Allocate and fill in a problem_dft.
 *
 *   sz, vecsz      - tensors; both must pass X(tensor_kosherp).  The
 *                    problem stores X(tensor_compress)(sz) and
 *                    X(tensor_compress_contiguous)(vecsz).
 *   ri, ii, ro, io - real/imaginary input and output pointers,
 *                    possibly tainted.
 *
 * Returns X(mkproblem_unsolvable)() when only one of the real or
 * imaginary pairs is in place, or when both are in place but
 * X(tensor_inplace_locations)(sz, vecsz) fails; otherwise a pointer
 * to the embedded generic problem header. */
problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz,
			  R *ri, R *ii, R *ro, R *io)
{
     problem_dft *prob;

     /* pointers that coincide once untainted are made identical,
	both carrying the joined taint */
     if (UNTAINT(ri) == UNTAINT(ro)) {
	  R *joined_r = JOIN_TAINT(ri, ro);
	  ri = joined_r;
	  ro = joined_r;
     }
     if (UNTAINT(ii) == UNTAINT(io)) {
	  R *joined_i = JOIN_TAINT(ii, io);
	  ii = joined_i;
	  io = joined_i;
     }

     /* real and imaginary halves must share the same taint */
     A(TAINTOF(ri) == TAINTOF(ii));
     A(TAINTOF(ro) == TAINTOF(io));

     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));

     if (ri == ro || ii == io) {
	  /* one pair being in place obliges the other to be in place
	     as well; the tensor check only runs when both are */
	  int real_inplace = (ri == ro);
	  int imag_inplace = (ii == io);

	  if (!(real_inplace && imag_inplace))
	       return X(mkproblem_unsolvable)();
	  if (!X(tensor_inplace_locations)(sz, vecsz))
	       return X(mkproblem_unsolvable)();
     }

     prob = (problem_dft *)X(mkproblem)(sizeof(problem_dft), &padt);

     prob->sz = X(tensor_compress)(sz);
     prob->vecsz = X(tensor_compress_contiguous)(vecsz);
     prob->ri = ri;
     prob->ii = ii;
     prob->ro = ro;
     prob->io = io;

     A(FINITE_RNK(prob->sz->rnk));
     return &(prob->super);
}
Exemple #11
0
/* use the apply() operation for MPI_RDFT2 problems */
void XM(rdft2_solve)(const plan *ego_, const problem *p_)
{
     const plan_mpi_rdft2 *ego = (const plan_mpi_rdft2 *) ego_;
     const problem_mpi_rdft2 *p = (const problem_mpi_rdft2 *) p_;
     ego->apply(ego_, UNTAINT(p->I), UNTAINT(p->O));
}
Exemple #12
0
/* Allocate and fill in a problem_rdft.
 *
 *   sz    - transform tensor; must pass X(tensor_kosherp), have finite
 *           rank, and every dimension must have n > 0.
 *   vecsz - vector tensor; must pass X(tensor_kosherp).  The problem
 *           stores X(tensor_compress_contiguous)(vecsz).
 *   I, O  - input/output pointers (possibly tainted).
 *   kind  - one transform kind per dimension of sz (read for indices
 *           0 .. sz->rnk - 1).
 *
 * Only the dimensions for which nontrivial() holds are kept; they are
 * sorted by X(dimcmp), with their kinds moved along in parallel.
 *
 * Returns X(mkproblem_unsolvable)() for an in-place problem that fails
 * X(tensor_inplace_locations)(sz, vecsz); otherwise a pointer to the
 * embedded generic problem header. */
problem *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz,
			   R *I, R *O, const rdft_kind *kind)
{
     problem_rdft *ego;
     int rnk = sz->rnk;
     int i;

     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));
     A(FINITE_RNK(sz->rnk));

     /* enforce pointer equality if untainted pointers are equal */
     if (UNTAINT(I) == UNTAINT(O))
	  I = O = JOIN_TAINT(I, O);

     if (I == O && !X(tensor_inplace_locations)(sz, vecsz))
	  return X(mkproblem_unsolvable)();

     /* first pass: rnk is reset to the number of nontrivial dimensions,
	which determines the size of the kind array below */
     for (i = rnk = 0; i < sz->rnk; ++i) {
          A(sz->dims[i].n > 0);
          if (nontrivial(sz->dims + i, kind[i]))
               ++rnk;
     }

     /* Storage for ego->kind depends on the build configuration:
	with STRUCT_HACK_KR the allocation is extended by rnk - 1
	entries (presumably the struct already declares one element --
	confirm against the struct definition), with STRUCT_HACK_C99 by
	rnk entries, and otherwise the array is MALLOC'ed separately. */
#if defined(STRUCT_HACK_KR)
     ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft)
					 + sizeof(rdft_kind)
					 * (rnk > 0 ? rnk - 1u : 0u), &padt);
#elif defined(STRUCT_HACK_C99)
     ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft)
					 + sizeof(rdft_kind) * (unsigned)rnk, &padt);
#else
     ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft), &padt);
     ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * (unsigned)rnk, PROBLEMS);
#endif

     /* do compression and sorting as in X(tensor_compress), but take
	transform kind into account (sigh) */
     ego->sz = X(mktensor)(rnk);
     /* second pass: copy the nontrivial dimensions and their kinds;
	rnk is rebuilt and ends at the same count as the first pass */
     for (i = rnk = 0; i < sz->rnk; ++i) {
          if (nontrivial(sz->dims + i, kind[i])) {
	       ego->kind[rnk] = kind[i];
               ego->sz->dims[rnk++] = sz->dims[i];
	  }
     }
     /* exchange sort on X(dimcmp); each dimension swap is mirrored in
	the kind array so dims[i] and kind[i] stay paired */
     for (i = 0; i + 1 < rnk; ++i) {
	  int j;
	  for (j = i + 1; j < rnk; ++j)
	       if (X(dimcmp)(ego->sz->dims + i, ego->sz->dims + j) > 0) {
		    iodim dswap;
		    rdft_kind kswap;
		    dswap = ego->sz->dims[i];
		    ego->sz->dims[i] = ego->sz->dims[j];
		    ego->sz->dims[j] = dswap;
		    kswap = ego->kind[i];
		    ego->kind[i] = ego->kind[j];
		    ego->kind[j] = kswap;
	       }
     }

     /* normalize the kind of length-2 dimensions: REDFT00, DHT and
	HC2R are all rewritten as R2HC */
     for (i = 0; i < rnk; ++i)
	  if (ego->sz->dims[i].n == 2 && (ego->kind[i] == REDFT00
					  || ego->kind[i] == DHT
					  || ego->kind[i] == HC2R))
	       ego->kind[i] = R2HC; /* size-2 transforms are equivalent */

     ego->vecsz = X(tensor_compress_contiguous)(vecsz);
     ego->I = I;
     ego->O = O;

     A(FINITE_RNK(ego->sz->rnk));

     return &(ego->super);
}
Exemple #13
0
/* Combine the taint of two pointers that must be equal once untainted
   (asserted).  The result is the bitwise OR of PTRINT(p1) and
   PTRINT(p2), converted back to a pointer. */
R *X(join_taint)(R *p1, R *p2)
{
     R *merged;

     A(UNTAINT(p1) == UNTAINT(p2));
     merged = (R *)(PTRINT(p1) | PTRINT(p2));
     return merged;
}