/*
 * Build a problem_dft from a transform-size tensor, a vector-size tensor,
 * and separate real/imaginary input (ri, ii) and output (ro, io) pointers.
 *
 * The stored tensors are fresh results of X(tensor_compress)(sz) and
 * X(tensor_compress_contiguous)(vecsz); the arguments themselves are not
 * stored.  Returns a pointer to the embedded `super` member.
 */
problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz,
                          R *ri, R *ii, R *ro, R *io)
{
     problem_dft *prob =
          (problem_dft *) X(mkproblem)(sizeof(problem_dft), &padt);

     /* real and imaginary parts must agree: both in place or both
        out of place */
     A((ri == ro) == (ii == io));
     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));

     /* when the untainted input and output pointers coincide, make the
        pointers themselves identical by joining their taints */
     if (UNTAINT(ri) == UNTAINT(ro)) {
          ri = ro = JOIN_TAINT(ri, ro);
     }
     if (UNTAINT(ii) == UNTAINT(io)) {
          ii = io = JOIN_TAINT(ii, io);
     }

     /* the real and imaginary pointer of each side must carry the
        same taint */
     A(TAINTOF(ri) == TAINTOF(ii));
     A(TAINTOF(ro) == TAINTOF(io));

     prob->sz = X(tensor_compress)(sz);
     prob->vecsz = X(tensor_compress_contiguous)(vecsz);

     prob->ri = ri;
     prob->ii = ii;
     prob->ro = ro;
     prob->io = io;

     A(FINITE_RNK(prob->sz->rnk));
     return &prob->super;
}
problem *XM(mkproblem_rdft)(const dtensor *sz, INT vn, R *I, R *O, MPI_Comm comm, const rdft_kind *kind, unsigned flags) { problem_mpi_rdft *ego; int i, rnk = sz->rnk; int n_pes; A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk)); MPI_Comm_size(comm, &n_pes); A(n_pes >= XM(num_blocks_total)(sz, IB) && n_pes >= XM(num_blocks_total)(sz, OB)); A(vn >= 0); #if defined(STRUCT_HACK_KR) ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft) + sizeof(rdft_kind) * (rnk > 0 ? rnk - 1 : 0), &padt); #elif defined(STRUCT_HACK_C99) ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft) + sizeof(rdft_kind) * rnk, &padt); #else ego = (problem_mpi_rdft *) X(mkproblem)(sizeof(problem_mpi_rdft), &padt); ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * rnk, PROBLEMS); #endif /* enforce pointer equality if untainted pointers are equal */ if (UNTAINT(I) == UNTAINT(O)) I = O = JOIN_TAINT(I, O); ego->sz = XM(dtensor_canonical)(sz, 0); ego->vn = vn; ego->I = I; ego->O = O; for (i = 0; i< ego->sz->rnk; ++i) ego->kind[i] = kind[i]; /* canonicalize: replace TRANSPOSED_IN with TRANSPOSED_OUT by swapping the first two dimensions (for rnk > 1) */ if ((flags & TRANSPOSED_IN) && ego->sz->rnk > 1) { rdft_kind k = ego->kind[0]; ddim dim0 = ego->sz->dims[0]; ego->sz->dims[0] = ego->sz->dims[1]; ego->sz->dims[1] = dim0; ego->kind[0] = ego->kind[1]; ego->kind[1] = k; flags &= ~TRANSPOSED_IN; flags ^= TRANSPOSED_OUT; } ego->flags = flags; MPI_Comm_dup(comm, &ego->comm); return &(ego->super); }
/*
 * Build a problem_dft from a transform-size tensor, a vector-size tensor,
 * and separate real/imaginary input (ri, ii) and output (ro, io) pointers.
 *
 * If only one of the real/imaginary pairs is in place, or the problem is
 * in place but X(tensor_inplace_locations)(sz, vecsz) is false, the result
 * of X(mkproblem_unsolvable)() is returned instead and no problem_dft is
 * allocated.  Otherwise the stored tensors are fresh results of
 * X(tensor_compress)(sz) and X(tensor_compress_contiguous)(vecsz), and a
 * pointer to the embedded `super` member is returned.
 */
problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz,
                          R *ri, R *ii, R *ro, R *io)
{
     problem_dft *prob;

     /* when the untainted input and output pointers coincide, make the
        pointers themselves identical by joining their taints */
     if (UNTAINT(ri) == UNTAINT(ro)) {
          ri = ro = JOIN_TAINT(ri, ro);
     }
     if (UNTAINT(ii) == UNTAINT(io)) {
          ii = io = JOIN_TAINT(ii, io);
     }

     /* the real and imaginary pointer of each side must carry the
        same taint */
     A(TAINTOF(ri) == TAINTOF(ii));
     A(TAINTOF(ro) == TAINTOF(io));

     A(X(tensor_kosherp)(sz));
     A(X(tensor_kosherp)(vecsz));

     /* As soon as one of the real/imaginary pairs is in place, the other
        must be in place too and the tensors must describe in-place
        locations; anything else is reported as unsolvable. */
     if ((ri == ro || ii == io)
         && (ri != ro || ii != io
             || !X(tensor_inplace_locations)(sz, vecsz))) {
          return X(mkproblem_unsolvable)();
     }

     prob = (problem_dft *) X(mkproblem)(sizeof(problem_dft), &padt);

     prob->sz = X(tensor_compress)(sz);
     prob->vecsz = X(tensor_compress_contiguous)(vecsz);

     prob->ri = ri;
     prob->ii = ii;
     prob->ro = ro;
     prob->io = io;

     A(FINITE_RNK(prob->sz->rnk));
     return &prob->super;
}
problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz, R *r0, R *r1, R *cr, R *ci, rdft_kind kind) { problem_rdft2 *ego; A(kind == R2HC || kind == R2HCII || kind == HC2R || kind == HC2RIII); A(X(tensor_kosherp)(sz)); A(X(tensor_kosherp)(vecsz)); A(FINITE_RNK(sz->rnk)); /* require in-place problems to use r0 == cr */ if (UNTAINT(r0) == UNTAINT(ci)) return X(mkproblem_unsolvable)(); /* FIXME: should check UNTAINT(r1) == UNTAINT(cr) but only if odd elements exist, which requires compressing the tensors first */ if (UNTAINT(r0) == UNTAINT(cr)) r0 = cr = JOIN_TAINT(r0, cr); ego = (problem_rdft2 *)X(mkproblem)(sizeof(problem_rdft2), &padt); if (sz->rnk > 1) { /* have to compress rnk-1 dims separately, ugh */ tensor *szc = X(tensor_copy_except)(sz, sz->rnk - 1); tensor *szr = X(tensor_copy_sub)(sz, sz->rnk - 1, 1); tensor *szcc = X(tensor_compress)(szc); if (szcc->rnk > 0) ego->sz = X(tensor_append)(szcc, szr); else ego->sz = X(tensor_compress)(szr); X(tensor_destroy2)(szc, szr); X(tensor_destroy)(szcc); } else { ego->sz = X(tensor_compress)(sz); } ego->vecsz = X(tensor_compress_contiguous)(vecsz); ego->r0 = r0; ego->r1 = r1; ego->cr = cr; ego->ci = ci; ego->kind = kind; A(FINITE_RNK(ego->sz->rnk)); return &(ego->super); }
/*
 * Build a problem_mpi_dft for the distributed size tensor `sz`, vector
 * length `vn`, input/output pointers I and O, communicator `comm`,
 * transform `sign` (-1 or 1) and the given `flags`.
 *
 * The problem stores XM(dtensor_canonical)(sz, 1) and its own duplicate
 * of `comm` (MPI_Comm_dup).  Returns a pointer to the embedded `super`
 * member.
 */
problem *XM(mkproblem_dft)(const dtensor *sz, INT vn,
                           R *I, R *O,
                           MPI_Comm comm,
                           int sign,
                           unsigned flags)
{
     problem_mpi_dft *prob =
          (problem_mpi_dft *) X(mkproblem)(sizeof(problem_mpi_dft), &padt);
     int n_pes;

     A(XM(dtensor_validp)(sz) && FINITE_RNK(sz->rnk));

     /* the communicator must have at least as many processes as there
        are blocks, for both the IB and the OB block kind */
     MPI_Comm_size(comm, &n_pes);
     A(n_pes >= XM(num_blocks_total)(sz, IB)
       && n_pes >= XM(num_blocks_total)(sz, OB));
     A(vn >= 0);
     A(sign == -1 || sign == 1);

     /* when the untainted pointers coincide, make the pointers
        themselves identical by joining their taints */
     if (UNTAINT(I) == UNTAINT(O)) {
          I = O = JOIN_TAINT(I, O);
     }

     prob->sz = XM(dtensor_canonical)(sz, 1);
     prob->vn = vn;
     prob->I = I;
     prob->O = O;
     prob->sign = sign;

     /* canonicalize: for rnk > 1, express TRANSPOSED_IN as TRANSPOSED_OUT
        by exchanging the first two dimensions; the TRANSPOSED_IN bit is
        cleared and the TRANSPOSED_OUT bit toggled */
     if ((flags & TRANSPOSED_IN) && prob->sz->rnk > 1) {
          ddim first_dim = prob->sz->dims[0];

          prob->sz->dims[0] = prob->sz->dims[1];
          prob->sz->dims[1] = first_dim;

          flags &= ~TRANSPOSED_IN;
          flags ^= TRANSPOSED_OUT;
     }
     prob->flags = flags;

     MPI_Comm_dup(comm, &prob->comm);

     return &prob->super;
}
problem *XM(mkproblem_transpose)(INT nx, INT ny, INT vn, R *I, R *O, INT block, INT tblock, MPI_Comm comm, unsigned flags) { problem_mpi_transpose *ego = (problem_mpi_transpose *)X(mkproblem)(sizeof(problem_mpi_transpose), &padt); A(nx > 0 && ny > 0 && vn > 0); A(block > 0 && XM(num_blocks_ok)(nx, block, comm) && tblock > 0 && XM(num_blocks_ok)(ny, tblock, comm)); /* enforce pointer equality if untainted pointers are equal */ if (UNTAINT(I) == UNTAINT(O)) I = O = JOIN_TAINT(I, O); ego->nx = nx; ego->ny = ny; ego->vn = vn; ego->I = I; ego->O = O; ego->block = block > nx ? nx : block; ego->tblock = tblock > ny ? ny : tblock; /* canonicalize flags: we can freely assume that the data is "transposed" if one of the dimensions is 1. */ if (ego->block == 1) flags |= TRANSPOSED_IN; if (ego->tblock == 1) flags |= TRANSPOSED_OUT; ego->flags = flags; MPI_Comm_dup(comm, &ego->comm); return &(ego->super); }
problem *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz, R *I, R *O, const rdft_kind *kind) { problem_rdft *ego; int rnk = sz->rnk; int i; A(X(tensor_kosherp)(sz)); A(X(tensor_kosherp)(vecsz)); A(FINITE_RNK(sz->rnk)); if (UNTAINT(I) == UNTAINT(O)) I = O = JOIN_TAINT(I, O); if (I == O && !X(tensor_inplace_locations)(sz, vecsz)) return X(mkproblem_unsolvable)(); for (i = rnk = 0; i < sz->rnk; ++i) { A(sz->dims[i].n > 0); if (nontrivial(sz->dims + i, kind[i])) ++rnk; } #if defined(STRUCT_HACK_KR) ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft) + sizeof(rdft_kind) * (rnk > 0 ? rnk - 1u : 0u), &padt); #elif defined(STRUCT_HACK_C99) ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft) + sizeof(rdft_kind) * (unsigned)rnk, &padt); #else ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft), &padt); ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * (unsigned)rnk, PROBLEMS); #endif /* do compression and sorting as in X(tensor_compress), but take transform kind into account (sigh) */ ego->sz = X(mktensor)(rnk); for (i = rnk = 0; i < sz->rnk; ++i) { if (nontrivial(sz->dims + i, kind[i])) { ego->kind[rnk] = kind[i]; ego->sz->dims[rnk++] = sz->dims[i]; } } for (i = 0; i + 1 < rnk; ++i) { int j; for (j = i + 1; j < rnk; ++j) if (X(dimcmp)(ego->sz->dims + i, ego->sz->dims + j) > 0) { iodim dswap; rdft_kind kswap; dswap = ego->sz->dims[i]; ego->sz->dims[i] = ego->sz->dims[j]; ego->sz->dims[j] = dswap; kswap = ego->kind[i]; ego->kind[i] = ego->kind[j]; ego->kind[j] = kswap; } } for (i = 0; i < rnk; ++i) if (ego->sz->dims[i].n == 2 && (ego->kind[i] == REDFT00 || ego->kind[i] == DHT || ego->kind[i] == HC2R)) ego->kind[i] = R2HC; /* size-2 transforms are equivalent */ ego->vecsz = X(tensor_compress_contiguous)(vecsz); ego->I = I; ego->O = O; A(FINITE_RNK(ego->sz->rnk)); return &(ego->super); }