/*
 * Create a plan for an MPI DFT problem that is solved by a single
 * process-local child plan.
 *
 * ego:   solver (unused).
 * p_:    the problem; treated as a problem_mpi_dft.
 * plnr:  planner used to create the child plan.
 *
 * Returns the new plan, or a null plan if the problem is rejected by
 * XM(dft_serial_applicable) or if XM(any_true) reports that child planning
 * failed (presumably on any process of p->comm -- TODO confirm).
 *
 * Rank 0 of p->comm, when p->vn > 0, plans the whole transform locally;
 * every other case plans an empty problem (rank-0 size tensor, zero-length
 * vector) so that each process still owns a child plan.
 */
static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
{
     const problem_mpi_dft *p = (const problem_mpi_dft *) p_;
     P *pln;
     plan *cld;
     int my_pe;
     R *ri, *ii, *ro, *io;
     static const plan_adt padt = {
          XM(dft_solve), awake, print, destroy
     };

     UNUSED(ego);

     /* check whether applicable: */
     if (!XM(dft_serial_applicable)(p))
          return (plan *) 0;

     X(extract_reim)(p->sign, p->I, &ri, &ii);
     X(extract_reim)(p->sign, p->O, &ro, &io);

     MPI_Comm_rank(p->comm, &my_pe);
     if (my_pe == 0 && p->vn > 0) {
          /* NOTE(review): indexes dims[rnk - 1], so this assumes rnk >= 1;
             confirm that the applicability check guarantees it. */
          int i, rnk = p->sz->rnk;
          tensor *sz = X(mktensor)(rnk);

          /* Innermost dimension has stride 2 * vn; each outer stride is
             the product of the next-inner stride and extent. */
          sz->dims[rnk - 1].is = sz->dims[rnk - 1].os = 2 * p->vn;
          sz->dims[rnk - 1].n = p->sz->dims[rnk - 1].n;
          for (i = rnk - 1; i > 0; --i) {
               sz->dims[i - 1].is = sz->dims[i - 1].os =
                    sz->dims[i].is * sz->dims[i].n;
               sz->dims[i - 1].n = p->sz->dims[i - 1].n;
          }

          cld = X(mkplan_d)(plnr,
                            X(mkproblem_dft_d)(sz,
                                               X(mktensor_1d)(p->vn, 2, 2),
                                               ri, ii, ro, io));
     }
     else { /* idle process: make nop plan */
          cld = X(mkplan_d)(plnr,
                            X(mkproblem_dft_d)(X(mktensor_0d)(),
                                               X(mktensor_1d)(0,0,0),
                                               ri, ii, ro, io));
     }

     if (XM(any_true)(!cld, p->comm)) {
          /* This process may hold a valid child plan even though planning
             failed elsewhere (e.g. an idle rank's nop plan when rank 0
             could not plan): release it instead of leaking it.  A null
             cld is passed through unguarded, the same way the sibling
             solvers' cleanup paths call this function. */
          X(plan_destroy_internal)(cld);
          return (plan *) 0;
     }

     pln = MKPLAN_MPI_DFT(P, &padt, apply);
     pln->cld = cld;
     /* offsets of the extracted output pointers relative to p->O */
     pln->roff = ro - p->O;
     pln->ioff = io - p->O;
     X(ops_cpy)(&cld->ops, &pln->super.super.ops);
     return &(pln->super.super);
}
/*
 * Create a three-stage plan for an MPI DFT problem:
 *   1. a transpose of an nx-by-ny arrangement (nx = extent of dimension 0,
 *      ny chosen by XM(rearrange_ny)),
 *   2. a process-local DFT of length nx over this process's block of ny,
 *   3. a transpose back (ny-by-nx).
 *
 * ego_:  solver; treated as an S (supplies rearrange and preserve_input).
 * p_:    the problem; treated as a problem_mpi_dft once applicable() accepts.
 * plnr:  planner used to create the three child plans.
 *
 * Returns the new plan, or a null plan if the problem is not applicable,
 * if XM(rearrange_ny) yields 0, or if XM(any_true) reports failure for any
 * child plan (already-created children are destroyed in that case).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          XM(dft_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_mpi_dft *p;
     P *pln;
     plan *pre_transpose = 0, *local_dft = 0, *post_transpose = 0;
     R *in, *out;
     R *in_re, *in_im, *out_re, *out_im;
     INT nx, ny, vn, vn2, y_block, my_ny, chunk;
     int my_pe, n_pes;

     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_dft *) p_;

     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);

     nx = p->sz->dims[0].n;
     ny = XM(rearrange_ny)(ego->rearrange, p->sz->dims[0], p->vn, n_pes);
     if (ny == 0)
          return (plan *) 0;

     /* the vector length is split as ny * vn */
     vn = p->vn / ny;
     A(ny * vn == p->vn);
     vn2 = vn * 2;

     y_block = XM(default_block)(ny, n_pes);

     /* stage 1: transpose from p->I to p->O */
     in = p->I;
     out = p->O;
     pre_transpose = X(mkplan_d)(plnr,
                                 XM(mkproblem_transpose)(
                                      nx, ny, vn2,
                                      in, out,
                                      p->sz->dims[0].b[IB], y_block,
                                      p->comm, 0));
     if (XM(any_true)(!pre_transpose, p->comm))
          goto fail;

     /* when the input must be kept, the later stages use O in place of I */
     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr))
          in = out;

     X(extract_reim)(p->sign, in, &in_re, &in_im);
     X(extract_reim)(p->sign, out, &out_re, &out_im);

     /* stage 2: local DFT; the O-derived pointers are passed first and the
        I-derived pointers second (presumably source then destination --
        confirm against X(mkproblem_dft_d)) */
     my_ny = XM(block)(ny, y_block, my_pe);
     chunk = vn2 * nx;
     local_dft = X(mkplan_d)(plnr,
                             X(mkproblem_dft_d)(
                                  X(mktensor_1d)(nx, vn2, vn2),
                                  X(mktensor_2d)(my_ny, chunk, chunk,
                                                 vn, 2, 2),
                                  out_re, out_im, in_re, in_im));
     if (XM(any_true)(!local_dft, p->comm))
          goto fail;

     /* stage 3: transpose back, ny-by-nx */
     post_transpose = X(mkplan_d)(plnr,
                                  XM(mkproblem_transpose)(
                                       ny, nx, vn2,
                                       in, out,
                                       y_block, p->sz->dims[0].b[OB],
                                       p->comm, 0));
     if (XM(any_true)(!post_transpose, p->comm))
          goto fail;

     pln = MKPLAN_MPI_DFT(P, &padt, apply);

     pln->cldt_before = pre_transpose;
     pln->cld = local_dft;
     pln->cldt_after = post_transpose;

     /* 2 when the solver itself preserves input, otherwise the planner's
        NO_DESTROY_INPUTP value */
     if (ego->preserve_input)
          pln->preserve_input = 2;
     else
          pln->preserve_input = NO_DESTROY_INPUTP(plnr);

     /* offsets of the extracted output pointers relative to p->O */
     pln->roff = out_re - p->O;
     pln->ioff = out_im - p->O;
     pln->rearrange = ego->rearrange;

     X(ops_add)(&pre_transpose->ops, &local_dft->ops, &pln->super.super.ops);
     X(ops_add2)(&post_transpose->ops, &pln->super.super.ops);

     return &(pln->super.super);

 fail:
     /* release whatever was created, in reverse order of creation;
        entries that were never created are still null here */
     X(plan_destroy_internal)(post_transpose);
     X(plan_destroy_internal)(local_dft);
     X(plan_destroy_internal)(pre_transpose);
     return (plan *) 0;
}
/*
 * Create a three-stage plan for an MPI DFT problem of rank >= 2:
 *   1. a process-local DFT over the last rnk-1 dimensions,
 *   2. a transpose exchanging dimensions 0 and 1,
 *   3. a process-local in-place DFT of length dims[0].n on the output array.
 *
 * ego_:  solver; treated as an S (supplies preserve_input).
 * p_:    the problem; treated as a problem_mpi_dft once applicable() accepts.
 * plnr:  planner used to create the three child plans.
 *
 * Returns the new plan, or a null plan if the problem is not applicable or
 * if XM(any_true) reports failure for any child plan (already-created
 * children are destroyed in that case).
 */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     static const plan_adt padt = {
          XM(dft_solve), awake, print, destroy
     };
     const S *ego = (const S *) ego_;
     const problem_mpi_dft *p;
     P *pln;
     plan *first_dft = 0, *transpose = 0, *second_dft = 0;
     R *in, *out;
     R *in_re, *in_im, *out_re, *out_im;
     R *mid_re, *mid_im;        /* destination of the first local DFT */
     tensor *sz;
     INT nrest, slab, my_n0, col, my_n1;
     int i, my_pe, n_pes;

     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_dft *) p_;

     in = p->I;
     X(extract_reim)(p->sign, in, &in_re, &in_im);
     out = p->O;
     X(extract_reim)(p->sign, out, &out_re, &out_im);

     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) {
          /* input must be kept: stage 1 writes to O, and the transpose
             then runs with O as both arrays */
          in = out;
          mid_re = out_re;
          mid_im = out_im;
     }
     else {
          /* stage 1 runs in place on I */
          mid_re = in_re;
          mid_im = in_im;
     }

     MPI_Comm_rank(p->comm, &my_pe);
     MPI_Comm_size(p->comm, &n_pes);

     /* tensor of last rnk-1 dimensions: innermost stride is 2 * vn, each
        outer stride is the product of the next-inner extent and stride */
     sz = X(mktensor)(p->sz->rnk - 1);
     i = p->sz->rnk - 2;
     A(i >= 0);
     sz->dims[i].n = p->sz->dims[i+1].n;
     sz->dims[i].is = sz->dims[i].os = 2 * p->vn;
     while (--i >= 0) {
          sz->dims[i].n = p->sz->dims[i+1].n;
          sz->dims[i].is = sz->dims[i].os =
               sz->dims[i+1].n * sz->dims[i+1].is;
     }

     /* product of the extents of sz beyond its first dimension; read from
        sz here, before sz is passed on to X(mkproblem_dft_d) below */
     nrest = 1;
     for (i = 1; i < sz->rnk; ++i)
          nrest *= sz->dims[i].n;

     /* stage 1: local DFT over this process's input block of dimension 0 */
     slab = sz->dims[0].n * sz->dims[0].is;
     my_n0 = XM(block)(p->sz->dims[0].n, p->sz->dims[0].b[IB], my_pe);
     first_dft = X(mkplan_d)(plnr,
                             X(mkproblem_dft_d)(
                                  sz,
                                  X(mktensor_2d)(my_n0, slab, slab,
                                                 p->vn, 2, 2),
                                  in_re, in_im, mid_re, mid_im));
     if (XM(any_true)(!first_dft, p->comm))
          goto fail;

     /* stage 2: transpose dimensions 0 and 1 */
     nrest *= p->vn;
     transpose = X(mkplan_d)(plnr,
                             XM(mkproblem_transpose)(
                                  p->sz->dims[0].n, p->sz->dims[1].n,
                                  nrest * 2,
                                  in, out,
                                  p->sz->dims[0].b[IB], p->sz->dims[1].b[OB],
                                  p->comm, 0));
     if (XM(any_true)(!transpose, p->comm))
          goto fail;

     /* stage 3: local DFT along the original dimension 0, in place on O */
     X(extract_reim)(p->sign, out, &out_re, &out_im);
     col = p->sz->dims[0].n * nrest * 2;
     my_n1 = XM(block)(p->sz->dims[1].n, p->sz->dims[1].b[OB], my_pe);
     second_dft = X(mkplan_d)(plnr,
                              X(mkproblem_dft_d)(
                                   X(mktensor_1d)(p->sz->dims[0].n,
                                                  nrest * 2, nrest * 2),
                                   X(mktensor_2d)(my_n1, col, col,
                                                  nrest, 2, 2),
                                   out_re, out_im, out_re, out_im));
     if (XM(any_true)(!second_dft, p->comm))
          goto fail;

     pln = MKPLAN_MPI_DFT(P, &padt, apply);

     pln->cld1 = first_dft;
     pln->cldt = transpose;
     pln->cld2 = second_dft;

     /* 2 when the solver itself preserves input, otherwise the planner's
        NO_DESTROY_INPUTP value */
     if (ego->preserve_input)
          pln->preserve_input = 2;
     else
          pln->preserve_input = NO_DESTROY_INPUTP(plnr);

     /* offsets of the extracted input pointers relative to p->I */
     pln->roff = in_re - p->I;
     pln->ioff = in_im - p->I;

     X(ops_add)(&first_dft->ops, &second_dft->ops, &pln->super.super.ops);
     X(ops_add2)(&transpose->ops, &pln->super.super.ops);

     return &(pln->super.super);

 fail:
     /* release whatever was created, in reverse order of creation;
        entries that were never created are still null here */
     X(plan_destroy_internal)(second_dft);
     X(plan_destroy_internal)(transpose);
     X(plan_destroy_internal)(first_dft);
     return (plan *) 0;
}