/*
 * Run the problem stored in nfo on the caller's data.
 *
 *   in   caller-side input, described by nfo->pckdsz
 *   out  caller-side output, described by nfo->pckdsz
 *
 * The problem's own buffers (p->in / p->out) are described by
 * nfo->totalsz.  The transform itself is doit(1, p), bracketed by the
 * after_problem_rcopy_from / after_problem_rcopy_to hooks.
 *
 * cpyr is defined elsewhere; from the calls below it is assumed to take
 * (source, source tensor, destination, destination tensor) -- TODO confirm.
 */
static void dofft(info *nfo, R *in, R *out)
{
     bench_problem *prob = nfo->p;

     /* stage the caller's input in the problem's input buffer */
     cpyr(in, nfo->pckdsz, (R *) prob->in, nfo->totalsz);
     after_problem_rcopy_from(prob, (bench_real *) prob->in);

     doit(1, prob);

     /* hand the problem's output back to the caller */
     after_problem_rcopy_to(prob, (bench_real *) prob->out);
     cpyr((R *) prob->out, nfo->totalsz, out, nfo->pckdsz);
}
/*
 * Closure callback that drives a one-dimensional r2r problem from a pair
 * of complex arrays.  Only the first dimension of p->sz and the first
 * kind p->k[0] are consulted.
 *
 * Sequence:
 *   1. fill the problem's input buffer from `in`, picking a kind-specific
 *      starting element, component (re/im) and stride;
 *   2. after_problem_rcopy_from, doit(1, p), after_problem_rcopy_to;
 *   3. if k->k.recopy_input is set, write the problem's input buffer
 *      back into `in`;
 *   4. fill `out` from the problem's output buffer and call the
 *      kind-specific helper (mkhermitian1, mkreal, mkre00, ...).
 *
 * cpyr1 is defined elsewhere; its call sites suggest the signature
 * (count, source, source stride, destination, destination stride, scale)
 * -- TODO confirm.
 *
 * NOTE(review): for HC2R, RODFT00, REDFT10, RODFT10, REDFT11 and RODFT11
 * the recopy in step 3 does not use the same element/component/scale as
 * the load in step 1.  Behavior is kept exactly as found; confirm whether
 * this is intentional.
 *
 * Any kind not listed hits BENCH_ASSERT(0).
 */
static void r2r_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
{
     dofft_r2r_closure *closure = (dofft_r2r_closure *)k_;
     bench_problem *prob = closure->p;
     const int len = prob->sz->dims[0].n;
     const int istride = prob->sz->dims[0].is;
     const int ostride = prob->sz->dims[0].os;
     bench_real *src = (bench_real *) prob->in;
     bench_real *dst = (bench_real *) prob->out;

     /* step 1: load the problem input; kinds sharing a layout share a label */
     switch (prob->k[0]) {
         case R2R_R2HC:
         case R2R_REDFT00:
         case R2R_REDFT01:
              cpyr1(len, &c_re(in[0]), 2, src, istride, 1.0);
              break;

         case R2R_HC2R:
              cpyr1(len / 2 + 1, &c_re(in[0]), 2, src, istride, 1.0);
              /* second half: both sides are walked backwards from the
                 last element */
              cpyr1((len + 1) / 2 - 1, &c_im(in[len - 1]), -2,
                    src + istride * (len - 1), -istride, 1.0);
              break;

         case R2R_RODFT00:
         case R2R_RODFT01:
              cpyr1(len, &c_re(in[1]), 2, src, istride, 1.0);
              break;

         case R2R_REDFT10:
         case R2R_REDFT11:
         case R2R_RODFT11:
              cpyr1(len, &c_re(in[1]), 4, src, istride, 1.0);
              break;

         case R2R_RODFT10:
              cpyr1(len, &c_im(in[1]), 4, src, istride, 1.0);
              break;

         default:
              BENCH_ASSERT(0); /* not yet implemented */
     }

     /* step 2: run the transform */
     after_problem_rcopy_from(prob, src);
     doit(1, prob);
     after_problem_rcopy_to(prob, dst);

     /* steps 3 and 4: optional recopy of the input, then unpack the output */
     switch (prob->k[0]) {
         case R2R_R2HC:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_re(in[0]), 2, 1.0);
              }
              cpyr1(len / 2 + 1, dst, ostride, &c_re(out[0]), 2, 1.0);
              cpyr1((len + 1) / 2 - 1, dst + ostride * (len - 1), -ostride,
                    &c_im(out[1]), 2, 1.0);
              c_im(out[0]) = 0.0;
              if (len % 2 == 0) {
                   c_im(out[len / 2]) = 0.0;
              }
              mkhermitian1(out, len);
              break;

         case R2R_HC2R:
              if (closure->k.recopy_input) {
                   cpyr1(len / 2 + 1, src, istride, &c_re(in[0]), 2, 1.0);
                   cpyr1((len + 1) / 2 - 1, src + istride * (len - 1),
                         -istride, &c_im(in[1]), 2, 1.0);
              }
              cpyr1(len, dst, ostride, &c_re(out[0]), 2, 1.0);
              mkreal(out, len);
              break;

         case R2R_REDFT00:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_re(in[0]), 2, 1.0);
              }
              cpyr1(len, dst, ostride, &c_re(out[0]), 2, 1.0);
              mkre00(out, closure->n0);
              break;

         case R2R_RODFT00:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_im(in[1]), 2, -1.0);
              }
              cpyr1(len, dst, ostride, &c_im(out[1]), 2, -1.0);
              mkio00(out, closure->n0);
              break;

         case R2R_REDFT01:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_re(in[0]), 2, 1.0);
              }
              cpyr1(len, dst, ostride, &c_re(out[1]), 4, 2.0);
              mkre10(out, closure->n0);
              break;

         case R2R_REDFT10:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_re(in[1]), 4, 2.0);
              }
              cpyr1(len, dst, ostride, &c_re(out[0]), 2, 1.0);
              mkre01(out, closure->n0);
              break;

         case R2R_RODFT01:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_re(in[1]), 2, 1.0);
              }
              cpyr1(len, dst, ostride, &c_im(out[1]), 4, -2.0);
              mkio10(out, closure->n0);
              break;

         case R2R_RODFT10:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_im(in[1]), 4, -2.0);
              }
              cpyr1(len, dst, ostride, &c_re(out[1]), 2, 1.0);
              mkro01(out, closure->n0);
              break;

         case R2R_REDFT11:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_re(in[1]), 4, 2.0);
              }
              cpyr1(len, dst, ostride, &c_re(out[1]), 4, 2.0);
              mkre11(out, closure->n0);
              break;

         case R2R_RODFT11:
              if (closure->k.recopy_input) {
                   cpyr1(len, src, istride, &c_im(in[1]), 4, -2.0);
              }
              cpyr1(len, dst, ostride, &c_im(out[1]), 4, -2.0);
              mkio11(out, closure->n0);
              break;

         default:
              BENCH_ASSERT(0); /* not yet implemented */
     }
}
/*
 * Closure callback that drives an rdft2 problem from a pair of complex
 * arrays.
 *
 * p->sign < 0 selects the R2HC direction, anything else HC2R.  In both
 * directions the transform is doit(1, p), bracketed by the matching
 * after_problem_* hooks, and k->k.recopy_input requests that the
 * problem's input buffer be written back into `in` afterwards.
 *
 * Ten temporary tensors are built on entry and all of them are destroyed
 * before returning.
 */
static void rdft2_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
{
     dofft_rdft2_closure *closure = (dofft_rdft2_closure *)k_;
     bench_problem *prob = closure->p;
     bench_tensor *full, *packed, *full_sw, *packed_sw;
     bench_tensor *tail_sz, *full_head, *packed_head;
     bench_tensor *tail_sz_sw, *full_head_sw, *packed_head_sw;
     bench_real *re_in, *im_in, *re_out, *im_out;
     int hc_count, stride_scale, imag_offset, has_last_dim;

     full = tensor_append(prob->vecsz, prob->sz);
     packed = verify_pack(full, 2);

     /* element count with the last transform dimension n replaced by
        n/2 + 1, whenever the problem has a finite, positive rank */
     hc_count = tensor_sz(full);
     has_last_dim = FINITE_RNK(prob->sz->rnk) && prob->sz->rnk > 0;
     if (has_last_dim) {
          hc_count = (hc_count / prob->sz->dims[prob->sz->rnk - 1].n)
               * (prob->sz->dims[prob->sz->rnk - 1].n / 2 + 1);
     }

     re_in = (bench_real *) prob->in;
     re_out = (bench_real *) prob->out;

     /* NOTE(review): presumably tail_sz holds just the last dimension of
        p->sz and the *_head tensors hold everything except their last
        dimension -- confirm against tensor_copy_sub */
     if (has_last_dim && hc_count > 0) {
          tail_sz = tensor_copy_sub(prob->sz, prob->sz->rnk - 1, 1);
          full_head = tensor_copy_sub(full, 0, full->rnk - 1);
          packed_head = tensor_copy_sub(packed, 0, packed->rnk - 1);
     } else {
          tail_sz = mktensor(0);
          full_head = tensor_copy(full);
          packed_head = tensor_copy(packed);
     }

     full_sw = tensor_copy_swapio(full);
     packed_sw = tensor_copy_swapio(packed);
     full_head_sw = tensor_copy_swapio(full_head);
     packed_head_sw = tensor_copy_swapio(packed_head);
     tail_sz_sw = tensor_copy_swapio(tail_sz);

     /* Strides count complex elements in interleaved format but real
        elements in split format, hence the different scale.  When no
        separate imaginary pointer is supplied, the imaginary part starts
        hc_count reals after the real part (split) or one real after it
        (interleaved). */
     imag_offset = prob->split ? hc_count : 1;
     stride_scale = prob->split ? 1 : 2;
     im_in = prob->ini ? (bench_real *) prob->ini : re_in + imag_offset;
     im_out = prob->outi ? (bench_real *) prob->outi : re_out + imag_offset;

     if (prob->sign < 0) {
          /* R2HC */
          int each, howmany, v;

          cpyr(&c_re(in[0]), packed, re_in, full);
          after_problem_rcopy_from(prob, re_in);
          doit(1, prob);
          after_problem_hccopy_to(prob, re_out, im_out);

          if (closure->k.recopy_input) {
               cpyr(re_in, full_sw, &c_re(in[0]), packed_sw);
          }

          cpyhc2(re_out, im_out, tail_sz, full_head, stride_scale,
                 &c_re(out[0]), &c_im(out[0]), packed_head);

          /* one mkhermitian call per vector, each spanning tensor_sz(p->sz)
             complex elements of out */
          each = tensor_sz(prob->sz);
          howmany = tensor_sz(prob->vecsz);
          for (v = 0; v < howmany; ++v) {
               mkhermitian(out + v * each, prob->sz->rnk, prob->sz->dims, 1);
          }
     } else {
          /* HC2R */
          icpyhc2(re_in, im_in, tail_sz, full_head, stride_scale,
                  &c_re(in[0]), &c_im(in[0]), packed_head);
          after_problem_hccopy_from(prob, re_in, im_in);
          doit(1, prob);
          after_problem_rcopy_to(prob, re_out);

          if (closure->k.recopy_input) {
               cpyhc2(re_in, im_in, tail_sz_sw, full_head_sw, stride_scale,
                      &c_re(in[0]), &c_im(in[0]), packed_head_sw);
          }

          mkreal(out, tensor_sz(packed));
          cpyr(re_out, full, &c_re(out[0]), packed);
     }

     /* release every temporary tensor created above */
     tensor_destroy(full);
     tensor_destroy(packed);
     tensor_destroy(full_sw);
     tensor_destroy(packed_sw);
     tensor_destroy(tail_sz);
     tensor_destroy(full_head);
     tensor_destroy(packed_head);
     tensor_destroy(tail_sz_sw);
     tensor_destroy(full_head_sw);
     tensor_destroy(packed_head_sw);
}
/*
 * Hook invoked after halfcomplex input has been copied into a problem.
 *
 * This implementation ignores the imaginary-part pointer `ii` and simply
 * forwards to after_problem_rcopy_from with the real-part pointer `ri`.
 */
void after_problem_hccopy_from(bench_problem *p, bench_real *ri, bench_real *ii)
{
     UNUSED(ii);
     after_problem_rcopy_from(p, ri);
}