Exemple #1
0
/*
 * Push the caller's data through the problem held in nfo->p and hand the
 * result back.
 *
 *   nfo  - carries the problem (nfo->p) plus the two tensors nfo->pckdsz
 *          and nfo->totalsz that are passed to cpyr alongside the buffers
 *   in   - caller-side input, paired with nfo->pckdsz
 *   out  - caller-side output, paired with nfo->pckdsz
 *
 * NOTE(review): cpyr appears to take (source, source layout, destination,
 * destination layout) -- confirm against its definition.
 */
static void dofft(info *nfo, R *in, R *out)
{
     bench_real *hook_buf;

     /* Stage the caller's input in the problem's own input buffer. */
     cpyr(in, nfo->pckdsz, (R *) nfo->p->in, nfo->totalsz);
     hook_buf = (bench_real *) nfo->p->in;
     after_problem_rcopy_from(nfo->p, hook_buf);

     doit(1, nfo->p);

     /* The output pointer is read only after doit() has returned. */
     hook_buf = (bench_real *) nfo->p->out;
     after_problem_rcopy_to(nfo->p, hook_buf);
     cpyr((R *) nfo->p->out, nfo->totalsz, out, nfo->pckdsz);
}
Exemple #2
0
/*
 * Run the r2r problem stored in the closure on `in` and store the result,
 * expanded to complex form, in `out`.
 *
 *   k_   - really a dofft_r2r_closure; supplies the problem (p), the
 *          recopy_input flag (k.recopy_input) and n0 for the mk* helpers
 *   in   - complex input array; the components that are read depend on the
 *          transform kind
 *   out  - complex output array, filled from the problem's output buffer
 *          and then passed to a kind-specific mk* helper
 *
 * Only the first dimension of p->sz and the first kind p->k[0] are
 * consulted.  Unknown kinds hit BENCH_ASSERT(0).
 *
 * NOTE(review): the strides 2 and 4 handed to cpyr1 presumably count
 * bench_real elements (so 2 steps one bench_complex) -- confirm.
 * NOTE(review): for several kinds the recopy into `in` uses a different
 * component, stride or scale than the initial copy out of `in` (e.g.
 * RODFT00 reads c_re at scale 1.0 but recopies into c_im at scale -1.0).
 * Kept exactly as found; confirm that this is intentional.
 */
static void r2r_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
{
     dofft_r2r_closure *self = (dofft_r2r_closure *)k_;
     bench_problem *prob = self->p;
     int len = prob->sz->dims[0].n;
     int istr = prob->sz->dims[0].is;
     int ostr = prob->sz->dims[0].os;
     bench_real *src = (bench_real *) prob->in;
     bench_real *dst = (bench_real *) prob->out;

     /* Phase 1: load the problem's input buffer from `in`.  Kinds whose
        load is identical share one arm. */
     switch (prob->k[0]) {
          case R2R_R2HC:
          case R2R_REDFT00:
          case R2R_REDFT01:
               cpyr1(len, &c_re(in[0]), 2, src, istr, 1.0);
               break;

          case R2R_HC2R:
               /* real parts go forward from the front of the buffer ... */
               cpyr1(len/2 + 1, &c_re(in[0]), 2, src, istr, 1.0);
               /* ... imaginary parts go backward from its last slot */
               cpyr1((len+1)/2 - 1, &c_im(in[len-1]), -2,
                     src + istr*(len-1), -istr, 1.0);
               break;

          case R2R_RODFT00:
          case R2R_RODFT01:
               cpyr1(len, &c_re(in[1]), 2, src, istr, 1.0);
               break;

          case R2R_REDFT10:
          case R2R_REDFT11:
          case R2R_RODFT11:
               cpyr1(len, &c_re(in[1]), 4, src, istr, 1.0);
               break;

          case R2R_RODFT10:
               cpyr1(len, &c_im(in[1]), 4, src, istr, 1.0);
               break;

          default:
               BENCH_ASSERT(0); /* not yet implemented */
     }

     /* Phase 2: run the transform, bracketed by the copy hooks. */
     after_problem_rcopy_from(prob, src);
     doit(1, prob);
     after_problem_rcopy_to(prob, dst);

     /* Phase 3: optionally copy the problem's input buffer back into `in`,
        then spread the output buffer into `out` and let the matching mk*
        helper finish it. */
     switch (prob->k[0]) {
          case R2R_R2HC:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_re(in[0]), 2, 1.0);
               }
               cpyr1(len/2 + 1, dst, ostr, &c_re(out[0]), 2, 1.0);
               cpyr1((len+1)/2 - 1, dst + ostr*(len-1), -ostr,
                     &c_im(out[1]), 2, 1.0);
               c_im(out[0]) = 0.0;
               if (len % 2 == 0) {
                    c_im(out[len/2]) = 0.0;
               }
               mkhermitian1(out, len);
               break;

          case R2R_HC2R:
               if (self->k.recopy_input) {
                    cpyr1(len/2 + 1, src, istr, &c_re(in[0]), 2, 1.0);
                    cpyr1((len+1)/2 - 1, src + istr*(len-1), -istr,
                          &c_im(in[1]), 2, 1.0);
               }
               cpyr1(len, dst, ostr, &c_re(out[0]), 2, 1.0);
               mkreal(out, len);
               break;

          case R2R_REDFT00:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_re(in[0]), 2, 1.0);
               }
               cpyr1(len, dst, ostr, &c_re(out[0]), 2, 1.0);
               mkre00(out, self->n0);
               break;

          case R2R_RODFT00:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_im(in[1]), 2, -1.0);
               }
               cpyr1(len, dst, ostr, &c_im(out[1]), 2, -1.0);
               mkio00(out, self->n0);
               break;

          case R2R_REDFT01:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_re(in[0]), 2, 1.0);
               }
               cpyr1(len, dst, ostr, &c_re(out[1]), 4, 2.0);
               mkre10(out, self->n0);
               break;

          case R2R_REDFT10:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_re(in[1]), 4, 2.0);
               }
               cpyr1(len, dst, ostr, &c_re(out[0]), 2, 1.0);
               mkre01(out, self->n0);
               break;

          case R2R_RODFT01:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_re(in[1]), 2, 1.0);
               }
               cpyr1(len, dst, ostr, &c_im(out[1]), 4, -2.0);
               mkio10(out, self->n0);
               break;

          case R2R_RODFT10:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_im(in[1]), 4, -2.0);
               }
               cpyr1(len, dst, ostr, &c_re(out[1]), 2, 1.0);
               mkro01(out, self->n0);
               break;

          case R2R_REDFT11:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_re(in[1]), 4, 2.0);
               }
               cpyr1(len, dst, ostr, &c_re(out[1]), 4, 2.0);
               mkre11(out, self->n0);
               break;

          case R2R_RODFT11:
               if (self->k.recopy_input) {
                    cpyr1(len, src, istr, &c_im(in[1]), 4, -2.0);
               }
               cpyr1(len, dst, ostr, &c_im(out[1]), 4, -2.0);
               mkio11(out, self->n0);
               break;

          default:
               BENCH_ASSERT(0); /* not yet implemented */
     }
}
Exemple #3
0
/*
 * Run the rdft2 problem stored in the closure on `in` and store the
 * result in `out`.
 *
 *   k_   - really a dofft_rdft2_closure; supplies the problem (p) and the
 *          recopy_input flag (k.recopy_input)
 *   in   - complex input array
 *   out  - complex output array
 *
 * p->sign < 0 takes the R2HC path, anything else the HC2R path.  Every
 * tensor built here is destroyed before returning.
 */
static void rdft2_apply(dofft_closure *k_,
			bench_complex *in, bench_complex *out)
{
     dofft_rdft2_closure *self = (dofft_rdft2_closure *)k_;
     bench_problem *prob = self->p;
     bench_tensor *full, *packed, *full_sw, *packed_sw;
     bench_tensor *last_dim, *full_lead, *packed_lead;
     bench_tensor *last_dim_sw, *full_lead_sw, *packed_lead_sw;
     bench_real *re_in, *im_in, *re_out, *im_out;
     int half_total, stride_scale, imag_offset, has_last_dim;

     full = tensor_append(prob->vecsz, prob->sz);
     packed = verify_pack(full, 2);

     /* Element count with the last transform dimension n replaced by
        n/2 + 1 (only when the transform has finite, positive rank). */
     half_total = tensor_sz(full);
     has_last_dim = FINITE_RNK(prob->sz->rnk) && prob->sz->rnk > 0;
     if (has_last_dim) {
          half_total = (half_total / prob->sz->dims[prob->sz->rnk - 1].n) *
                       (prob->sz->dims[prob->sz->rnk - 1].n / 2 + 1);
     }

     re_in = (bench_real *) prob->in;
     re_out = (bench_real *) prob->out;

     if (has_last_dim && half_total > 0) {
          /* Peel the last dimension off into its own tensor. */
          last_dim = tensor_copy_sub(prob->sz, prob->sz->rnk - 1, 1);
          full_lead = tensor_copy_sub(full, 0, full->rnk - 1);
          packed_lead = tensor_copy_sub(packed, 0, packed->rnk - 1);
     }
     else {
          last_dim = mktensor(0);
          full_lead = tensor_copy(full);
          packed_lead = tensor_copy(packed);
     }

     full_sw = tensor_copy_swapio(full);
     packed_sw = tensor_copy_swapio(packed);
     full_lead_sw = tensor_copy_swapio(full_lead);
     packed_lead_sw = tensor_copy_swapio(packed_lead);
     last_dim_sw = tensor_copy_swapio(last_dim);

     /* confusion: the stride is the distance between complex elements
        when using interleaved format, but it is the distance between
        real elements when using split format */
     if (prob->split) {
          imag_offset = half_total;
          stride_scale = 1;
     } else {
          imag_offset = 1;
          stride_scale = 2;
     }
     /* Explicit imaginary pointers win; otherwise derive them from the
        real pointers using the offset chosen above. */
     im_in = prob->ini ? (bench_real *) prob->ini : re_in + imag_offset;
     im_out = prob->outi ? (bench_real *) prob->outi : re_out + imag_offset;

     if (prob->sign < 0) { /* R2HC */
          int xform_len, howmany, v;

          cpyr(&c_re(in[0]), packed, re_in, full);
          after_problem_rcopy_from(prob, re_in);
          doit(1, prob);
          after_problem_hccopy_to(prob, re_out, im_out);
          if (self->k.recopy_input) {
               cpyr(re_in, full_sw, &c_re(in[0]), packed_sw);
          }
          cpyhc2(re_out, im_out, last_dim, full_lead, stride_scale,
                 &c_re(out[0]), &c_im(out[0]), packed_lead);

          /* One mkhermitian call per vector element of the problem. */
          xform_len = tensor_sz(prob->sz);
          howmany = tensor_sz(prob->vecsz);
          for (v = 0; v < howmany; ++v) {
               mkhermitian(out + v*xform_len, prob->sz->rnk,
                           prob->sz->dims, 1);
          }
     }
     else { /* HC2R */
          icpyhc2(re_in, im_in, last_dim, full_lead, stride_scale,
                  &c_re(in[0]), &c_im(in[0]), packed_lead);
          after_problem_hccopy_from(prob, re_in, im_in);
          doit(1, prob);
          after_problem_rcopy_to(prob, re_out);
          if (self->k.recopy_input) {
               cpyhc2(re_in, im_in, last_dim_sw, full_lead_sw, stride_scale,
                      &c_re(in[0]), &c_im(in[0]), packed_lead_sw);
          }
          mkreal(out, tensor_sz(packed));
          cpyr(re_out, full, &c_re(out[0]), packed);
     }

     tensor_destroy(full);
     tensor_destroy(packed);
     tensor_destroy(full_sw);
     tensor_destroy(packed_sw);
     tensor_destroy(last_dim);
     tensor_destroy(full_lead);
     tensor_destroy(packed_lead);
     tensor_destroy(last_dim_sw);
     tensor_destroy(full_lead_sw);
     tensor_destroy(packed_lead_sw);
}
/*
 * Hook invoked after halfcomplex input has been copied into problem `p`.
 * It simply forwards to after_problem_rcopy_from with the real-part
 * pointer; the imaginary-part pointer `ii` is not used.
 */
void after_problem_hccopy_from(bench_problem *p, bench_real *ri, bench_real *ii)
{
     after_problem_rcopy_from(p, ri);
     UNUSED(ii); /* accepted for the hook signature only */
}