/*
 * Build a tensor whose dimension list is the dimensions of `a` followed by
 * the dimensions of `b`.
 *
 * If either operand does not have a finite rank, the result is the tensor
 * returned by mktensor(BENCH_RNK_MINFTY) instead.
 */
bench_tensor *tensor_append(const bench_tensor *a, const bench_tensor *b)
{
     bench_tensor *joined;

     /* a non-finite rank on either side makes the whole result non-finite */
     if (!(BENCH_FINITE_RNK(a->rnk) && BENCH_FINITE_RNK(b->rnk)))
          return mktensor(BENCH_RNK_MINFTY);

     joined = mktensor(a->rnk + b->rnk);
     dimcpy(joined->dims, a->dims, a->rnk);            /* leading part: a */
     dimcpy(joined->dims + a->rnk, b->dims, b->rnk);   /* trailing part: b */
     return joined;
}
bench_tensor *tensor_compress(const bench_tensor *sz) { int i, rnk; bench_tensor *x; BENCH_ASSERT(BENCH_FINITE_RNK(sz->rnk)); for (i = rnk = 0; i < sz->rnk; ++i) { BENCH_ASSERT(sz->dims[i].n > 0); if (sz->dims[i].n != 1) ++rnk; } x = mktensor(rnk); for (i = rnk = 0; i < sz->rnk; ++i) { if (sz->dims[i].n != 1) x->dims[rnk++] = sz->dims[i]; } if (rnk) { /* God knows how qsort() behaves if n==0 */ qsort(x->dims, (size_t)x->rnk, sizeof(bench_iodim), (int (*)(const void *, const void *))dimcmp); } return x; }
/*
 * Partial tensor copy: like tensor_copy, but the result contains only
 * `rnk` dimensions of `sz`, taken consecutively starting at index
 * `start_dim`.
 *
 * `sz` must have finite rank and the requested window must not run past
 * the last dimension of `sz` (checked with BENCH_ASSERT).
 */
bench_tensor *tensor_copy_sub(const bench_tensor *sz, int start_dim, int rnk)
{
     bench_tensor *window;

     BENCH_ASSERT(BENCH_FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk);

     window = mktensor(rnk);
     dimcpy(window->dims, &sz->dims[start_dim], rnk);
     return window;
}
/*
 * "apply" callback of the rdft2 verification closure.
 *
 * k_  - closure; it is cast to dofft_rdft2_closure to reach the problem
 *       `p` and the recopy_input flag.
 * in  - packed complex input array.
 * out - packed complex output array.
 *
 * The routine copies `in` into the problem's own buffers, runs the problem
 * once via doit(1, p), copies the result back into `out`, and finally
 * forces the expected symmetry on `out` (mkhermitian for sign < 0, mkreal
 * otherwise).  When recopy_input is set, the problem's input buffer is
 * copied back over `in` after the run.  All temporary tensors created here
 * are destroyed before returning.
 */
static void rdft2_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
{
     dofft_rdft2_closure *k = (dofft_rdft2_closure *)k_;
     bench_problem *p = k->p;
     bench_tensor *totalsz, *pckdsz, *totalsz_swap, *pckdsz_swap;
     bench_tensor *probsz2, *totalsz2, *pckdsz2;
     bench_tensor *probsz2_swap, *totalsz2_swap, *pckdsz2_swap;
     bench_real *ri, *ii, *ro, *io;
     int n2, totalscale;
     int has_dims;      /* transform tensor has finite, nonzero rank */
     int imag_offset;   /* default distance from real to imaginary part */

     ri = (bench_real *) p->in;
     ro = (bench_real *) p->out;

     /* vector dimensions first, then the transform dimensions */
     totalsz = tensor_append(p->vecsz, p->sz);
     pckdsz = verify_pack(totalsz, 2);

     /* n2: total element count with the last transform dimension n
        replaced by n/2 + 1 */
     n2 = tensor_sz(totalsz);
     has_dims = FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0;
     if (has_dims) {
          n2 = (n2 / p->sz->dims[p->sz->rnk - 1].n)
               * (p->sz->dims[p->sz->rnk - 1].n / 2 + 1);
     }

     if (has_dims && n2 > 0) {
          /* split off the last dimension: probsz2 is that dimension alone,
             the "2" tensors are everything before it */
          probsz2 = tensor_copy_sub(p->sz, p->sz->rnk - 1, 1);
          totalsz2 = tensor_copy_sub(totalsz, 0, totalsz->rnk - 1);
          pckdsz2 = tensor_copy_sub(pckdsz, 0, pckdsz->rnk - 1);
     } else {
          probsz2 = mktensor(0);
          totalsz2 = tensor_copy(totalsz);
          pckdsz2 = tensor_copy(pckdsz);
     }

     totalsz_swap = tensor_copy_swapio(totalsz);
     pckdsz_swap = tensor_copy_swapio(pckdsz);
     totalsz2_swap = tensor_copy_swapio(totalsz2);
     pckdsz2_swap = tensor_copy_swapio(pckdsz2);
     probsz2_swap = tensor_copy_swapio(probsz2);

     /* confusion: the stride is the distance between complex elements
        when using interleaved format, but it is the distance between
        real elements when using split format */
     if (p->split) {
          imag_offset = n2;
          totalscale = 1;
     } else {
          imag_offset = 1;
          totalscale = 2;
     }

     /* explicit imaginary pointers in the problem win over the defaults */
     if (p->ini)
          ii = (bench_real *) p->ini;
     else
          ii = ri + imag_offset;

     if (p->outi)
          io = (bench_real *) p->outi;
     else
          io = ro + imag_offset;

     if (p->sign >= 0) { /* HC2R */
          icpyhc2(ri, ii, probsz2, totalsz2, totalscale,
                  &c_re(in[0]), &c_im(in[0]), pckdsz2);
          after_problem_hccopy_from(p, ri, ii);
          doit(1, p);
          after_problem_rcopy_to(p, ro);
          if (k->k.recopy_input) {
               cpyhc2(ri, ii, probsz2_swap, totalsz2_swap, totalscale,
                      &c_re(in[0]), &c_im(in[0]), pckdsz2_swap);
          }
          mkreal(out, tensor_sz(pckdsz));
          cpyr(ro, totalsz, &c_re(out[0]), pckdsz);
     } else { /* R2HC */
          int len, howmany, v;

          cpyr(&c_re(in[0]), pckdsz, ri, totalsz);
          after_problem_rcopy_from(p, ri);
          doit(1, p);
          after_problem_hccopy_to(p, ro, io);
          if (k->k.recopy_input) {
               cpyr(ri, totalsz_swap, &c_re(in[0]), pckdsz_swap);
          }
          cpyhc2(ro, io, probsz2, totalsz2, totalscale,
                 &c_re(out[0]), &c_im(out[0]), pckdsz2);

          /* one mkhermitian call per vector element, each on a
             consecutive chunk of `len` outputs */
          len = tensor_sz(p->sz);
          howmany = tensor_sz(p->vecsz);
          for (v = 0; v < howmany; ++v) {
               mkhermitian(out + v*len, p->sz->rnk, p->sz->dims, 1);
          }
     }

     tensor_destroy(totalsz);
     tensor_destroy(pckdsz);
     tensor_destroy(totalsz_swap);
     tensor_destroy(pckdsz_swap);
     tensor_destroy(probsz2);
     tensor_destroy(totalsz2);
     tensor_destroy(pckdsz2);
     tensor_destroy(probsz2_swap);
     tensor_destroy(totalsz2_swap);
     tensor_destroy(pckdsz2_swap);
}
/* parse a problem description, return a problem */ bench_problem *problem_parse(const char *s) { bench_problem *p; bench_iodim last_iodim0 = {1,1,1}, *last_iodim = &last_iodim0; bench_iodim *sz_last_iodim; bench_tensor *sz; n_transform nti = SAME, nto = SAME; int transpose = 0; p = (bench_problem *) bench_malloc(sizeof(bench_problem)); p->kind = PROBLEM_COMPLEX; p->k = 0; p->sign = -1; p->in = p->out = 0; p->inphys = p->outphys = 0; p->iphyssz = p->ophyssz = 0; p->in_place = 0; p->destroy_input = 0; p->split = 0; p->userinfo = 0; p->scrambled_in = p->scrambled_out = 0; p->sz = p->vecsz = 0; p->ini = p->outi = 0; p->pstring = (char *) bench_malloc(sizeof(char) * (strlen(s) + 1)); strcpy(p->pstring, s); L1: switch (tolower(*s)) { case 'i': p->in_place = 1; ++s; goto L1; case 'o': p->in_place = 0; ++s; goto L1; case 'd': p->destroy_input = 1; ++s; goto L1; case '/': p->split = 1; ++s; goto L1; case 'f': case '-': p->sign = -1; ++s; goto L1; case 'b': case '+': p->sign = 1; ++s; goto L1; case 'r': p->kind = PROBLEM_REAL; ++s; goto L1; case 'c': p->kind = PROBLEM_COMPLEX; ++s; goto L1; case 'k': p->kind = PROBLEM_R2R; ++s; goto L1; case 't': transpose = 1; ++s; goto L1; /* hack for MPI: */ case '[': p->scrambled_in = 1; ++s; goto L1; case ']': p->scrambled_out = 1; ++s; goto L1; default : ; } s = parsetensor(s, &sz, p->kind == PROBLEM_R2R ? &p->k : 0); if (p->kind == PROBLEM_REAL) { if (p->sign < 0) { nti = p->in_place || always_pad_real ? PADDED : SAME; nto = HALFISH; } else { nti = HALFISH; nto = p->in_place || always_pad_real ? 
PADDED : SAME; } } sz_last_iodim = sz->dims + sz->rnk - 1; if (*s == '*') { /* "external" vector */ ++s; p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); s = parsetensor(s, &sz, 0); p->vecsz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); } else if (*s == 'v' || *s == 'V') { /* "internal" vector */ bench_tensor *vecsz; ++s; s = parsetensor(s, &vecsz, 0); p->vecsz = dwim(vecsz, &last_iodim, nti, nto, sz_last_iodim); p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); } else { p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); p->vecsz = mktensor(0); } if (transpose) { transpose_tensor(p->sz); transpose_tensor(p->vecsz); } if (!p->in_place) p->out = ((bench_real *) p->in) + (1 << 20); /* whatever */ BENCH_ASSERT(p->sz && p->vecsz); BENCH_ASSERT(!*s); return p; }
/*
 * Parse a tensor description starting at `s`.
 *
 * The text is a sequence of one or more dimensions separated by 'x' or
 * 'X'.  Each dimension has the form
 *
 *      n [ ':' is [ ':' os ] ] [ kind ]
 *
 * where n, is and os are read with parseint.  If no strides are given both
 * are set to 0; if only `is` is given, `os` takes the same value.  `kind`
 * is one of f/F (R2R_R2HC), b/B (R2R_HC2R), h/H (R2R_DHT), or e/E / o/O
 * followed by an integer that must read as 0, 1, 10 or 11 (R2R_REDFTxx /
 * R2R_RODFTxx); with no kind suffix the kind is R2R_R2HC.
 *
 * s  - input text.
 * tp - receives the tensor built from the parsed dimensions, in the order
 *      they appeared in the text.
 * k  - if non-null, receives a bench_malloc'ed array with one r2r kind per
 *      dimension, in the same order.
 *
 * Returns a pointer to the first character after the parsed tensor.
 */
static const char *parsetensor(const char *s, bench_tensor **tp, r2r_kind_t **k)
{
     struct dimlist *l = 0, *m;
     bench_tensor *tensor;
     int rnk = 0;
     int idx;

     for (;;) {
          /* push a fresh node on the front of the list, so the list
             ends up in reverse input order */
          m = (struct dimlist *)bench_malloc(sizeof(struct dimlist));
          m->cdr = l;
          l = m;
          ++rnk;

          s = parseint(s, &m->car.n);

          if (*s != ':') {
               /* no strides given */
               m->car.is = 0;
               m->car.os = 0;
          } else {
               /* read input stride */
               ++s;
               s = parseint(s, &m->car.is);
               if (*s != ':') {
                    /* default */
                    m->car.os = m->car.is;
               } else {
                    /* read output stride */
                    ++s;
                    s = parseint(s, &m->car.os);
               }
          }

          switch (*s) {
              case 'f':
              case 'F':
                   m->k = R2R_R2HC;
                   ++s;
                   break;
              case 'b':
              case 'B':
                   m->k = R2R_HC2R;
                   ++s;
                   break;
              case 'h':
              case 'H':
                   m->k = R2R_DHT;
                   ++s;
                   break;
              case 'e':
              case 'E':
              case 'o':
              case 'O': {
                   const char parity = *s++;
                   int code;
                   int is_even;

                   s = parseint(s, &code);
                   is_even = (parity == 'e' || parity == 'E');
                   switch (code) {
                       case 0:
                            m->k = is_even ? R2R_REDFT00 : R2R_RODFT00;
                            break;
                       case 1:
                            m->k = is_even ? R2R_REDFT01 : R2R_RODFT01;
                            break;
                       case 10:
                            m->k = is_even ? R2R_REDFT10 : R2R_RODFT10;
                            break;
                       case 11:
                            m->k = is_even ? R2R_REDFT11 : R2R_RODFT11;
                            break;
                       default:
                            BENCH_ASSERT(0);
                            break;
                   }
                   break;
              }
              default:
                   m->k = R2R_R2HC;
                   break;
          }

          /* another dimension follows only after an 'x' separator */
          if (*s != 'x' && *s != 'X')
               break;
          ++s;
     }

     /* now we have a dimlist.  Build bench_tensor, etc. */

     if (k && rnk > 0) {
          *k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * rnk);
          /* list head is the last dimension parsed, so fill backwards */
          m = l;
          for (idx = rnk - 1; idx >= 0; --idx) {
               BENCH_ASSERT(m);
               (*k)[idx] = m->k;
               m = m->cdr;
          }
     }

     tensor = mktensor(rnk);
     /* pop nodes off the list, filling the tensor from its last dimension
        down to its first, and free each node as we go */
     for (idx = rnk - 1; idx >= 0; --idx) {
          bench_iodim *dim = tensor->dims + idx;

          BENCH_ASSERT(l);
          m = l;
          l = m->cdr;
          dim->n = m->car.n;
          dim->is = m->car.is;
          dim->os = m->car.os;
          bench_free(m);
     }

     *tp = tensor;
     return s;
}
/*
 * Duplicate a tensor: make a new tensor of the same rank as `sz` with
 * mktensor and copy all of its dimensions into it with dimcpy.
 */
bench_tensor *tensor_copy(const bench_tensor *sz)
{
     bench_tensor *dup;

     dup = mktensor(sz->rnk);
     dimcpy(dup->dims, sz->dims, sz->rnk);
     return dup;
}