/* * Allocate I/O arrays for a problem. * * This is the default routine that can be overridden by the user in * complicated cases. */ void problem_alloc(struct problem *p) { if (p->kind == PROBLEM_COMPLEX) { size_t s = p->size * p->vsize; p->phys_size = s; p->in = bench_malloc(s * sizeof(bench_complex)); if (p->in_place) p->out = p->in; else p->out = bench_malloc(s * sizeof(bench_complex)); } else { size_t s = p->vsize; unsigned int i; for (i = 0; i < p->rank; ++i) /* slightly overallocate to account for unpacked formats */ s *= p->n[i] + 2; p->phys_size = s; p->in = bench_malloc(s * sizeof(bench_real)); if (p->in_place) p->out = p->in; else p->out = bench_malloc(s * sizeof(bench_real)); } }
/*
 * Create a tensor of rank rnk (which must be non-negative).
 *
 * The dims array is allocated only when BENCH_FINITE_RNK(rnk) holds
 * and rnk is positive; otherwise dims is left null.  The dimension
 * entries themselves are not initialized here.
 */
bench_tensor *mktensor(int rnk)
{
     bench_tensor *t;

     BENCH_ASSERT(rnk >= 0);

     t = (bench_tensor *)bench_malloc(sizeof(bench_tensor));
     t->rnk = rnk;
     t->dims = 0;
     if (BENCH_FINITE_RNK(rnk) && rnk > 0) {
          t->dims = (bench_iodim *)bench_malloc(sizeof(bench_iodim) * rnk);
     }
     return t;
}
/*
 * (Re)allocate the local_in / local_out buffers with room for nreal
 * bench_real values each and zero-fill them.
 *
 * Any previously held buffers are released first.  When inplace is
 * nonzero the two pointers share one allocation.  For nreal <= 0 both
 * pointers are simply left null.
 */
static void alloc_local(ptrdiff_t nreal, int inplace)
{
     ptrdiff_t j;

     /* the old output buffer may alias the old input buffer:
        free it only when it is a distinct allocation */
     bench_free(local_in);
     if (local_out != local_in) {
          bench_free(local_out);
     }
     local_out = 0;
     local_in = 0;

     if (nreal <= 0) {
          return;
     }

     local_in = (bench_real*) bench_malloc(nreal * sizeof(bench_real));
     if (inplace) {
          local_out = local_in;
     } else {
          local_out = (bench_real*) bench_malloc(nreal * sizeof(bench_real));
     }

     for (j = 0; j < nreal; ++j) {
          local_in[j] = local_out[j] = 0.0;
     }
}
/*
 * Make a short option string for getopt from the long option
 * description.
 *
 * opt is an array terminated by an entry whose name is null.  Every
 * entry that has no flag pointer and whose val is a printable
 * character contributes that character, followed by ':' for
 * required_argument or '::' for optional_argument.
 *
 * Returns a NUL-terminated string obtained from bench_malloc; at most
 * three characters are emitted per option, hence the 3 * nopt + 1
 * buffer size.
 */
char *make_short_options(const struct option *opt)
{
     int nopt;
     const struct option *p;
     char *s, *t;

     nopt = 0;
     for (p = opt; p->name; ++p)
          ++nopt;

     t = s = bench_malloc(3 * nopt + 1);

     for (p = opt; p->name; ++p) {
          /*
           * isprint() is only defined for values representable as
           * unsigned char (or EOF).  Long-only options commonly use
           * codes outside that range, so filter them out before the
           * call instead of invoking undefined behavior.
           */
          if (!(p->flag)
              && p->val == (unsigned char) p->val
              && isprint(p->val)) {
               *s++ = p->val;
               switch (p->has_arg) {
                   case no_argument:
                        break;
                   case required_argument:
                        *s++ = ':';
                        break;
                   case optional_argument:
                        *s++ = ':';
                        *s++ = ':';
                        break;
               }
          }
     }

     *s++ = '\0';
     return t;
}
void setup(struct problem *p) { int n, zero = 0; BENCH_ASSERT(can_do(p)); switch (p->rank) { case 1: { n = p->n[0]; if (p->kind == PROBLEM_COMPLEX) { /* * example code says that wsave consists of 3 * n * locations, but the code dumps core for n == 4 */ WSAVE = bench_malloc((3 * n + 4) * sizeof(bench_real)); if (SINGLE_PRECISION) CFFT1D(p->in, &n, &zero, WSAVE); else ZFFT1D(p->in, &n, &zero, WSAVE); } else { WSAVE = bench_malloc((4 * n) * sizeof(bench_real)); if (p->sign == -1) { if (SINGLE_PRECISION) SCFFT1D(p->in, &n, &zero, WSAVE); else DZFFT1D(p->in, &n, &zero, WSAVE); } else { if (SINGLE_PRECISION) CSFFT1D(p->in, &n, &zero, WSAVE); else ZDFFT1D(p->in, &n, &zero, WSAVE); } } break; } case 2: /* nothing to do */ break; default: BENCH_ASSERT(0); } }
/*
 * Run the impulse, linearity and shift checks on a PROBLEM_REAL
 * transform and store the resulting errors in e->i, e->l and e->s.
 *
 * rounds == 0 selects the default of 20 rounds.  Problems whose size
 * or vector tensor does not have a finite rank are skipped without
 * touching *e.  When the problem is neither in-place nor allowed to
 * destroy its input, preserves_input() is run as well.
 */
void verify_rdft2(bench_problem *p, int rounds, double tol, errors *e)
{
     C *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
     int len, howmany, total;
     dofft_rdft2_closure cl;

     BENCH_ASSERT(p->kind == PROBLEM_REAL);

     if (!(FINITE_RNK(p->sz->rnk) && FINITE_RNK(p->vecsz->rnk))) {
          return;   /* give up */
     }

     cl.k.apply = rdft2_apply;
     cl.k.recopy_input = 0;
     cl.p = p;

     if (!rounds) {
          rounds = 20;   /* default value */
     }

     len = tensor_sz(p->sz);
     howmany = tensor_sz(p->vecsz);
     total = len * howmany;

     /* seven scratch arrays of `total` elements each */
     inA = (C *) bench_malloc(total * sizeof(C));
     inB = (C *) bench_malloc(total * sizeof(C));
     inC = (C *) bench_malloc(total * sizeof(C));
     outA = (C *) bench_malloc(total * sizeof(C));
     outB = (C *) bench_malloc(total * sizeof(C));
     outC = (C *) bench_malloc(total * sizeof(C));
     tmp = (C *) bench_malloc(total * sizeof(C));

     e->i = impulse(&cl.k, len, howmany, inA, inB, inC, outA, outB, outC,
                    tmp, rounds, tol);
     e->l = linear(&cl.k, 1, total, inA, inB, inC, outA, outB, outC,
                   tmp, rounds, tol);

     /* only one shift test is run: time shift for negative sign,
        frequency shift otherwise */
     e->s = 0.0;
     e->s = dmax(e->s,
                 tf_shift(&cl.k, 1, p->sz, len, howmany, p->sign,
                          inA, inB, outA, outB, tmp, rounds, tol,
                          p->sign < 0 ? TIME_SHIFT : FREQ_SHIFT));

     if (!p->in_place && !p->destroy_input) {
          preserves_input(&cl.k, p->sign < 0 ? mkreal : mkhermitian1,
                          total, inA, inB, outB, rounds);
     }

     bench_free(tmp);
     bench_free(outC);
     bench_free(outB);
     bench_free(outA);
     bench_free(inC);
     bench_free(inB);
     bench_free(inA);
}
/*
 * Accuracy test for a complex DFT problem.
 *
 * Only PROBLEM_COMPLEX problems whose size tensor has rank 1 and
 * whose vector tensor has rank 0 are accepted.  Two scratch arrays of
 * tensor_sz(p->sz) elements are allocated for accuracy_test(), which
 * receives t to report its results, and are released afterwards.
 */
void accuracy_dft(bench_problem *p, int rounds, int impulse_rounds,
                  double t[6])
{
     dofft_dft_closure cl;
     int len;
     C *bufA, *bufB;

     BENCH_ASSERT(p->kind == PROBLEM_COMPLEX);
     BENCH_ASSERT(p->sz->rnk == 1);
     BENCH_ASSERT(p->vecsz->rnk == 0);

     cl.k.apply = dft_apply;
     cl.k.recopy_input = 0;
     cl.p = p;

     len = tensor_sz(p->sz);

     bufA = (C *) bench_malloc(len * sizeof(C));
     bufB = (C *) bench_malloc(len * sizeof(C));

     /* no constraint function is applied for the complex case */
     accuracy_test(&cl.k, 0, p->sign, len, bufA, bufB,
                   rounds, impulse_rounds, t);

     bench_free(bufB);
     bench_free(bufA);
}
void speed(const char *param) { double *t; int iter, k; bench_problem *p; double tmin, y; t = (double *) bench_malloc(time_repeat * sizeof(double)); p = problem_parse(param); BENCH_ASSERT(can_do(p)); problem_alloc(p); problem_zero(p); timer_start(); setup(p); p->setup_time = timer_stop(); start_over: for (iter = 1; iter < (1<<30); iter *= 2) { tmin = 1.0e20; for (k = 0; k < time_repeat; ++k) { timer_start(); doit(iter, p); y = timer_stop(); if (y < 0) /* yes, it happens */ goto start_over; t[k] = y; if (y < tmin) tmin = y; } if (tmin >= time_min) goto done; } goto start_over; /* this also happens */ done: done(p); for (k = 0; k < time_repeat; ++k) { t[k] /= iter; } report(p, t, time_repeat); problem_destroy(p); bench_free(t); return; }
/*
 * Accuracy test for a rank-1 r2r problem with no vector dimension.
 *
 * The transform kind p->k[0] selects a constraint function and the
 * length n0 used for the two scratch arrays and passed to
 * accuracy_test(); n0 is also stored in the closure.  Kinds not
 * listed in the switch trip BENCH_ASSERT.
 */
void accuracy_r2r(bench_problem *p, int rounds, int impulse_rounds,
                  double t[6])
{
     dofft_r2r_closure cl;
     int len, n0 = 1;
     C *bufA, *bufB;
     aconstrain constrain = 0;

     BENCH_ASSERT(p->kind == PROBLEM_R2R);
     BENCH_ASSERT(p->sz->rnk == 1);
     BENCH_ASSERT(p->vecsz->rnk == 0);

     cl.k.apply = r2r_apply;
     cl.k.recopy_input = 0;
     cl.p = p;
     len = tensor_sz(p->sz);

     switch (p->k[0]) {
         case R2R_R2HC:
              constrain = mkreal;
              n0 = len;
              break;
         case R2R_HC2R:
              constrain = mkhermitian1;
              n0 = len;
              break;
         case R2R_REDFT00:
              constrain = mkre00;
              n0 = 2*(len-1);
              break;
         case R2R_RODFT00:
              constrain = mkro00;
              n0 = 2*(len+1);
              break;
         case R2R_REDFT01:
              constrain = mkre01;
              n0 = 4*len;
              break;
         case R2R_REDFT10:
              constrain = mkre10;
              n0 = 4*len;
              break;
         case R2R_RODFT01:
              constrain = mkro01;
              n0 = 4*len;
              break;
         case R2R_RODFT10:
              constrain = mkio10;
              n0 = 4*len;
              break;
         case R2R_REDFT11:
              constrain = mkre11;
              n0 = 8*len;
              break;
         case R2R_RODFT11:
              constrain = mkro11;
              n0 = 8*len;
              break;
         default:
              BENCH_ASSERT(0);   /* not yet implemented */
     }
     cl.n0 = n0;

     bufA = (C *) bench_malloc(n0 * sizeof(C));
     bufB = (C *) bench_malloc(n0 * sizeof(C));

     /* the sign argument is always -1 here */
     accuracy_test(&cl.k, constrain, -1, n0, bufA, bufB,
                   rounds, impulse_rounds, t);

     bench_free(bufB);
     bench_free(bufA);
}
/*
 * Accuracy test for a real (PROBLEM_REAL) problem of rank 1 with no
 * vector dimension.
 *
 * The input constraint passed to accuracy_test() is mkreal when
 * p->sign is negative and mkhermitian1 otherwise.  Two scratch arrays
 * of tensor_sz(p->sz) elements are allocated and released here.
 */
void accuracy_rdft2(bench_problem *p, int rounds, int impulse_rounds,
                    double t[6])
{
     dofft_rdft2_closure cl;
     int len;
     C *bufA, *bufB;

     BENCH_ASSERT(p->kind == PROBLEM_REAL);
     BENCH_ASSERT(p->sz->rnk == 1);
     BENCH_ASSERT(p->vecsz->rnk == 0);

     cl.k.apply = rdft2_apply;
     cl.k.recopy_input = 0;
     cl.p = p;

     len = tensor_sz(p->sz);

     bufA = (C *) bench_malloc(len * sizeof(C));
     bufB = (C *) bench_malloc(len * sizeof(C));

     accuracy_test(&cl.k, p->sign < 0 ? mkreal : mkhermitian1,
                   p->sign, len, bufA, bufB, rounds, impulse_rounds, t);

     bench_free(bufB);
     bench_free(bufA);
}
/*
 * Run the impulse, linearity and shift checks on a PROBLEM_COMPLEX
 * transform and store the resulting errors in e->i, e->l and e->s.
 *
 * rounds == 0 selects the default of 20 rounds.  e->s is the larger
 * of the time-shift and frequency-shift results (as combined by
 * dmax).  When the problem is neither in-place nor allowed to destroy
 * its input, preserves_input() is run as well.
 */
void verify_dft(bench_problem *p, int rounds, double tol, errors *e)
{
     C *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
     int len, howmany, total;
     dofft_dft_closure cl;

     BENCH_ASSERT(p->kind == PROBLEM_COMPLEX);

     cl.k.apply = dft_apply;
     cl.k.recopy_input = 0;
     cl.p = p;

     if (!rounds) {
          rounds = 20;   /* default value */
     }

     len = tensor_sz(p->sz);
     howmany = tensor_sz(p->vecsz);
     total = len * howmany;

     /* seven scratch arrays of `total` elements each */
     inA = (C *) bench_malloc(total * sizeof(C));
     inB = (C *) bench_malloc(total * sizeof(C));
     inC = (C *) bench_malloc(total * sizeof(C));
     outA = (C *) bench_malloc(total * sizeof(C));
     outB = (C *) bench_malloc(total * sizeof(C));
     outC = (C *) bench_malloc(total * sizeof(C));
     tmp = (C *) bench_malloc(total * sizeof(C));

     e->i = impulse(&cl.k, len, howmany, inA, inB, inC, outA, outB, outC,
                    tmp, rounds, tol);
     e->l = linear(&cl.k, 0, total, inA, inB, inC, outA, outB, outC,
                   tmp, rounds, tol);

     /* both shift tests are run for the complex case */
     e->s = 0.0;
     e->s = dmax(e->s,
                 tf_shift(&cl.k, 0, p->sz, len, howmany, p->sign,
                          inA, inB, outA, outB, tmp, rounds, tol,
                          TIME_SHIFT));
     e->s = dmax(e->s,
                 tf_shift(&cl.k, 0, p->sz, len, howmany, p->sign,
                          inA, inB, outA, outB, tmp, rounds, tol,
                          FREQ_SHIFT));

     if (!p->in_place && !p->destroy_input) {
          preserves_input(&cl.k, 0, total, inA, inB, outB, rounds);
     }

     bench_free(tmp);
     bench_free(outC);
     bench_free(outB);
     bench_free(outA);
     bench_free(inC);
     bench_free(inB);
     bench_free(inA);
}
static void alloc_rnk(int rnk_) { rnk = rnk_; bench_free(local_ni); if (rnk == 0) local_ni = 0; else local_ni = (ptrdiff_t *) bench_malloc(sizeof(ptrdiff_t) * rnk * (8 + n_pes * 4)); local_starti = local_ni + rnk; local_no = local_ni + 2 * rnk; local_starto = local_ni + 3 * rnk; istrides = local_ni + 4 * rnk; ostrides = local_ni + 5 * rnk; total_ni = local_ni + 6 * rnk; total_no = local_ni + 7 * rnk; all_local_ni = local_ni + 8 * rnk; all_local_starti = local_ni + (8 + n_pes) * rnk; all_local_no = local_ni + (8 + 2 * n_pes) * rnk; all_local_starto = local_ni + (8 + 3 * n_pes) * rnk; }
/*
 * Pack the array `in` into per-process blocks and scatter them.
 *
 * The staging buffer all_local_in is reallocated (iNtot elements)
 * when all_local_in_alloc is set, and the flag is then cleared.  One
 * block per process is copied into the staging buffer with
 * copy_block_in(), each block starting isend_cnt[] elements after the
 * previous one, and MPI_Scatterv() with root 0 delivers the blocks
 * into local_in.
 */
static void do_scatter_in(bench_real *in)
{
     bench_real *dst;
     int pe;

     if (all_local_in_alloc) {
          bench_free(all_local_in);
          all_local_in = (bench_real*) bench_malloc(iNtot*sizeof(bench_real));
          all_local_in_alloc = 0;
     }

     dst = all_local_in;
     for (pe = 0; pe < n_pes; dst += isend_cnt[pe], ++pe) {
          copy_block_in(dst, rnk,
                        all_local_ni + pe * rnk,
                        all_local_starti + pe * rnk,
                        vn, istrides, vn, in);
     }

     MPI_Scatterv(all_local_in, isend_cnt, isend_off, BENCH_MPI_TYPE,
                  local_in, isend_cnt[my_pe], BENCH_MPI_TYPE,
                  0, MPI_COMM_WORLD);
}
/*
 * Gather the per-process output blocks and unpack them into `out`.
 *
 * The staging buffer all_local_out is reallocated (oNtot elements)
 * when all_local_out_alloc is set, and the flag is then cleared.
 * MPI_Gatherv() collects local_out at root 0, MPI_Bcast() then sends
 * the gathered buffer from root 0 to the communicator, and finally
 * one block per process is copied into `out` with copy_block_out().
 */
static void do_gather_out(bench_real *out)
{
     bench_real *src;
     int pe;

     if (all_local_out_alloc) {
          bench_free(all_local_out);
          all_local_out = (bench_real*) bench_malloc(oNtot*sizeof(bench_real));
          all_local_out_alloc = 0;
     }

     MPI_Gatherv(local_out, orecv_cnt[my_pe], BENCH_MPI_TYPE,
                 all_local_out, orecv_cnt, orecv_off, BENCH_MPI_TYPE,
                 0, MPI_COMM_WORLD);
     MPI_Bcast(all_local_out, oNtot, BENCH_MPI_TYPE, 0, MPI_COMM_WORLD);

     src = all_local_out;
     for (pe = 0; pe < n_pes; src += orecv_cnt[pe], ++pe) {
          copy_block_out(src, rnk,
                         all_local_no + pe * rnk,
                         all_local_starto + pe * rnk,
                         vn, ostrides, vn, out);
     }
}
/*
 * Run the impulse, linearity and shift checks on an r2r problem and
 * store the resulting errors in e->i, e->l and e->s.
 *
 *   p       problem to verify; p->k[i] gives the transform kind of
 *           dimension i of p->sz
 *   rounds  number of rounds per test; 0 selects the default of 20
 *   tol     tolerance forwarded to the individual tests
 *   e       receives the three error figures
 *
 * If the problem is neither in-place nor allowed to destroy its
 * input, the function additionally checks that the transform leaves
 * its input unchanged.
 */
void verify_r2r(bench_problem *p, int rounds, double tol, errors *e)
{
     R *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
     info nfo;
     int n, vecn, N;
     double impulse_amp = 1.0;
     dim_stuff *d;
     int i;

     if (rounds == 0)
          rounds = 20;  /* default value */

     n = tensor_sz(p->sz);
     vecn = tensor_sz(p->vecsz);
     N = n * vecn;

     /* one dim_stuff record per dimension of the transform */
     d = (dim_stuff *) bench_malloc(sizeof(dim_stuff) * p->sz->rnk);
     for (i = 0; i < p->sz->rnk; ++i) {
          int n0, i0, k0;
          trigfun ti, ts;

          d[i].n = n0 = p->sz->dims[i].n;
          /* for kinds after R2R_DHT, n0 becomes 2*(n-1) for REDFT00,
             2*(n+1) for RODFT00 and 2*n for the remaining kinds */
          if (p->k[i] > R2R_DHT)
               n0 = 2 * (n0 + (p->k[i] == R2R_REDFT00 ? -1 :
                               (p->k[i] == R2R_RODFT00 ? 1 : 0)));

          /* per-kind offsets (i0, k0) and trig functions (ti, ts);
             several kinds also double the impulse amplitude */
          switch (p->k[i]) {
              case R2R_R2HC:
                   i0 = k0 = 0;
                   ti = realhalf;
                   ts = coshalf;
                   break;
              case R2R_DHT:
                   i0 = k0 = 0;
                   ti = unity;
                   ts = cos00;
                   break;
              case R2R_HC2R:
                   i0 = k0 = 0;
                   ti = unity;
                   ts = cos00;
                   break;
              case R2R_REDFT00:
                   i0 = k0 = 0;
                   ti = ts = cos00;
                   break;
              case R2R_REDFT01:
                   i0 = k0 = 0;
                   ti = ts = cos01;
                   break;
              case R2R_REDFT10:
                   i0 = k0 = 0;
                   ti = cos10; impulse_amp *= 2.0;
                   ts = cos00;
                   break;
              case R2R_REDFT11:
                   i0 = k0 = 0;
                   ti = cos11; impulse_amp *= 2.0;
                   ts = cos01;
                   break;
              case R2R_RODFT00:
                   i0 = k0 = 1;
                   ti = sin00; impulse_amp *= 2.0;
                   ts = cos00;
                   break;
              case R2R_RODFT01:
                   i0 = 1; k0 = 0;
                   /* note: the test uses the total size n, not this
                      dimension's length */
                   ti = sin01; impulse_amp *= n == 1 ? 1.0 : 2.0;
                   ts = cos01;
                   break;
              case R2R_RODFT10:
                   i0 = 0; k0 = 1;
                   ti = sin10; impulse_amp *= 2.0;
                   ts = cos00;
                   break;
              case R2R_RODFT11:
                   i0 = k0 = 0;
                   ti = sin11; impulse_amp *= 2.0;
                   ts = cos01;
                   break;
              default:
                   /* NOTE(review): if BENCH_ASSERT can ever return,
                      this early return skips bench_free(d) -- confirm
                      that the assertion is fatal */
                   BENCH_ASSERT(0);
                   return;
          }

          d[i].n0 = n0;
          d[i].i0 = i0;
          d[i].k0 = k0;
          d[i].ti = ti;
          d[i].ts = ts;
     }

     /* seven scratch arrays of N reals each */
     inA = (R *) bench_malloc(N * sizeof(R));
     inB = (R *) bench_malloc(N * sizeof(R));
     inC = (R *) bench_malloc(N * sizeof(R));
     outA = (R *) bench_malloc(N * sizeof(R));
     outB = (R *) bench_malloc(N * sizeof(R));
     outC = (R *) bench_malloc(N * sizeof(R));
     tmp = (R *) bench_malloc(N * sizeof(R));

     /* nfo.probsz borrows p->sz; the other three tensors are created
        here and destroyed at the end of the function */
     nfo.p = p;
     nfo.probsz = p->sz;
     nfo.totalsz = tensor_append(p->vecsz, nfo.probsz);
     nfo.pckdsz = verify_pack(nfo.totalsz, 1);
     nfo.pckdvecsz = verify_pack(p->vecsz, tensor_sz(nfo.probsz));

     e->i = rimpulse(d, impulse_amp, n, vecn, &nfo,
                     inA, inB, inC, outA, outB, outC, tmp, rounds, tol);
     e->l = rlinear(N, &nfo, inA, inB, inC, outA, outB, outC, tmp, rounds,tol);
     e->s = t_shift(n, vecn, &nfo, inA, inB, outA, outB, tmp,
                    rounds, tol, d);

     /* grr, verify-lib.c:preserves_input() only works for complex */
     if (!p->in_place && !p->destroy_input) {
          bench_tensor *totalsz_swap, *pckdsz_swap;
          totalsz_swap = tensor_copy_swapio(nfo.totalsz);
          pckdsz_swap = tensor_copy_swapio(nfo.pckdsz);

          /* transform random input, copy the problem's input array
             back out and compare it with what was put in */
          for (i = 0; i < rounds; ++i) {
               rarand(inA, N);
               dofft(&nfo, inA, outB);
               cpyr((R *) nfo.p->in, totalsz_swap, inB, pckdsz_swap);
               racmp(inB, inA, N, "preserves_input", 0.0);
          }

          tensor_destroy(totalsz_swap);
          tensor_destroy(pckdsz_swap);
     }

     tensor_destroy(nfo.totalsz);
     tensor_destroy(nfo.pckdsz);
     tensor_destroy(nfo.pckdvecsz);
     bench_free(tmp);
     bench_free(outC);
     bench_free(outB);
     bench_free(outA);
     bench_free(inC);
     bench_free(inB);
     bench_free(inA);
     bench_free(d);
}
void speed(const char *param, int setup_only) { double *t; int iter = 0, k; bench_problem *p; double tmin, y; t = (double *) bench_malloc(time_repeat * sizeof(double)); for (k = 0; k < time_repeat; ++k) t[k] = 0; p = problem_parse(param); BENCH_ASSERT(can_do(p)); if (!no_speed_allocation) { problem_alloc(p); problem_zero(p); } timer_start(LIBBENCH_TIMER); setup(p); p->setup_time = bench_cost_postprocess(timer_stop(LIBBENCH_TIMER)); /* reset the input to zero again, because the planner in paranoid mode sets it to random values, thus making the benchmark diverge. */ if (!no_speed_allocation) problem_zero(p); if (setup_only) goto done; start_over: for (iter = 1; iter < (1<<30); iter *= 2) { tmin = 1.0e20; for (k = 0; k < time_repeat; ++k) { timer_start(LIBBENCH_TIMER); doit(iter, p); y = bench_cost_postprocess(timer_stop(LIBBENCH_TIMER)); if (y < 0) /* yes, it happens */ goto start_over; t[k] = y; if (y < tmin) tmin = y; } if (tmin >= time_min) goto done; } goto start_over; /* this also happens */ done: done(p); if (iter) for (k = 0; k < time_repeat; ++k) t[k] /= iter; else for (k = 0; k < time_repeat; ++k) t[k] = 0; report(p, t, time_repeat); if (!no_speed_allocation) problem_destroy(p); bench_free(t); return; }
/*
 * Parse a tensor description starting at s.
 *
 * The text is a sequence of dimensions separated by 'x' or 'X'.  Each
 * dimension is an integer n, optionally followed by ":is" and then
 * ":os" (input and output stride; os defaults to is when only is is
 * given, and both are 0 when neither is given), optionally followed
 * by an r2r kind suffix:
 *   f/F -> R2R_R2HC, b/B -> R2R_HC2R, h/H -> R2R_DHT,
 *   e/E or o/O followed by 00, 01, 10 or 11 -> R2R_REDFTxx / R2R_RODFTxx.
 * Without a suffix the kind is R2R_R2HC.
 *
 *   tp  receives the newly built tensor
 *   k   if non-null, receives a newly allocated array with one kind
 *       per dimension
 *
 * Returns the position in s just past the parsed text.
 */
static const char *parsetensor(const char *s, bench_tensor **tp,
                               r2r_kind_t **k)
{
     struct dimlist *l = 0, *m;
     bench_tensor *t;
     int rnk = 0;

 L1:
     /* each dimension is pushed on the front of the list, so l ends
        up holding the dimensions in reverse order */
     m = (struct dimlist *)bench_malloc(sizeof(struct dimlist));
     /* nconc onto l */
     m->cdr = l;
     l = m;
     ++rnk;

     s = parseint(s, &m->car.n);

     if (*s == ':') {
          /* read input stride */
          ++s;
          s = parseint(s, &m->car.is);
          if (*s == ':') {
               /* read output stride */
               ++s;
               s = parseint(s, &m->car.os);
          } else {
               /* default */
               m->car.os = m->car.is;
          }
     } else {
          m->car.is = 0;
          m->car.os = 0;
     }

     /* optional r2r kind suffix */
     if (*s == 'f' || *s == 'F') {
          m->k = R2R_R2HC;
          ++s;
     }
     else if (*s == 'b' || *s == 'B') {
          m->k = R2R_HC2R;
          ++s;
     }
     else if (*s == 'h' || *s == 'H') {
          m->k = R2R_DHT;
          ++s;
     }
     else if (*s == 'e' || *s == 'E' || *s == 'o' || *s == 'O') {
          char c = *(s++);
          int ab;

          /* the two digits are read as one integer, so "00" and "01"
             arrive here as 0 and 1 */
          s = parseint(s, &ab);

          if (c == 'e' || c == 'E') {
               if (ab == 0)
                    m->k = R2R_REDFT00;
               else if (ab == 1)
                    m->k = R2R_REDFT01;
               else if (ab == 10)
                    m->k = R2R_REDFT10;
               else if (ab == 11)
                    m->k = R2R_REDFT11;
               else
                    BENCH_ASSERT(0);
          }
          else {
               if (ab == 0)
                    m->k = R2R_RODFT00;
               else if (ab == 1)
                    m->k = R2R_RODFT01;
               else if (ab == 10)
                    m->k = R2R_RODFT10;
               else if (ab == 11)
                    m->k = R2R_RODFT11;
               else
                    BENCH_ASSERT(0);
          }
     }
     else
          m->k = R2R_R2HC;

     if (*s == 'x' || *s == 'X') {
          ++s;
          goto L1;
     }

     /* now we have a dimlist.  Build bench_tensor, etc. */

     /* fill the kind array back to front to undo the list reversal */
     if (k && rnk > 0) {
          int i;
          *k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * rnk);
          for (m = l, i = rnk - 1; i >= 0; --i, m = m->cdr) {
               BENCH_ASSERT(m);
               (*k)[i] = m->k;
          }
     }

     /* same back-to-front walk for the dimensions; each list node is
        freed as soon as it has been copied */
     t = mktensor(rnk);
     while (--rnk >= 0) {
          bench_iodim *d = t->dims + rnk;
          BENCH_ASSERT(l);
          m = l; l = m->cdr;
          d->n = m->car.n;
          d->is = m->car.is;
          d->os = m->car.os;
          bench_free(m);
     }

     *tp = t;
     return s;
}
/* parse a problem description, return a problem */ bench_problem *problem_parse(const char *s) { bench_problem *p; bench_iodim last_iodim0 = {1,1,1}, *last_iodim = &last_iodim0; bench_iodim *sz_last_iodim; bench_tensor *sz; n_transform nti = SAME, nto = SAME; int transpose = 0; p = (bench_problem *) bench_malloc(sizeof(bench_problem)); p->kind = PROBLEM_COMPLEX; p->k = 0; p->sign = -1; p->in = p->out = 0; p->inphys = p->outphys = 0; p->iphyssz = p->ophyssz = 0; p->in_place = 0; p->destroy_input = 0; p->split = 0; p->userinfo = 0; p->scrambled_in = p->scrambled_out = 0; p->sz = p->vecsz = 0; p->ini = p->outi = 0; p->pstring = (char *) bench_malloc(sizeof(char) * (strlen(s) + 1)); strcpy(p->pstring, s); L1: switch (tolower(*s)) { case 'i': p->in_place = 1; ++s; goto L1; case 'o': p->in_place = 0; ++s; goto L1; case 'd': p->destroy_input = 1; ++s; goto L1; case '/': p->split = 1; ++s; goto L1; case 'f': case '-': p->sign = -1; ++s; goto L1; case 'b': case '+': p->sign = 1; ++s; goto L1; case 'r': p->kind = PROBLEM_REAL; ++s; goto L1; case 'c': p->kind = PROBLEM_COMPLEX; ++s; goto L1; case 'k': p->kind = PROBLEM_R2R; ++s; goto L1; case 't': transpose = 1; ++s; goto L1; /* hack for MPI: */ case '[': p->scrambled_in = 1; ++s; goto L1; case ']': p->scrambled_out = 1; ++s; goto L1; default : ; } s = parsetensor(s, &sz, p->kind == PROBLEM_R2R ? &p->k : 0); if (p->kind == PROBLEM_REAL) { if (p->sign < 0) { nti = p->in_place || always_pad_real ? PADDED : SAME; nto = HALFISH; } else { nti = HALFISH; nto = p->in_place || always_pad_real ? 
PADDED : SAME; } } sz_last_iodim = sz->dims + sz->rnk - 1; if (*s == '*') { /* "external" vector */ ++s; p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); s = parsetensor(s, &sz, 0); p->vecsz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); } else if (*s == 'v' || *s == 'V') { /* "internal" vector */ bench_tensor *vecsz; ++s; s = parsetensor(s, &vecsz, 0); p->vecsz = dwim(vecsz, &last_iodim, nti, nto, sz_last_iodim); p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); } else { p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim); p->vecsz = mktensor(0); } if (transpose) { transpose_tensor(p->sz); transpose_tensor(p->vecsz); } if (!p->in_place) p->out = ((bench_real *) p->in) + (1 << 20); /* whatever */ BENCH_ASSERT(p->sz && p->vecsz); BENCH_ASSERT(!*s); return p; }
/*
 * Prepare the work arrays for a rank 2, 3 or 4 problem and make the
 * initialization call (isig == 0) to the transform routine that
 * matches the problem kind and rank.
 *
 * n1..n4, ldn1, w1..w4, iwork, iopt, iord and ierr are not declared
 * in this function; presumably they are file-scope state shared with
 * the routine that runs the transform -- confirm.  The function
 * asserts that the initialization call left ierr at 0.
 */
void setup(struct problem *p)
{
     int isig = 0; /* indicates initialization call */

     BENCH_ASSERT(can_do(p));

     /* dimensions are stored in reverse: n1 is the last entry of
        p->n.  NOTE(review): for ranks below 4 the unused n3/n4 keep
        whatever value they had, yet they still size w3/w4/iwork
        below -- confirm they are initialized elsewhere */
     switch (p->rank) {
         case 2:
              n2 = p->n[0];
              n1 = p->n[1];
              break;
         case 3:
              n3 = p->n[0];
              n2 = p->n[1];
              n1 = p->n[2];
              break;
         case 4:
              n4 = p->n[0];
              n3 = p->n[1];
              n2 = p->n[2];
              n1 = p->n[3];
              break;
     }

     /* non-complex problems use a leading dimension two larger
        than n1 */
     if (p->kind == PROBLEM_COMPLEX)
          ldn1 = n1;
     else
          ldn1 = n1 + 2;

     /* work arrays; the size formulas presumably follow the vendor
        documentation -- TODO confirm */
     w1 = (int *) bench_malloc(sizeof(int) *
                               ((p->kind == PROBLEM_REAL ? 6 : 4) * n1 + 14));
     if (iopt == 1 && p->rank == 3)
          w2 = (int *) bench_malloc(sizeof(int) * (4*n2*(n1+1) + 14));
     else
          w2 = (int *) bench_malloc(sizeof(int) * (4*n2 + 14));
     w3 = (int *) bench_malloc(sizeof(int) * (4*n3 + 14));
     w4 = (int *) bench_malloc(sizeof(int) * (4*n4 + 14));
     iwork = (int *) bench_malloc(sizeof(int) * MAX2(n1,MAX2(n2,MAX2(n3,n4))));

     if (p->kind == PROBLEM_COMPLEX) {
          switch (p->rank) {
              case 2:
                   C2FFT((bench_complex*) p->in, &ldn1, &n1, &n2,
                         w1, w2, &isig, &iord, iwork, &ierr);
                   break;
              case 3:
                   C3FFT((bench_complex*) p->in, &ldn1, &n1, &n2, &n3,
                         w1, w2, w3, &iopt, &isig, &iord, iwork, &ierr);
                   break;
              case 4:
                   /* NOTE(review): &n2 is passed twice (second and
                      fourth size argument); presumably the first is a
                      leading dimension -- confirm */
                   C4FFT((bench_complex*) p->in, &ldn1, &n2, &n1, &n2, &n3, &n4,
                         w1, w2, w3, w4, &isig, &iord, iwork, &ierr);
                   break;
          }
     }
     else /* PROBLEM_REAL */ {
          switch (p->rank) {
              case 2:
                   R2FFT((bench_complex*) p->in, &ldn1, &n1, &n2,
                         w1, w2, &isig, &iord, iwork, &ierr);
                   break;
              case 3:
                   R3FFT((bench_complex*) p->in, &ldn1, &n1, &n2, &n3,
                         w1, w2, w3, &iopt, &isig, &iord, iwork, &ierr);
                   break;
              case 4:
                   /* same argument pattern as the C4FFT call above */
                   R4FFT((bench_complex*) p->in, &ldn1, &n2, &n1, &n2, &n3, &n4,
                         w1, w2, w3, w4, &isig, &iord, iwork, &ierr);
                   break;
          }
     }
     BENCH_ASSERT(ierr == 0);
}
/*
 * Default allocator for the input/output arrays of a problem; users
 * may replace it in complicated cases.
 *
 * bounds() supplies the lower and upper index bounds of the input and
 * output arrays.  For each array the physical allocation is stored in
 * inphys/outphys with its element count in iphyssz/ophyssz, while
 * in/out are the physical pointers shifted by the lower bound.  For
 * in-place problems both arrays share a single allocation.
 */
void problem_alloc(bench_problem *p)
{
     int ilb, iub, olb, oub;
     int isz, osz;

     bounds(p, &ilb, &iub, &olb, &oub);
     isz = iub - ilb;
     osz = oub - olb;

     if (p->kind == PROBLEM_COMPLEX) {
          bench_complex *ibuf =
               (bench_complex *) bench_malloc(isz * sizeof(bench_complex));

          p->iphyssz = isz;
          p->inphys = ibuf;
          p->in = ibuf - ilb;

          if (!p->in_place) {
               bench_complex *obuf =
                    (bench_complex *) bench_malloc(osz * sizeof(bench_complex));

               p->ophyssz = osz;
               p->outphys = obuf;
               p->out = obuf - olb;
          } else {
               p->out = p->in;
               p->outphys = p->inphys;
               p->ophyssz = p->iphyssz;
          }
          return;
     }

     if (p->kind == PROBLEM_R2R) {
          bench_real *ibuf =
               (bench_real *) bench_malloc(isz * sizeof(bench_real));

          p->iphyssz = isz;
          p->inphys = ibuf;
          p->in = ibuf - ilb;

          if (!p->in_place) {
               bench_real *obuf =
                    (bench_real *) bench_malloc(osz * sizeof(bench_real));

               p->ophyssz = osz;
               p->outphys = obuf;
               p->out = obuf - olb;
          } else {
               p->out = p->in;
               p->outphys = p->inphys;
               p->ophyssz = p->iphyssz;
          }
          return;
     }

     if (p->kind == PROBLEM_REAL && p->sign < 0) {
          /* R2HC: real input, complex output.  In place, the shared
             buffer is sized in reals to the larger of isz and
             2 * osz. */
          bench_real *ibuf;

          if (p->in_place) {
               p->iphyssz = isz > osz*2 ? isz : osz*2;
          } else {
               p->iphyssz = isz;
          }
          ibuf = (bench_real *) bench_malloc(p->iphyssz * sizeof(bench_real));
          p->inphys = ibuf;
          p->in = ibuf - ilb;

          if (!p->in_place) {
               bench_complex *obuf =
                    (bench_complex *) bench_malloc(osz * sizeof(bench_complex));

               p->ophyssz = osz;
               p->outphys = obuf;
               p->out = obuf - olb;
          } else {
               p->out = p->in;
               p->outphys = p->inphys;
               p->ophyssz = p->iphyssz / 2;
          }
          return;
     }

     if (p->kind == PROBLEM_REAL && p->sign > 0) {
          /* HC2R: complex input, real output.  In place, the shared
             buffer is sized in reals to the larger of osz and
             2 * isz. */
          bench_real *obuf;

          if (p->in_place) {
               p->ophyssz = osz > isz*2 ? osz : isz*2;
          } else {
               p->ophyssz = osz;
          }
          obuf = (bench_real *) bench_malloc(p->ophyssz * sizeof(bench_real));
          p->outphys = obuf;
          p->out = obuf - olb;

          if (!p->in_place) {
               bench_complex *ibuf =
                    (bench_complex *) bench_malloc(isz * sizeof(bench_complex));

               p->iphyssz = isz;
               p->inphys = ibuf;
               p->in = ibuf - ilb;
          } else {
               p->in = p->out;
               p->inphys = p->outphys;
               p->iphyssz = p->ophyssz / 2;
          }
          return;
     }

     BENCH_ASSERT(0); /* TODO */
}