/**
 * Hypergeometric sampling by rejection (Devroye, Computing (1987), general
 * method for log-concave densities where the mode is known).
 *
 * Urn with n balls, a of them black; m balls are picked without
 * replacement.  Returns the number of black balls picked.
 *
 * The mode comes from modehprob() and log-probabilities from loghprob().
 * Each trial draws, in this order: two DRAND() values (branch selector and
 * acceptance variate), one DRAND() or ranexp() for the distance from the
 * mode, and one ranmod(2) for the sign of the offset.
 */
int ranhprob(int n, int a, int m)
{
  int peak = modehprob(n, a, m) ;
  double logpeak = loghprob(n, a, m, peak) ;
  double ppeak = exp(logpeak) ;
  double width = 1 + ppeak ;

  for (;;) {
    double u_branch, u_accept, dist, ratio ;
    int offset, candidate ;

    u_branch = DRAND() ;
    u_accept = DRAND() ;

    /* flat centre with probability width/(1+width), exponential tail otherwise */
    if (u_branch <= width/(1+width)) {
      dist = DRAND()*width/ppeak ;
    }
    else {
      dist = (width+ranexp())/ppeak ;
    }

    offset = nnint(dist) ;
    if (ranmod(2)==0) {
      offset = -offset ;
    }
    candidate = peak+offset ;

    /* candidates outside [0, a] are discarded without evaluating loghprob */
    if (candidate<0 || candidate>a) {
      continue ;
    }

    ratio = exp(loghprob(n, a, m, candidate)-logpeak) ;
    u_accept *= MIN(1, exp(width-ppeak*dist)) ;
    if (u_accept <= ratio) {
      return candidate ;
    }
  }
}
/*
 * Out-of-place check of the threaded N-dimensional complex transform.
 *
 * A fresh plan is created for (rank, n, dir).  For every input stride and
 * output stride in 1..MAX_STRIDE the random input is replicated istride
 * times per element, transformed with fftwnd_threads() (or, when every
 * count/stride is 1 and a coin flip says so, fftwnd_threads_one()), and each
 * of the min(istride, ostride) interleaved results is compared against the
 * output of validated_plan on the unstrided copy of the same input.
 */
void testnd_out_of_place(int rank, int *n, fftw_direction dir,
			 fftwnd_plan validated_plan)
{
     fftw_complex *strided_in, *strided_out, *ref_in, *ref_out;
     fftwnd_plan plan;
     int total, d, idx, copy;
     int in_stride, out_stride;
     int plan_flags = measure_flag | wisdom_flag;

     if (coinflip())
	  plan_flags |= FFTW_THREADSAFE;

     /* total number of points in the rank-dimensional array */
     total = 1;
     for (d = 0; d < rank; ++d)
	  total *= n[d];

     strided_in = (fftw_complex *)
	  fftw_malloc(total * MAX_STRIDE * sizeof(fftw_complex));
     strided_out = (fftw_complex *)
	  fftw_malloc(total * MAX_STRIDE * sizeof(fftw_complex));
     ref_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     plan = fftwnd_create_plan(rank, n, dir, plan_flags);

     for (in_stride = 1; in_stride <= MAX_STRIDE; ++in_stride) {
	  /* draw a new random input and replicate it in_stride times */
	  for (idx = 0; idx < total; ++idx) {
	       fftw_complex *dst = strided_in + idx * in_stride;

	       c_re(ref_in[idx]) = DRAND();
	       c_im(ref_in[idx]) = DRAND();
	       for (copy = 0; copy < in_stride; ++copy) {
		    c_re(dst[copy]) = c_re(ref_in[idx]);
		    c_im(dst[copy]) = c_im(ref_in[idx]);
	       }
	  }

	  for (out_stride = 1; out_stride <= MAX_STRIDE; ++out_stride) {
	       int batch = out_stride;
	       int single_call;

	       if (in_stride < out_stride)
		    batch = in_stride;

	       /* coinflip() is only consulted in the all-ones case */
	       single_call = batch == 1 && in_stride == 1
		    && out_stride == 1 && !coinflip();

	       if (single_call)
		    fftwnd_threads_one(nthreads, plan,
				       strided_in, strided_out);
	       else
		    fftwnd_threads(nthreads, plan, batch,
				   strided_in, in_stride, 1,
				   strided_out, out_stride, 1);

	       fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

	       for (copy = 0; copy < batch; ++copy) {
		    CHECK(compute_error_complex(strided_out + copy,
						out_stride,
						ref_out, 1, total)
			  < TOLERANCE,
			  "testnd_out_of_place: wrong answer");
	       }
	  }
     }

     fftwnd_destroy_plan(plan);

     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(strided_out);
     fftw_free(strided_in);
}
/*************************************************
 * correctness tests
 *************************************************/

/*
 * Overwrite a[0..n-1] with DRAND() draws.  For each element the real part
 * is drawn first and the imaginary part second.  Nothing is written when
 * n <= 0.
 */
void fill_random(fftw_complex *a, int n)
{
     int k = 0;

     while (k < n) {
	  c_re(a[k]) = DRAND();
	  c_im(a[k]) = DRAND();
	  ++k;
     }
}
/*
 * Store one DRAND() draw in each of a[0], a[stride], ..., a[(n-1)*stride],
 * in increasing index order.  Elements between the strided positions are
 * left untouched.
 */
void fill_random(fftw_real * a, int n, int stride)
{
     int k = 0;

     while (k < n) {
	  a[k * stride] = DRAND();
	  ++k;
     }
}
/*
 * Pick a random list level in [0, maxlevel].
 *
 * The level grows by one for every DRAND() draw below 0.5 and stops at the
 * first draw that is not below 0.5 or once maxlevel has been reached.  A
 * draw is consumed before the bound is tested, so one extra DRAND() call is
 * made when the level is already at maxlevel.
 */
static int random_level (int maxlevel)
{
  int level;

  for (level = 0; ; level++) {
    if (!(DRAND() < 0.5))
      break;
    if (level >= maxlevel)
      break;
  }
  return level;
}
/**
 * Fill \m with 16 DRAND() draws and load it as the current matrix of the
 * matrix stack selected by \pname.
 *
 * The matrix mode is switched to \pname before the draws are made and is
 * left that way on return; \m keeps the values that were loaded.
 */
static void
load_matrix(float m[16], const GLenum pname)
{
	int k = 0;

	glMatrixMode(pname);

	while (k < 16) {
		m[k] = DRAND();
		k++;
	}

	glLoadMatrixf(m);
}
/*************************************************
 * Ergun's test for real->complex transforms
 *************************************************/

/* Overwrite a[0..n-1], in increasing index order, with DRAND() draws. */
static void rfill_random(fftw_real *a, int n)
{
     int k = 0;

     while (k < n) {
	  a[k] = DRAND();
	  ++k;
     }
}
/**
 * Combine two DRAND() draws into one double.
 *
 * The original note says DRAND is quantized to 1/2^31, so it is called
 * twice to get maximum precision: the second draw is scaled by eps and
 * added to the first.  eps is (1 - DBL_EPSILON) - (BIGINT-1)/BIGINT; it is
 * computed on the first call and cached in a function-local static for as
 * long as the cached value is non-negative.
 */
double drand2()
{
  static double eps = -1.0 ;
  double first, second ;

  if (eps < 0.0) {
    double maxran = 1.0-DBL_EPSILON ;
    double maxran1 = (double) (BIGINT-1) / (double) BIGINT ;

    eps = maxran - maxran1 ;
  }

  /* the coarse draw is taken before the fine one */
  first = DRAND() ;
  second = DRAND() ;

  return first + second * eps ;
}
/**
 * Hypergeometric sampling by rejection (Devroye's method for log-concave
 * discrete distributions with a known mode).
 *
 * n, a, m are passed straight through to loghprob(n, a, m, x), which is
 * taken to return the log-probability of outcome x -- presumably n balls,
 * a of them black, m drawn without replacement; confirm against loghprob().
 * Returns the sampled outcome, always within [0, a].
 *
 * Two corrections relative to the previous revision of this routine:
 *  - the mode is floor((a+1)(m+1)/(n+2)); dividing by n+1 can give a value
 *    outside the support (e.g. n = a = m = 1 gave mode 2), which breaks the
 *    dominating-curve argument the rejection step relies on;
 *  - the acceptance test scales by min(1, exp(w - pm*y)), the same envelope
 *    the proposal is drawn from (flat up to w/pm, exponential tail beyond).
 *    Using exp(1 - pm*y) there accepted tail candidates too often and
 *    biased the sample.
 */
int ranhprob(int n, int a, int m)
{
  double v, y ;
  double pm, logpm, w, ru, rw, rat ;
  int mode, x, zans ;

  /* v >= 0 for valid arguments, so truncation is floor() */
  v = (double) (a+1)*(m+1) / (double) (n+2) ;
  mode = (int) v ;

  logpm = loghprob(n, a, m, mode) ;
  pm = exp(logpm) ;
  w = 1 + pm ;

  for (;;) {
    ru = DRAND() ;
    rw = DRAND() ;

    /* proposal: uniform on [0, w/pm] with prob. w/(1+w), else w/pm + Exp/pm */
    if (ru <= w/(1+w)) y = DRAND()*w/pm ;
    else y = (w+ranexp())/pm ;

    x = nnint(y) ;
    if (ranmod(2)==0) x = -x ;      /* random side of the mode */
    zans = mode+x ;
    if (zans<0) continue ;
    if (zans>a) continue ;

    rat = exp(loghprob(n, a, m, zans)-logpm) ;
    /* envelope must match the proposal density: min(1, exp(w - pm*y)) */
    rw *= MIN(1, exp(w-pm*y)) ;
    if (rw <= rat) break ;
  }
  return zans ;
}
/**
 * Display callback of the test.
 *
 * For each group of fixed-function GL state below, the state is set to
 * values drawn with random_vec4()/DRAND() and check_prg_param() is called
 * with the expected four-component value and the printf-style name of the
 * matching program state binding.  Every check is evaluated even after an
 * earlier one has failed (the call is on the left of "&& pass").
 *
 * Returns PIGLIT_PASS when every check succeeded, PIGLIT_FAIL otherwise.
 */
enum piglit_result piglit_display(void)
{
	bool pass = true;
	float val[4];

	/* Material Property Bindings */
	/* s indexes the face: 0 is reported as "front", 1 as "back" */
	for (int s = 0; s < 2; ++s) {
		for (int p = 0; p < 4; ++p) {
			const GLenum pname[] = {GL_EMISSION, GL_AMBIENT,
						GL_DIFFUSE, GL_SPECULAR};

			random_vec4(val);
			glMaterialfv(GL_FRONT + s, pname[p], val);
			pass = check_prg_param(val, "state.material.%s.%s",
					       s ? "back" : "front",
					       enum2program(pname[p])) && pass;

			/* The front material bindings are also accessible
			 * without ".front.".
			 */
			if (s == 0)
				pass = check_prg_param(
					val, "state.material.%s",
					enum2program(pname[p])) && pass;
		}

		/* shininess is expected as (shininess, 0, 0, 1) */
		val[0] = DRAND();
		val[1] = 0;
		val[2] = 0;
		val[3] = 1;
		glMaterialf(GL_FRONT + s, GL_SHININESS, val[0]);
		pass = check_prg_param(val, "state.material.%s.shininess",
				       s ? "back" : "front") && pass;

		if (s == 0)
			pass = check_prg_param(val, "state.material.shininess") && pass;
	}

	/* Light Property Bindings */
	int max_lights;
	glGetIntegerv(GL_MAX_LIGHTS, &max_lights);

	for (int l = 0; l < max_lights; ++l) {
		for (int p = 0; p < 4; ++p) {
			const GLenum pname[] = {GL_AMBIENT, GL_DIFFUSE,
						GL_SPECULAR, GL_POSITION};
			random_vec4(val);
			glLightfv(GL_LIGHT0 + l, pname[p], val);
			pass = check_prg_param(val, "state.light[%d].%s", l,
					       enum2program(pname[p])) && pass;
		}

		/* attenuation is expected as (constant, linear, quadratic,
		 * spot exponent)
		 */
		random_vec4(val);
		glLightf(GL_LIGHT0 + l, GL_CONSTANT_ATTENUATION, val[0]);
		glLightf(GL_LIGHT0 + l, GL_LINEAR_ATTENUATION, val[1]);
		glLightf(GL_LIGHT0 + l, GL_QUADRATIC_ATTENUATION, val[2]);
		glLightf(GL_LIGHT0 + l, GL_SPOT_EXPONENT, val[3]);
		pass = check_prg_param(val, "state.light[%d].attenuation", l) && pass;

		/* spot.direction is expected as (direction.xyz, cosine of
		 * the cutoff); val[3] / 180 * M_PI converts the cutoff
		 * passed to GL from degrees to radians.
		 */
		random_vec4(val);
		glLightfv(GL_LIGHT0 + l, GL_SPOT_DIRECTION, val);
		glLightf(GL_LIGHT0 + l, GL_SPOT_CUTOFF, val[3]);
		val[3] = cosf(val[3] / 180 * M_PI);
		pass = check_prg_param(val, "state.light[%d].spot.direction", l) && pass;

		/* expected half vector: normalize the position, add 1 to z,
		 * normalize again (normalize() is defined elsewhere --
		 * presumably it rescales val to unit length; confirm).
		 */
		for (int c = 0; c < 3; ++c)
			val[c] = DRAND();
		val[3] = 1;
		glLightfv(GL_LIGHT0 + l, GL_POSITION, val);
		normalize(val);
		val[2] += 1;
		normalize(val);
		pass = check_prg_param(val, "state.light[%d].half", l) && pass;
	}

	/* Light model ambient color */
	random_vec4(val);
	glLightModelfv(GL_LIGHT_MODEL_AMBIENT, val);
	pass = check_prg_param(val, "state.lightmodel.ambient") && pass;

	/* Scene color: expected rgb is
	 * material ambient * light model ambient + material emission,
	 * accumulated in scene_color while each factor is being set.
	 */
	for (int s = 0; s < 2; ++s) {
		float scene_color[4];

		for (int c = 0; c < 4; ++c)
			scene_color[c] = val[c] = DRAND();
		glMaterialfv(GL_FRONT + s, GL_AMBIENT, val);

		for (int c = 0; c < 4; ++c)
			scene_color[c] *= val[c] = DRAND();
		glLightModelfv(GL_LIGHT_MODEL_AMBIENT, val);

		for (int c = 0; c < 4; ++c)
			scene_color[c] += val[c] = DRAND();
		glMaterialfv(GL_FRONT + s, GL_EMISSION, val);

		/* Page 63 (77 of the PDF) of the OpenGL 2.0 spec says:
		 *
		 *      "The value of A produced by lighting is the alpha
		 *      value associated with d_{cm}."
		 *
		 * I'm not sure if this applies to the scene color, but both
		 * Mesa and the NVIDIA driver do this.
		 */
		random_vec4(val);
		glMaterialfv(GL_FRONT + s, GL_DIFFUSE, val);
		scene_color[3] = val[3];

		pass = check_prg_param(scene_color,
				       "state.lightmodel.%s.scenecolor",
				       s ? "back" : "front") && pass;

		if (s == 0)
			pass = check_prg_param(
				scene_color,
				"state.lightmodel.scenecolor") && pass;
	}

	/* Light products: expected rgb is light color * material color for
	 * the same property; alpha is taken from the material value.
	 */
	for (int s = 0; s < 2; ++s) {
		for (int l = 0; l < max_lights; ++l) {
			const GLenum pname[] = {GL_AMBIENT, GL_DIFFUSE,
						GL_SPECULAR};
			for (int p = 0; p < 3; ++p) {
				float light_product[4];
				for (int c = 0; c < 4; ++c)
					light_product[c] = val[c] = DRAND();
				glLightfv(GL_LIGHT0 + l, pname[p], val);

				for (int c = 0; c < 4; ++c)
					light_product[c] *= val[c] = DRAND();
				glMaterialfv(GL_FRONT + s, pname[p], val);

				/* XXX: I have no Idea where the spec says the
				 * alpha value of the light product is the
				 * material's alpha value, but both Mesa and
				 * the NVIDIA driver do this.
				 */
				light_product[3] = val[3];

				pass = check_prg_param(
					light_product,
					"state.lightprod[%d].%s.%s", l,
					s ? "back" : "front",
					enum2program(pname[p])) && pass;

				/* front products are also checked without
				 * the face name
				 */
				if (s == 0)
					pass = check_prg_param(
						light_product,
						"state.lightprod[%d]."
						"%s", l,
						enum2program(
							pname[p])) && pass;
			}
		}
	}

	/* Texture Coordinate Generation Property Bindings */
	int max_texture_coords;
	glGetIntegerv(GL_MAX_TEXTURE_COORDS, &max_texture_coords);

	for (int t = 0; t < max_texture_coords; ++t) {
		const GLenum coord[] = {GL_S, GL_T, GL_R, GL_Q};

		glActiveTexture(GL_TEXTURE0 + t);

		for (int co = 0; co < 4; ++co) {
			const GLenum plane[] = {GL_EYE_PLANE,
						GL_OBJECT_PLANE};
			const char *plane_name[] = {"eye", "object"};
			for (int pl = 0; pl < 2; ++pl) {
				random_vec4(val);
				glTexGenfv(coord[co], plane[pl], val);
				pass = check_prg_param(
					val, "state.texgen[%d].%s.%s", t,
					plane_name[pl],
					enum2program(coord[co])) && pass;

				/* unit 0 is also checked without an index */
				if (t == 0)
					pass = check_prg_param(
						val, "state.texgen.%s.%s",
						plane_name[pl],
						enum2program(
							coord[co])) && pass;
			}
		}
	}

	/* Fog Property Bindings */
	random_vec4(val);
	glFogfv(GL_FOG_COLOR, val);
	pass = check_prg_param(val, "state.fog.color") && pass;

	/* fog params are expected as (density, start, end, 1/(end-start)) */
	random_vec4(val);
	glFogf(GL_FOG_DENSITY, val[0]);
	glFogf(GL_FOG_START, val[1]);
	glFogf(GL_FOG_END, val[2]);
	val[3] = 1 / (val[2] - val[1]);
	pass = check_prg_param(val, "state.fog.params") && pass;

	/* Clip Plane Property Bindings */
	int max_clip_planes;
	glGetIntegerv(GL_MAX_CLIP_PLANES, &max_clip_planes);

	for (int cp = 0; cp < max_clip_planes; ++cp) {
		/* glClipPlane takes doubles; val keeps the same draws
		 * rounded to float for the comparison
		 */
		double vald[4];
		for (int c = 0; c < 4; ++c)
			vald[c] = val[c] = DRAND();
		glClipPlane(GL_CLIP_PLANE0 + cp, vald);
		pass = check_prg_param(val, "state.clip[%d].plane", cp) && pass;
	}

	/* Point Property Bindings */
	/* point.size is expected as (size, min, max, fade threshold) */
	random_vec4(val);
	glPointSize(val[0]);
	glPointParameterf(GL_POINT_SIZE_MIN, val[1]);
	glPointParameterf(GL_POINT_SIZE_MAX, val[2]);
	glPointParameterf(GL_POINT_FADE_THRESHOLD_SIZE, val[3]);
	pass = check_prg_param(val, "state.point.size") && pass;

	/* point.attenuation is expected as (a, b, c, 1) */
	random_vec4(val);
	val[3] = 1;
	glPointParameterfv(GL_POINT_DISTANCE_ATTENUATION, val);
	pass = check_prg_param(&val[0], "state.point.attenuation") && pass;

	return pass ? PIGLIT_PASS : PIGLIT_FAIL;
}
/* Overwrite v[0]..v[3], in that order, with successive DRAND() draws. */
static void
random_vec4(float *v)
{
	float *const end = v + 4;

	while (v != end)
		*v++ = DRAND();
}
/*
 * In-place check of the N-dimensional complex transform.
 *
 * rank, n, dir     dimensions and direction of the transform under test
 * validated_plan   plan whose out-of-place result is taken as reference
 * alternate_api    non-zero: use the fftw2d_/fftw3d_ plan creators when
 *                  rank is 2 or 3
 * specific         non-zero: use the *_create_plan_specific() creators,
 *                  passing in1 with stride 1
 * force_buffered   non-zero: add FFTWND_FORCE_BUFFERED to the plan flags
 *
 * For every stride in 1..MAX_STRIDE a random input is replicated istride
 * times per element, transformed in place, and each of the istride
 * interleaved results is compared with the reference output.
 *
 * Changes from the previous revision: the plan is now CHECKed for NULL
 * before use (as the one-dimensional test_in_place already does), and the
 * duplicated "istride != 1 || istride != 1" test is written once.
 */
void testnd_in_place(int rank, int *n, fftw_direction dir,
		     fftwnd_plan validated_plan,
		     int alternate_api, int specific, int force_buffered)
{
     int istride;
     int N, dim, i;
     fftw_complex *in1, *in2, *out2;
     fftwnd_plan p;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     if (force_buffered)
	  flags |= FFTWND_FORCE_BUFFERED;

     /* total number of points */
     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     in1 = (fftw_complex *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     if (!specific) {
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan(n[0], n[1], dir, flags);
	       else
		    p = fftw3d_create_plan(n[0], n[1], n[2], dir, flags);
	  } else		/* standard api */
	       p = fftwnd_create_plan(rank, n, dir, flags);
     } else {			/* specific plan creation */
	  if (alternate_api && (rank == 2 || rank == 3)) {
	       if (rank == 2)
		    p = fftw2d_create_plan_specific(n[0], n[1], dir, flags,
						    in1, 1,
						    (fftw_complex *) NULL, 1);
	       else
		    p = fftw3d_create_plan_specific(n[0], n[1], n[2], dir,
						    flags,
						    in1, 1,
						    (fftw_complex *) NULL, 1);
	  } else		/* standard api */
	       p = fftwnd_create_plan_specific(rank, n, dir, flags,
					       in1, 1,
					       (fftw_complex *) NULL, 1);
     }

     /* fail with a message instead of handing a null plan to fftwnd() */
     CHECK(p != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /*
	   * generate random inputs
	   */
	  for (i = 0; i < N; ++i) {
	       int j;
	       c_re(in2[i]) = DRAND();
	       c_im(in2[i]) = DRAND();
	       for (j = 0; j < istride; ++j) {
		    c_re(in1[i * istride + j]) = c_re(in2[i]);
		    c_im(in1[i * istride + j]) = c_im(in2[i]);
	       }
	  }

	  /* the single-transform entry point is only tried for stride 1,
	     and then only when the coin flip selects it */
	  if (istride != 1 || coinflip())
	       fftwnd(p, istride, in1, istride, 1,
		      (fftw_complex *) NULL, 1, 1);
	  else
	       fftwnd_one(p, in1, NULL);

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  for (i = 0; i < istride; ++i)
	       CHECK(compute_error_complex(in1 + i, istride, out2, 1, N)
		     < TOLERANCE,
		     "testnd_in_place: wrong answer");
     }

     fftwnd_destroy_plan(p);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
/*
 * In-place check of the one-dimensional complex transform.
 *
 * howmany transforms of length n are laid out with element stride istride
 * in one buffer.  The strided positions hold random data; every other
 * position holds a recognisable sentinel so that stray writes by the
 * transform can be detected afterwards.  The result is compared with what
 * validated_plan produces, out of place, from an unstrided copy of the
 * same data.  When specific is non-zero the plan is created with
 * fftw_create_plan_specific() for the strided buffer.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     fftw_complex *strided, *packed_in, *packed_out;
     fftw_plan plan;
     int idx, gap, batch;
     int total = n * howmany;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     strided = (fftw_complex *)
	  fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
     packed_in = (fftw_complex *)
	  fftw_malloc(n * sizeof(fftw_complex) * howmany);
     packed_out = (fftw_complex *)
	  fftw_malloc(n * sizeof(fftw_complex) * howmany);

     if (specific)
	  plan = fftw_create_plan_specific(n, dir, flags,
					   strided, istride,
					   (fftw_complex *) NULL, 0);
     else
	  plan = fftw_create_plan(n, dir, flags);

     /* random data: packed copy first, then mirrored into the strided slot */
     for (idx = 0; idx < total; ++idx) {
	  c_re(packed_in[idx]) = DRAND();
	  c_re(strided[idx * istride]) = c_re(packed_in[idx]);
	  c_im(packed_in[idx]) = DRAND();
	  c_im(strided[idx * istride]) = c_im(packed_in[idx]);
     }

     /*
      * sentinels in the positions between strided elements, so that a
      * transform that writes outside its stride can be caught below
      */
     for (idx = 0; idx < total; ++idx) {
	  for (gap = 1; gap < istride; ++gap) {
	       c_re(strided[idx * istride + gap]) = idx * istride + gap;
	       c_im(strided[idx * istride + gap]) = idx * istride - gap;
	  }
     }

     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* transform: fftw_one() only in the all-ones case, on a coin flip */
     if (howmany == 1 && istride == 1 && !coinflip())
	  fftw_one(plan, strided, NULL);
     else
	  fftw(plan, howmany, strided, istride, n * istride,
	       (fftw_complex *) NULL, 0, 0);

     fftw_destroy_plan(plan);

     /* the sentinels must have survived the transform */
     for (gap = 1; gap < istride; ++gap) {
	  for (idx = 0; idx < total; ++idx) {
	       CHECK(c_re(strided[idx * istride + gap]) == idx * istride + gap &&
		     c_im(strided[idx * istride + gap]) == idx * istride - gap,
		     "input has been overwritten");
	  }
     }

     /* reference result, one transform at a time */
     for (batch = 0; batch < howmany; ++batch)
	  fftw(validated_plan, 1, packed_in + n * batch, 1, n,
	       packed_out + n * batch, 1, n);

     CHECK(compute_error_complex(strided, istride, packed_out, 1, total)
	   < TOLERANCE,
	   "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(strided);
     fftw_free(packed_in);
     fftw_free(packed_out);
}
/*
 * Same as test_ergun, but for multi-dimensional transforms:
 *
 * validates `plan' (rank dimensions of sizes n[0..rank-1], direction dir)
 * with three randomized properties, `rounds' repetitions each: linearity,
 * the transform of a unit impulse, and the time-shift property along every
 * dimension.  Comparisons go through array_compare(); how a mismatch is
 * reported is decided there, not here.
 */
void testnd_ergun(int rank, int *n, fftw_direction dir, fftwnd_plan plan)
{
     fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_complex *tmp;
     fftw_complex impulse;
     int N, n_before, n_after, dim;
     int i, which_impulse;
     int rounds = 20;
     FFTW_TRIG_REAL twopin;

     /* N = total number of points */
     N = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];

     inA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     inB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     inC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outA = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outB = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     outC = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     tmp = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     WHEN_VERBOSE(2,
		  printf("Validating plan, N = %d, dir = %s\n", N,
			 dir == FFTW_FORWARD ? "FORWARD" : "BACKWARD"));

     /* test 1: check linearity */
     /* transform(alpha*A + beta*B) is compared with
	alpha*transform(A) + beta*transform(B); this reading assumes
	array_scale() scales in place and array_add(d, a, b) stores a + b
	in d -- confirm against those helpers */
     for (i = 0; i < rounds; ++i) {
	  fftw_complex alpha, beta;
	  c_re(alpha) = DRAND();
	  c_im(alpha) = DRAND();
	  c_re(beta) = DRAND();
	  c_im(beta) = DRAND();
	  fill_random(inA, N);
	  fill_random(inB, N);
	  fftwnd(plan, 1, inA, 1, N, outA, 1, N);
	  fftwnd(plan, 1, inB, 1, N, outB, 1, N);
	  array_scale(outA, alpha, N);
	  array_scale(outB, beta, N);
	  array_add(tmp, outA, outB, N);
	  array_scale(inA, alpha, N);
	  array_scale(inB, beta, N);
	  array_add(inC, inA, inB, N);
	  fftwnd(plan, 1, inC, 1, N, outC, 1, N);
	  array_compare(outC, tmp, N);
     }

     /*
      * test 2: check that the unit impulse is transformed properly -- we
      * need to test both the real and imaginary impulses
      */

     for (which_impulse = 0; which_impulse < 2; ++which_impulse) {
	  if (which_impulse == 0) {	/* real impulse */
	       c_re(impulse) = 1.0;
	       c_im(impulse) = 0.0;
	  } else {		/* imaginary impulse */
	       c_re(impulse) = 0.0;
	       c_im(impulse) = 1.0;
	  }

	  for (i = 0; i < N; ++i) {
	       /* impulse */
	       c_re(inA[i]) = 0.0;
	       c_im(inA[i]) = 0.0;

	       /* transform of the impulse */
	       outA[i] = impulse;
	  }
	  inA[0] = impulse;

	  /* the impulse is never transformed directly: a random B and
	     inC (built by array_sub from inA and inB, presumably A - B)
	     are transformed and the sum of the results is compared with
	     the expected constant array */
	  for (i = 0; i < rounds; ++i) {
	       fill_random(inB, N);
	       array_sub(inC, inA, inB, N);
	       fftwnd(plan, 1, inB, 1, N, outB, 1, N);
	       fftwnd(plan, 1, inC, 1, N, outC, 1, N);
	       array_add(tmp, outB, outC, N);
	       array_compare(tmp, outA, N);
	  }
     }

     /* test 3: check the time-shift property */
     /* the paper performs more tests, but this code should be fine too */
     /* -- we have to check shifts in each dimension */

     /* n_before / n_after: product of the dimension sizes preceding /
	following the current dimension */
     n_before = 1;
     n_after = N;
     for (dim = 0; dim < rank; ++dim) {
	  int n_cur = n[dim];

	  n_after /= n_cur;
	  twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n_cur;

	  for (i = 0; i < rounds; ++i) {
	       int j, jb, ja;

	       fill_random(inA, N);
	       /* inB is derived from inA by array_rol() along the current
		  dimension (presumably a rotation by one; see array_rol) */
	       array_rol(inB, inA, n_cur, n_before, n_after);
	       fftwnd(plan, 1, inA, 1, N, outA, 1, N);
	       fftwnd(plan, 1, inB, 1, N, outB, 1, N);

	       /* multiply every outB element whose index along the current
		  dimension is j by (c + i*s), the phase factor for j */
	       for (jb = 0; jb < n_before; ++jb)
		    for (j = 0; j < n_cur; ++j) {
			 FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
			 FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);

			 for (ja = 0; ja < n_after; ++ja) {
			      c_re(tmp[(jb * n_cur + j) * n_after + ja]) =
				  c_re(outB[(jb * n_cur + j) * n_after + ja]) * c
				  - c_im(outB[(jb * n_cur + j) * n_after + ja]) * s;
			      c_im(tmp[(jb * n_cur + j) * n_after + ja]) =
				  c_re(outB[(jb * n_cur + j) * n_after + ja]) * s
				  + c_im(outB[(jb * n_cur + j) * n_after + ja]) * c;
			 }
		    }

	       array_compare(tmp, outA, N);
	  }

	  n_before *= n_cur;
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB);
     fftw_free(inA);
}
/*
 * Implementation of the FFT tester described in
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 *
 * Validates `plan' (length n, direction dir) with three randomized
 * properties, `rounds' repetitions each: linearity, the transform of the
 * unit impulse, and the time-shift property.  Comparisons go through
 * array_compare(); how a mismatch is reported is decided there.
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
     fftw_complex *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_complex *tmp;
     fftw_complex impulse;
     int i;
     int rounds = 20;
     FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;

     inA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     inB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     inC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outA = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outB = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     outC = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
     tmp = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));

     WHEN_VERBOSE(2,
		  printf("Validating plan, n = %d, dir = %s\n", n,
			 dir == FFTW_FORWARD ?
			 "FORWARD" : "BACKWARD"));

     /* test 1: check linearity */
     /* transform(alpha*A + beta*B) is compared with
	alpha*transform(A) + beta*transform(B); this reading assumes
	array_scale() scales in place and array_add(d, a, b) stores a + b
	in d -- confirm against those helpers */
     for (i = 0; i < rounds; ++i) {
	  fftw_complex alpha, beta;
	  c_re(alpha) = DRAND();
	  c_im(alpha) = DRAND();
	  c_re(beta) = DRAND();
	  c_im(beta) = DRAND();
	  fill_random(inA, n);
	  fill_random(inB, n);
	  fftw_out_of_place(plan, n, inA, outA);
	  fftw_out_of_place(plan, n, inB, outB);
	  array_scale(outA, alpha, n);
	  array_scale(outB, beta, n);
	  array_add(tmp, outA, outB, n);
	  array_scale(inA, alpha, n);
	  array_scale(inB, beta, n);
	  array_add(inC, inA, inB, n);
	  fftw_out_of_place(plan, n, inC, outC);
	  array_compare(outC, tmp, n);
     }

     /* test 2: check that the unit impulse is transformed properly */
     c_re(impulse) = 1.0;
     c_im(impulse) = 0.0;

     for (i = 0; i < n; ++i) {
	  /* impulse */
	  c_re(inA[i]) = 0.0;
	  c_im(inA[i]) = 0.0;

	  /* transform of the impulse */
	  outA[i] = impulse;
     }
     inA[0] = impulse;

     /* the impulse is never transformed directly: a random B and inC
	(built by array_sub from inA and inB, presumably A - B) are
	transformed and the sum of the results is compared with outA */
     for (i = 0; i < rounds; ++i) {
	  fill_random(inB, n);
	  array_sub(inC, inA, inB, n);
	  fftw_out_of_place(plan, n, inB, outB);
	  fftw_out_of_place(plan, n, inC, outC);
	  array_add(tmp, outB, outC, n);
	  array_compare(tmp, outA, n);
     }

     /* test 3: check the time-shift property */
     /* the paper performs more tests, but this code should be fine too */
     for (i = 0; i < rounds; ++i) {
	  int j;

	  fill_random(inA, n);
	  /* inB is derived from inA by array_rol() (presumably a rotation
	     by one element; see array_rol) */
	  array_rol(inB, inA, n, 1, 1);
	  fftw_out_of_place(plan, n, inA, outA);
	  fftw_out_of_place(plan, n, inB, outB);
	  /* tmp[j] = outB[j] * (c + i*s), the phase factor for index j */
	  for (j = 0; j < n; ++j) {
	       FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
	       FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
	       c_re(tmp[j]) = c_re(outB[j]) * c - c_im(outB[j]) * s;
	       c_im(tmp[j]) = c_re(outB[j]) * s + c_im(outB[j]) * c;
	  }

	  array_compare(tmp, outA, n);
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB);
     fftw_free(inA);
}
/*
 * In-place check of the N-dimensional real transforms (real->complex and
 * complex->real).
 *
 * rank, n          dimensions of the transform under test
 * validated_plan   complex plan whose output is taken as reference
 * alternate_api    non-zero: use the rfftw2d_/rfftw3d_ plan creators when
 *                  rank is 2 or 3
 * specific         non-zero: use the *_create_plan_specific() creators
 *
 * The real data lives in rows of nr values padded to 2*nhc reals
 * (nhc = nr/2 + 1), so that the same buffer can hold the nhc complex
 * outputs of each row; in1, out1 and out3 are three views of that one
 * buffer.  For every stride in 1..MAX_STRIDE the input is replicated
 * istride times, transformed forward and compared with the reference, then
 * transformed back, scaled by 1/N and compared with the original input.
 *
 * Fix relative to the previous revision: the "can't create plan" message
 * was split across two physical source lines inside the string literal,
 * which is not a valid C literal; it is a single literal again.
 */
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
		     int alternate_api, int specific)
{
     int istride, ostride, howmany;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     /* N: total points; nr: size of the last dimension; nc: number of
	rows of that size; nhc: complex outputs per row */
     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     in1 = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE * sizeof(fftw_real));
     out3 = in1;
     out1 = (fftw_complex *) in1;
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     /* p: real->complex plan, ip: complex->real plan */
     if (alternate_api && specific && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_REAL_TO_COMPLEX, flags,
					     in1, MAX_STRIDE, 0, 0);
	       ip = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_COMPLEX_TO_REAL, flags,
					     in1, MAX_STRIDE, 0, 0);
	  } else {
	       p = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_REAL_TO_COMPLEX, flags,
					     in1, MAX_STRIDE, 0, 0);
	       ip = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_COMPLEX_TO_REAL, flags,
					     in1, MAX_STRIDE, 0, 0);
	  }
     } else if (specific) {
	  p = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX,
					   flags,
					   in1, MAX_STRIDE,
					   in1, MAX_STRIDE);
	  ip = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL,
					    flags,
					    in1, MAX_STRIDE,
					    in1, MAX_STRIDE);
     } else if (alternate_api && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan(n[0], n[1], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw2d_create_plan(n[0], n[1], FFTW_COMPLEX_TO_REAL,
					flags);
	  } else {
	       p = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_COMPLEX_TO_REAL,
					flags);
	  }
     } else {
	  p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
	  ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     }

     CHECK(p != NULL && ip != NULL, "can't create plan");

     /* clear the whole buffer, padding included */
     for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i)
	  out3[i] = 0;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    /* rows of in1 are 2*nhc reals long (padded) */
		    for (k = 0; k < istride; ++k)
			 in1[(i * nhc * 2 + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  howmany = ostride = istride;

	  WHEN_VERBOSE(2, printf("\n testing in-place stride %d...",
				 istride));

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
				       out1, ostride, 1);
	  else
	       rfftwnd_one_real_to_complex(p, in1, NULL);

	  /* only the first nhc outputs of each reference row are compared */
	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						ostride,
						out2 + i * nr, 1,
						nhc) < TOLERANCE,
			  "in-place (r2c): wrong answer");

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
				       out3, istride, 1);
	  else
	       rfftwnd_one_complex_to_real(ip, out1, NULL);

	  /* scale the round trip by 1/N before comparing with the input */
	  for (i = 0; i < nc * nhc * 2 * istride; ++i)
	       out3[i] *= 1.0 / N;

	  /* in2 is viewed as reals with stride 2, i.e. its real parts */
	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error(out3 + i * nhc * 2 * istride + k,
					istride,
					(fftw_real *) (in2 + i * nr), 2,
					nr) < TOLERANCE,
			  "in-place (c2r): wrong answer (check 2)");
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
int generate(int count) { FILE *ofp; int length; /* length of seqence */ double probs[20]; /* probability mass function for AAs */ double cum_probs[20]; /* probability distribution * (cumulative sum of probs) */ int s; /* counter for sequences */ int p; /* counter for position in sequence */ int a; /* counter for amino acid number */ int alo, ahi, amid; /* variables for binary search to get amino acid */ double sum; /* temporary for computing cum_probs */ double x; /* uniform random number */ gen_dirch_mix_param comp_gen; /* composition generator */ ofp = fopen("GeneratedSequences.txt", "w"); if (ofp == NULL) { fprintf(stderr, "Can't open output file\n"); return 1; } srandom(getpid()); gen_dirch_mix_initialize(&comp_gen, 20, 6, mix_coeff, (const double **)comps); for (s=0; s<count; s++) { length = (int) exp(mean_log_length +stddev_log_length*gen_norm()); gen_dirch_mix(&comp_gen, probs); sum = 0.; /* convert probs to cumulative probabilities */ for (a=0; a<20; a++) { sum += probs[a]; cum_probs[a] =sum; } for (p=0; p< length; p++) { x = DRAND(); /* do binary search to determine amino acid */ alo = -1; ahi=19; while (alo<ahi-1) { /* invariant: * cum_probs[alo] < x <= cum_probs[ahi] */ assert (x <= cum_probs[ahi]); amid = (alo+ahi+1)/2; if (x > cum_probs[amid]) alo=amid; else ahi=amid; } if ( fputc(AA[ahi], ofp) == EOF ) return 1; } if ( fputc('\n', ofp) == EOF ) return 1; } if ( fclose(ofp) != EOF ) return 0; else return 1; }
int InitVelocities(double h) { int i, nmoles1, nmoles2, iseed; double ts, sp, sc, r, s; double u1, u2, v1, v2, ujunk,tscale; double DRAND(double); iseed = 4711; ujunk = DRAND(iseed); (void)ujunk; // explicitly turn off unused variable warning iseed = 0; tscale = (16.0)/(1.0*numMoles - 1.0); for ( i =0; i< n3; i=i+2) { do { u1 = DRAND(iseed); u2 = DRAND(iseed); v1 = 2.0 * u1 - 1.0; v2 = 2.0 * u2 - 1.0; s = v1*v1 + v2*v2 ; } while( s >= 1.0 ); r = SQRT( -2.0*log(s)/s ); vh[i] = v1 * r; vh[i+1] = v2 * r; } // There are three parts - repeat for each part nmoles1 = n3/3 ; nmoles2 = nmoles1 * 2; // Find the average speed for the 1st part sp = 0.0 ; for ( i=0; i<nmoles1; i++) { sp = sp + vh[i]; } sp = sp/nmoles1; // Subtract average from all velocities of 1st part for ( i=0; i<nmoles1; i++) { vh[i] = vh[i] - sp; } // Find the average speed for 2nd part sp = 0.0 ; for ( i=nmoles1; i<nmoles2; i++) { sp = sp + vh[i]; } sp = sp/(nmoles2-nmoles1); // Subtract average from all velocities of 2nd part for ( i=nmoles1; i<nmoles2; i++) { vh[i] = vh[i] - sp; } // Find the average speed for 2nd part sp = 0.0 ; for ( i=nmoles2; i<n3; i++) { sp = sp + vh[i]; } sp = sp/(n3-nmoles2); // Subtract average from all velocities of 2nd part for ( i=nmoles2; i<n3; i++) { vh[i] = vh[i] - sp; } // Determine total kinetic energy ekin = 0.0 ; for ( i=0 ; i< n3; i++ ) { ekin = ekin + vh[i]*vh[i] ; } ts = tscale * ekin ; sc = h * SQRT(TEMPERATURE/ts); for ( i=0; i< n3; i++) { vh[i] = vh[i] * sc ; } return 0; }
/*
 * This is a real (as opposed to complex) variation of the FFT tester
 * described in
 *
 * Funda Ergün. Testing multivariate linear functions: Overcoming the
 * generator bottleneck. In Proceedings of the Twenty-Seventh Annual
 * ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
 * Nevada, 29 May--1 June 1995.
 *
 * Validates `plan' (length n, direction dir) with three randomized
 * properties, `rounds' repetitions each: linearity, the transform of the
 * unit impulse, and the time-shift property.  Tests 2 and 3 have one
 * variant per direction.  The index pattern used below pairs element j
 * with element n - j for 0 < 2j < n and treats elements 0 and (for even n)
 * n/2 on their own.  Comparisons go through rarray_compare().
 */
void test_ergun(int n, fftw_direction dir, fftw_plan plan)
{
     fftw_real *inA, *inB, *inC, *outA, *outB, *outC;
     fftw_real *inA1, *inB1;
     fftw_real *tmp;
     int i;
     int rounds = 20;
     FFTW_TRIG_REAL twopin = FFTW_K2PI / (FFTW_TRIG_REAL) n;

     inA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inA1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inB1 = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     inC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outA = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outB = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     outC = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
     tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));

     WHEN_VERBOSE(2,
		  printf("Validating plan, n = %d, dir = %s\n", n,
			 dir == FFTW_REAL_TO_COMPLEX ?
			 "REAL_TO_COMPLEX" : "COMPLEX_TO_REAL"));

     /* test 1: check linearity */
     /* the scaled inputs go to separate buffers (inA1, inB1) and inC is
	built before inA and inB are transformed; transform(inC) is then
	compared with alpha*transform(A) + beta*transform(B) */
     for (i = 0; i < rounds; ++i) {
	  fftw_real alpha, beta;
	  alpha = DRAND();
	  beta = DRAND();
	  rfill_random(inA, n);
	  rfill_random(inB, n);
	  rarray_scale(inA1, inA, alpha, n);
	  rarray_scale(inB1, inB, beta, n);
	  rarray_add(inC, inA1, inB1, n);
	  rfftw_out_of_place(plan, n, inA, outA);
	  rfftw_out_of_place(plan, n, inB, outB);
	  rarray_scale(outA, outA, alpha, n);
	  rarray_scale(outB, outB, beta, n);
	  rarray_add(tmp, outA, outB, n);
	  rfftw_out_of_place(plan, n, inC, outC);
	  rarray_compare(outC, tmp, n);
     }

     /* test 2: check that the unit impulse is transformed properly */

     for (i = 0; i < n; ++i) {
	  /* impulse */
	  inA[i] = 0.0;

	  /* transform of the impulse */
	  /* ones at indices 0..n/2, zeros in the remaining entries */
	  if (2 * i <= n)
	       outA[i] = 1.0;
	  else
	       outA[i] = 0.0;
     }
     inA[0] = 1.0;

     if (dir == FFTW_REAL_TO_COMPLEX) {
	  /* forward: transform(B) + transform(inC) is compared with outA,
	     inC being built by rarray_sub from inA and inB */
	  for (i = 0; i < rounds; ++i) {
	       rfill_random(inB, n);
	       rarray_sub(inC, inA, inB, n);
	       rfftw_out_of_place(plan, n, inB, outB);
	       rfftw_out_of_place(plan, n, inC, outC);
	       rarray_add(tmp, outB, outC, n);
	       rarray_compare(tmp, outA, n);
	  }
     } else {
	  /* backward: the roles of the in/out buffers are swapped and the
	     summed result is scaled by 1/n before comparison with inA */
	  for (i = 0; i < rounds; ++i) {
	       rfill_random(outB, n);
	       rarray_sub(outC, outA, outB, n);
	       rfftw_out_of_place(plan, n, outB, inB);
	       rfftw_out_of_place(plan, n, outC, inC);
	       rarray_add(tmp, inB, inC, n);
	       rarray_scale(tmp, tmp, 1.0 / ((double) n), n);
	       rarray_compare(tmp, inA, n);
	  }
     }

     /* test 3: check the time-shift property */
     /* the paper performs more tests, but this code should be fine too */
     if (dir == FFTW_REAL_TO_COMPLEX) {
	  /* forward: inB is derived from inA by rarray_rol(); the phase
	     factor (c + i*s) is applied to transform(inB) pairwise on
	     (j, n - j) and the result compared with transform(inA) */
	  for (i = 0; i < rounds; ++i) {
	       int j;

	       rfill_random(inA, n);
	       rarray_rol(inB, inA, n, 1, 1);
	       rfftw_out_of_place(plan, n, inA, outA);
	       rfftw_out_of_place(plan, n, inB, outB);
	       tmp[0] = outB[0];
	       for (j = 1; 2 * j < n; ++j) {
		    FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
		    FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
		    tmp[j] = outB[j] * c - outB[n - j] * s;
		    tmp[n - j] = outB[j] * s + outB[n - j] * c;
	       }
	       /* for even n the loop leaves j == n/2: that single middle
		  element is negated */
	       if (2 * j == n)
		    tmp[j] = -outB[j];
	       rarray_compare(tmp, outA, n);
	  }
     } else {
	  /* backward: the phase factor is applied to the input instead,
	     and rarray_rol() to the output of the unmodified input */
	  for (i = 0; i < rounds; ++i) {
	       int j;

	       rfill_random(inA, n);
	       inB[0] = inA[0];
	       for (j = 1; 2 * j < n; ++j) {
		    FFTW_TRIG_REAL s = dir * FFTW_TRIG_SIN(j * twopin);
		    FFTW_TRIG_REAL c = FFTW_TRIG_COS(j * twopin);
		    inB[j] = inA[j] * c - inA[n - j] * s;
		    inB[n - j] = inA[j] * s + inA[n - j] * c;
	       }
	       if (2 * j == n)
		    inB[j] = -inA[j];
	       rfftw_out_of_place(plan, n, inA, outA);
	       rfftw_out_of_place(plan, n, inB, outB);
	       rarray_rol(tmp, outA, n, 1, 1);
	       rarray_compare(tmp, outB, n);
	  }
     }

     WHEN_VERBOSE(2, printf("Validation done\n"));

     fftw_free(tmp);
     fftw_free(outC);
     fftw_free(outB);
     fftw_free(outA);
     fftw_free(inC);
     fftw_free(inB1);
     fftw_free(inA1);
     fftw_free(inB);
     fftw_free(inA);
}
/*
 * In-place check of the distributed (MPI) one-dimensional transform.
 *
 * Nothing is tested when `specific' is set (only "N/A" is printed at
 * verbosity 2).  Otherwise the complete random input (n * howmany values)
 * is generated, the part starting at local_start * howmany is copied into
 * the local buffer, and fftw_mpi() transforms it -- with a scratch array on
 * a coin flip, with a null work pointer otherwise.  The local result is
 * compared with the matching slice of validated_plan's output on the full
 * input.  The istride parameter is not used by this variant.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     int my_n, my_start, my_n_after, my_start_after, my_alloc;
     fftw_complex *local_data, *scratch = NULL, *full_in, *full_out;
     fftw_complex *slice;
     fftw_mpi_plan mpi_plan;
     int k;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (specific) {
	  WHEN_VERBOSE(2, my_printf("N/A\n"));
	  return;
     }

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, flags);

     fftw_mpi_local_sizes(mpi_plan, &my_n, &my_start,
			  &my_n_after, &my_start_after,
			  &my_alloc);

     local_data = (fftw_complex *)
	  fftw_malloc(my_alloc * sizeof(fftw_complex) * howmany);

     /* second coin flip: run with or without a work array */
     if (coinflip()) {
	  WHEN_VERBOSE(2, my_printf("w/work..."));
	  scratch = (fftw_complex *)
	       fftw_malloc(my_alloc * sizeof(fftw_complex) * howmany);
     }

     full_in = (fftw_complex *)
	  fftw_malloc(n * sizeof(fftw_complex) * howmany);
     full_out = (fftw_complex *)
	  fftw_malloc(n * sizeof(fftw_complex) * howmany);

     /* the complete random input, real part drawn before imaginary part */
     for (k = 0; k < n * howmany; ++k) {
	  c_re(full_in[k]) = DRAND();
	  c_im(full_in[k]) = DRAND();
     }

     /* copy the locally stored portion of the input */
     slice = full_in + my_start*howmany;
     for (k = 0; k < my_n * howmany; ++k) {
	  c_re(local_data[k]) = c_re(slice[k]);
	  c_im(local_data[k]) = c_im(slice[k]);
     }

     /* transform under test, then the reference */
     fftw_mpi(mpi_plan, howmany, local_data, scratch);

     fftw_mpi_destroy_plan(mpi_plan);

     fftw(validated_plan, howmany, full_in, howmany, 1,
	  full_out, howmany, 1);

     CHECK(compute_error_complex(local_data, 1,
				 full_out + my_start_after*howmany,
				 1, howmany*my_n_after)
	   < TOLERANCE,
	   "test_in_place: wrong answer");

     WHEN_VERBOSE(2, my_printf("OK\n"));

     fftw_free(local_data);
     fftw_free(scratch);
     fftw_free(full_in);
     fftw_free(full_out);
}
/*
 * Draw one random value using the precomputed parameters in *gen.
 *
 * The fields a, b, min_ab, max_ab, sum_ab and param[0..1] of gen are read;
 * nothing is written through gen. One of four rejection samplers is chosen
 * from min_ab and max_ab:
 *   max_ab < 0.5   Joehnk's algorithm, carried out in log space
 *   min_ab > 1.0   Algorithm BB
 *   max_ab >= 1.0  Atkinson's switching method
 *   otherwise      Atkinson's algorithm
 * Every branch keeps drawing pairs of DRAND() values until one is accepted.
 *
 * NOTE(review): presumably the result is Beta(a,b) distributed and
 * min_ab/max_ab/sum_ab are min(a,b), max(a,b) and a+b -- the setup code
 * that fills gen is not visible here; confirm.
 */
double gen_beta(const gen_beta_param *gen)
{
    /* The following temporaries are recomputed on each iteration,
     * or restored from gen->param array.
     */
    double c,r,s,t,u1,u2,v,w,z,lambda;
    double logv, logw, log_sum;
    double a = gen->a;
    double b = gen->b;
    double min_ab = gen->min_ab;
    double max_ab = gen->max_ab;
    double sum_ab = gen->sum_ab;
    if (max_ab<0.5)
    {
        /* Use Joehnk's algorithm.
         * Use logv and logw, rather than v and w, to avoid
         * floating-point underflow with very small a or b values.
         */
        do
        {
            u1 = DRAND();
            u2 = DRAND();
            logv = log(u1)/a;
            logw = log(u2)/b;
            /* log_sum = log(exp(logv) + exp(logw)), factoring out the
             * larger term so that exp() only sees a non-positive argument.
             */
            log_sum = logv>logw? logv + log(1+ exp(logw-logv)) : logw + log(1+ exp(logv-logw));
        } while (log_sum>0.0); /* retry while exp(logv)+exp(logw) exceeds 1 */
        /* Guarantees the value returned below does not exceed 1. */
        assert(logv<=log_sum);
        /* Equals exp(logv) / (exp(logv) + exp(logw)). */
        return exp(logv - log_sum);
    }
    if (min_ab > 1.0)
    {
        /* use Algorithm BB */
        lambda = gen->param[0];
        c = gen->param[1];
        do
        {
            u1 = DRAND();
            u2 = DRAND();
            v = lambda*log(u1/(1.0-u1));
            /* NOTE(review): aexp is defined elsewhere; presumably it
             * computes min_ab*exp(v) with overflow protection -- confirm.
             */
            w = aexp(min_ab,v);
            z = u1*u1*u2;
            /* 1.38629436112 is numerically ln 4. */
            r = c*v-1.38629436112;
            s = min_ab+r-w;
            /* Cheap acceptance test that avoids the logarithms below;
             * 2.609438 is numerically 1 + ln 5.
             */
            if(s+2.609438 >= 5.0*z) break;
            t = log(z);
            /* The loop repeats (the candidate is rejected) while the
             * expression in the condition is below t. The commented-out
             * s<=t term is a second shortcut test that is not in use.
             */
        } while ( /* s<=t && */ r+sum_ab*log(sum_ab/(max_ab+w)) < t);
        /* NOTE(review): ret is defined elsewhere; presumably it maps w to
         * the final variate depending on whether a is min_ab -- confirm.
         */
        return ret(a,min_ab, max_ab, w);
    }
    if (max_ab>= 1.0)
    {
        /* use Atkinson's switching method, as
         * described in Dagpunar's book
         * p=min_ab, q=max_ab
         * t stored as gen->param[0], r as gen->param[1]
         */
        t = gen->param[0];
        r = gen->param[1];
        for(;;)
        {
            u1 = DRAND();
            u2 = DRAND();
            if (u1 < r)
            {
                /* Candidate from the lower piece; w does not exceed t. */
                w = t*pow(u1/r, 1/min_ab);
                if (log(u2) < (max_ab -1)*log(1-w)) break;
            }
            else
            {
                /* Candidate from the upper piece; w is at least t. */
                w = 1- (1-t)*pow((1-u1)/(1-r), 1/max_ab);
                if (log(u2) < (min_ab -1) * log(w/t)) break;
            }
        }
        /* w was generated with min_ab as the first exponent; mirror it
         * when a is not the smaller parameter.
         */
        return (a==min_ab)? w : 1-w;
    }
    else
    {
        /* use Atkinson's Algorithm */
        /* Same two-piece scheme as the branch above; only the acceptance
         * test for the lower piece differs (it divides by 1-t).
         */
        t = gen->param[0];
        r = gen->param[1];
        for(;;)
        {
            u1 = DRAND();
            u2 = DRAND();
            if (u1 < r)
            {
                w = t*pow(u1/r, 1/min_ab);
                if (log(u2) < (max_ab -1)*log((1-w)/(1-t))) break;
            }
            else
            {
                w = 1- (1-t)*pow((1-u1)/(1-r), 1/max_ab);
                if (log(u2) < (min_ab -1) * log(w/t)) break;
            }
        }
        return (a==min_ab)? w : 1-w;
    }
}
/*
 * Run the in-place multi-dimensional MPI transform for every stride from 1
 * to MAX_STRIDE and compare this process's slab with the same slab of the
 * result produced by validated_plan.
 *
 * rank, n        - number of dimensions and their sizes
 * dir            - direction handed to the plan constructor
 * validated_plan - plan used to transform the reference data in place
 * alternate_api  - when nonzero and rank is 2 or 3, build the plan with the
 *                  fftw2d/fftw3d MPI constructors
 * specific       - when nonzero the test is skipped
 * force_buffered - when nonzero, FFTWND_FORCE_BUFFERED is added to the flags
 *
 * The test is also skipped when rank < 2.
 */
void testnd_in_place(int rank, int *n, fftw_direction dir,
                     fftwnd_plan validated_plan,
                     int alternate_api, int specific, int force_buffered)
{
     int local_nx, local_x_start;
     int local_ny_after_transpose, local_y_start_after_transpose;
     int total_local_size;
     int total = 1;          /* product of all dimension sizes */
     int slab;               /* elements per index of the first dimension */
     int local_count;        /* elements owned by this process */
     int stride, d, k, rep;
     int plan_flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;
     fftw_complex *local_data;
     fftw_complex *scratch = 0;
     fftw_complex *global_data;
     fftw_complex *local_ref;
     fftwnd_mpi_plan plan = 0;

     if (specific || rank < 2)
          return;

     if (coinflip())
          plan_flags |= FFTW_THREADSAFE;
     if (force_buffered)
          plan_flags |= FFTWND_FORCE_BUFFERED;

     for (d = 0; d < rank; ++d)
          total *= n[d];

     if (alternate_api && rank == 2)
          plan = fftw2d_mpi_create_plan(MPI_COMM_WORLD,
                                        n[0], n[1], dir, plan_flags);
     else if (alternate_api && rank == 3)
          plan = fftw3d_mpi_create_plan(MPI_COMM_WORLD,
                                        n[0], n[1], n[2], dir, plan_flags);
     else
          /* standard api */
          plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD,
                                        rank, n, dir, plan_flags);

     fftwnd_mpi_local_sizes(plan,
                            &local_nx, &local_x_start,
                            &local_ny_after_transpose,
                            &local_y_start_after_transpose,
                            &total_local_size);

     local_data = (fftw_complex *)
          fftw_malloc(total_local_size * MAX_STRIDE * sizeof(fftw_complex));

     /* Randomly decide whether to hand the transform a work array. */
     if (coinflip()) {
          WHEN_VERBOSE(1, my_printf("w/work..."));
          scratch = (fftw_complex *)
               fftw_malloc(total_local_size * MAX_STRIDE * sizeof(fftw_complex));
     }

     global_data = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     slab = total / n[0];
     local_count = local_nx * slab;
     local_ref = global_data + local_x_start * slab;

     for (stride = 1; stride <= MAX_STRIDE; ++stride) {
          /* generate random inputs */
          for (k = 0; k < total; ++k) {
               c_re(global_data[k]) = DRAND();
               c_im(global_data[k]) = DRAND();
          }

          /* Replicate each locally owned element `stride` times. */
          for (k = 0; k < local_count; ++k) {
               for (rep = 0; rep < stride; ++rep) {
                    c_re(local_data[k * stride + rep]) = c_re(local_ref[k]);
                    c_im(local_data[k * stride + rep]) = c_im(local_ref[k]);
               }
          }

          fftwnd_mpi(plan, stride, local_data, scratch, FFTW_NORMAL_ORDER);

          /* Reference transform overwrites global_data. */
          fftwnd(validated_plan, 1, global_data, 1, 1, NULL, 0, 0);

          for (rep = 0; rep < stride; ++rep)
               CHECK(compute_error_complex(local_data + rep, stride,
                                           local_ref, 1,
                                           local_count) < TOLERANCE,
                     "testnd_in_place: wrong answer");
     }

     fftwnd_mpi_destroy_plan(plan);
     fftw_free(global_data);
     fftw_free(scratch);
     fftw_free(local_data);
}
/*
 * Check the out-of-place real multi-dimensional transforms for every
 * combination of input and output stride from 1 to MAX_STRIDE.
 *
 * The real-to-complex output is compared with the first nr/2 + 1 entries of
 * each row produced by validated_plan on the same data (imaginary parts set
 * to zero). The complex-to-real transform is then applied to that output,
 * scaled by 1/N, and compared with the original input.
 *
 * rank, n        - number of dimensions and their sizes
 * validated_plan - complex plan used to compute the reference spectrum
 */
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     int istride, ostride;
     int total = 1;        /* N: product of all dimension sizes */
     int rows = 1;         /* nc: product of all but the last dimension */
     int row_len = 1;      /* nr: size of the last dimension */
     int half_len = 1;     /* nhc: complex outputs kept per row */
     int d, row, col, rep, idx;
     double scale;
     fftw_real *real_in, *real_back;
     fftw_complex *ref_in, *half_out, *ref_out;
     fftwnd_plan forward, inverse;
     int plan_flags = measure_flag | wisdom_flag;

     if (coinflip())
          plan_flags |= FFTW_THREADSAFE;

     for (d = 0; d < rank; ++d)
          total *= n[d];
     if (rank > 0) {
          row_len = n[rank - 1];
          rows = total / row_len;
          half_len = row_len / 2 + 1;
     }

     real_in = (fftw_real *)
          fftw_malloc(total * MAX_STRIDE * sizeof(fftw_real));
     real_back = (fftw_real *)
          fftw_malloc(total * MAX_STRIDE * sizeof(fftw_real));
     half_out = (fftw_complex *)
          fftw_malloc(half_len * rows * MAX_STRIDE * sizeof(fftw_complex));
     ref_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     forward = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, plan_flags);
     inverse = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, plan_flags);
     CHECK(forward != NULL && inverse != NULL, "can't create plan");

     scale = 1.0 / total;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
          /* generate random inputs */
          for (row = 0; row < rows; ++row) {
               for (col = 0; col < row_len; ++col) {
                    idx = row * row_len + col;
                    c_re(ref_in[idx]) = DRAND();
                    c_im(ref_in[idx]) = 0.0;
                    for (rep = 0; rep < istride; ++rep)
                         real_in[idx * istride + rep] = c_re(ref_in[idx]);
               }
          }

          for (idx = 0; idx < total * istride; ++idx)
               real_back[idx] = 0.0;

          fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

          for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
               int howmany = istride;

               if (ostride <= istride)
                    howmany = ostride;

               WHEN_VERBOSE(2, printf("\n testing stride %d/%d...", istride, ostride));

               /* The single-transform entry point is only eligible, and
                * the coin only flipped, when everything equals 1. */
               if (howmany != 1 || istride != 1 || ostride != 1
                   || coinflip())
                    rfftwnd_real_to_complex(forward, howmany,
                                            real_in, istride, 1,
                                            half_out, ostride, 1);
               else
                    rfftwnd_one_real_to_complex(forward, real_in, half_out);

               for (row = 0; row < rows; ++row) {
                    for (rep = 0; rep < howmany; ++rep)
                         CHECK(compute_error_complex(half_out + row * half_len * ostride + rep,
                                                     ostride,
                                                     ref_out + row * row_len,
                                                     1,
                                                     half_len) < TOLERANCE,
                               "out-of-place (r2c): wrong answer");
               }

               if (howmany != 1 || istride != 1 || ostride != 1
                   || coinflip())
                    rfftwnd_complex_to_real(inverse, howmany,
                                            half_out, ostride, 1,
                                            real_back, istride, 1);
               else
                    rfftwnd_one_complex_to_real(inverse, half_out, real_back);

               /* Scale the round trip by 1/N before comparing. */
               for (idx = 0; idx < total * istride; ++idx)
                    real_back[idx] *= scale;

               /* Whole-buffer comparison, done only when howmany equals
                * istride. */
               if (istride == howmany)
                    CHECK(compute_error(real_back, 1, real_in, 1,
                                        total * istride) < TOLERANCE,
                          "out-of-place (c2r): wrong answer");

               /* Compare each transformed copy with the real parts of the
                * reference input (stride 2 over the complex array). */
               for (row = 0; row < rows; ++row) {
                    for (rep = 0; rep < howmany; ++rep)
                         CHECK(compute_error(real_back + row * row_len * istride + rep,
                                             istride,
                                             (fftw_real *) (ref_in + row * row_len),
                                             2,
                                             row_len) < TOLERANCE,
                               "out-of-place (c2r): wrong answer (check 2)");
               }
          }
     }

     rfftwnd_destroy_plan(forward);
     rfftwnd_destroy_plan(inverse);

     fftw_free(real_back);
     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(half_out);
     fftw_free(real_in);
}
int main (int argc, char *argv[]) { int max_points_per_rank = 100; int num_time_steps = 100; REAL time_step = 0.001; int n, i, rank, size, *ranks; REAL *point[3], *velo[3]; struct timing t; double dt[2], gt[2]; if (argc == 1) { printf ("SYNOPSIS: test_dynlb max_points_per_rank [num_time_steps = 100] [time_step = 0.001]\n"); return 0; } if (argc >= 2) max_points_per_rank = atoi(argv[1]); if (argc >= 3) num_time_steps = atoi(argv[2]); if (argc >= 4) time_step = atof(argv[3]); MPI_Init (&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Comm_size (MPI_COMM_WORLD, &size); srand(time(NULL) + rank); n = rand() % max_points_per_rank + 1; if (rank == 0) printf ("Generating random points in unit cube...\n"); timerstart (&t); printf ("Generating %d points on rank %d.\n", n, rank); ERRMEM (point[0] = malloc (n * sizeof (REAL))); ERRMEM (point[1] = malloc (n * sizeof (REAL))); ERRMEM (point[2] = malloc (n * sizeof (REAL))); ERRMEM (velo[0] = malloc (n * sizeof (REAL))); ERRMEM (velo[1] = malloc (n * sizeof (REAL))); ERRMEM (velo[2] = malloc (n * sizeof (REAL))); ERRMEM (ranks = malloc (n * sizeof (int))); for (i = 0; i < n; i ++) { point[0][i] = DRAND(); point[1][i] = DRAND(); point[2][i] = DRAND(); velo[0][i] = DRAND()-0.5; velo[1][i] = DRAND()-0.5; velo[2][i] = DRAND()-0.5; } dt[0] = ptimerend (&t); if (rank == 0) printf ("Generating points took %g sec.\n", dt[0]); if (rank == 0) printf ("Timing %d simple morton based balancing steps...\n", num_time_steps); for (i = 0, dt[0] = 0.0, dt[1] = 0.0; i < num_time_steps; i ++) { timerstart (&t); dynlb_morton_balance (n, point, ranks); dt[0] += timerend (&t); if (rank == 0) printf ("."), fflush (stdout); timerstart (&t); unit_cube_step (0, n, point, velo, time_step); dt[1] += timerend (&t); } MPI_Allreduce (dt, gt, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); gt[0] /= (double)size * (double)num_time_steps; gt[1] /= (double)size * (double)num_time_steps; if (rank == 0) printf ("\nMORTON: avg. integration: %g sec. per step, avg. 
balancing: %g sec. per step; ratio: %g\n", gt[0]+gt[1], gt[1], gt[1]/(gt[0]+gt[1])); if (rank == 0) printf ("Creating partitioning tree based balancer ...\n"); timerstart (&t); struct dynlb *lb = dynlb_create (0, n, point, 0, 0.5, DYNLB_RCB_TREE); dt[0] += ptimerend (&t); if (rank == 0) printf ("Took %g sec.\nInitial imbalance %g\n", dt[0], lb->imbalance); if (rank == 0) printf ("Timing %d partitioning tree based balancing steps...\n", num_time_steps); for (i = 0, dt[0] = 0.0, dt[1] = 0.0; i < num_time_steps; i ++) { timerstart (&t); dynlb_update (lb, n, point); dt[0] += timerend (&t); if (rank == 0) printf ("Step %d imbalance %g\n", i, lb->imbalance); timerstart (&t); unit_cube_step (0, n, point, velo, time_step); dt[1] += timerend (&t); } MPI_Allreduce (dt, gt, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); gt[0] /= (double)size * (double)num_time_steps; gt[1] /= (double)size * (double)num_time_steps; if (rank == 0) printf ("TREE: avg. integration: %g sec. per step, avg. balancing: %g sec. per step; ratio: %g\n", gt[0]+gt[1], gt[1], gt[1]/(gt[0]+gt[1])); free (point[0]); free (point[1]); free (point[2]); free (velo[0]); free (velo[1]); free (velo[2]); free (ranks); MPI_Finalize (); return 0; }
/* generate test */ static void gen () { double d [3], move [3]; switch (mode) { case GJK_CONVEX_CONVEX: { asize = bsize = 0; while (asize < minim) asize = rand () % limit; while (bsize < minim) bsize = rand () % limit; SETRAND (move, 1.0); for (int n = 0; n < asize; n ++) { SETRAND (apoint [n], 0.75); ADD (apoint [n], move, apoint [n]); } for (int n = 0; n < bsize; n ++) { SETRAND (bpoint [n], 0.75); SUB (bpoint [n], move, bpoint [n]); } free (a); free (b); a = hull ((double*)apoint, asize, &alength); b = hull ((double*)bpoint, bsize, &blength); double *va, *vb; int nva, nvb; va = TRI_Vertices (a, alength, &nva); vb = TRI_Vertices (b, blength, &nvb); gjk (va, nva, vb, nvb, p, q); free (va); free (vb); } break; case GJK_CONVEX_SPHERE: { asize = bsize = 0; while (asize < minim) asize = rand () % limit; SETRAND (move, 1.0); for (int n = 0; n < asize; n ++) { SETRAND (apoint [n], 0.75); ADD (apoint [n], move, apoint [n]); } free (a); a = hull ((double*)apoint, asize, &alength); SETRAND (center, 1.0); SUB (center, move, center); radius = 0.75 * DRAND (); double *va; int nva; va = TRI_Vertices (a, alength, &nva); gjk_convex_sphere (va, nva, center, radius, p, q); free (va); } break; case GJK_CONVEX_POINT: { asize = bsize = 0; while (asize < minim) asize = rand () % limit; SETRAND (move, 1.0); for (int n = 0; n < asize; n ++) { SETRAND (apoint [n], 0.75); ADD (apoint [n], move, apoint [n]); } free (a); a = hull ((double*)apoint, asize, &alength); SETRAND (center, 1.0); SUB (center, move, center); radius = 0.0; double *va; int nva; va = TRI_Vertices (a, alength, &nva); COPY (center, p); gjk_convex_point (va, nva, p, q); free (va); } break; case GJK_CONVEX_ELLIP: { asize = bsize = 0; while (asize < minim) asize = rand () % limit; SETRAND (move, 1.0); for (int n = 0; n < asize; n ++) { SETRAND (apoint [n], 0.75); ADD (apoint [n], move, apoint [n]); } free (a); a = hull ((double*)apoint, asize, &alength); SETRAND (el1_center, 1.0); SUB (el1_center, move, el1_center); el1_sca 
[0] = 0.75 * DRAND (); el1_sca [1] = 0.75 * DRAND (); el1_sca [2] = 0.75 * DRAND (); EXPMAP (el1_sca, el1_rot); double *va; int nva; va = TRI_Vertices (a, alength, &nva); gjk_convex_ellip (va, nva, el1_center, el1_sca, el1_rot, p, q); free (va); } break; case GJK_SPHERE_ELLIP: { SETRAND (move, 1.0); SETRAND (center, 1.0); radius = 0.75 * DRAND (); SETRAND (el1_center, 1.0); SUB (el1_center, move, el1_center); el1_sca [0] = 0.75 * DRAND (); el1_sca [1] = 0.75 * DRAND (); el1_sca [2] = 0.75 * DRAND (); EXPMAP (el1_sca, el1_rot); gjk_sphere_ellip (center, radius, el1_center, el1_sca, el1_rot, p, q); } break; case GJK_ELLIP_ELLIP: { SETRAND (move, 1.0); SETRAND (el1_center, 1.0); el1_sca [0] = 0.75 * DRAND (); el1_sca [1] = 0.75 * DRAND (); el1_sca [2] = 0.75 * DRAND (); EXPMAP (el1_sca, el1_rot); SETRAND (el2_center, 1.0); SUB (el2_center, move, el2_center); el2_sca [0] = 0.75 * DRAND (); el2_sca [1] = 0.75 * DRAND (); el2_sca [2] = 0.75 * DRAND (); EXPMAP (el2_sca, el2_rot); gjk_ellip_ellip (el1_center, el1_sca, el1_rot, el2_center, el2_sca, el2_rot, p, q); } break; case GJK_ELLIP_POINT: { SETRAND (move, 1.0); SETRAND (el1_center, 1.0); el1_sca [0] = 0.75 * DRAND (); el1_sca [1] = 0.75 * DRAND (); el1_sca [2] = 0.75 * DRAND (); EXPMAP (el1_sca, el1_rot); SETRAND (center, 1.0); SUB (center, move, center); radius = 0.0; COPY (center, p); gjk_ellip_point (el1_center, el1_sca, el1_rot, p, q); } break; } SUB (p, q, d); printf ("|p-q|=%g\n", LEN (d)); }