double randmtzig_randn (void) { while (1) { //#ifdef __LP64__ #if 1 /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ const randmtzig_uint64_t r = NRANDI; const randmtzig_int64_t rabs=r>>1; const int idx = (int)(rabs&0xFF); const double x = ( r&1 ? -rabs : rabs) * wi[idx]; #else double x; int si,idx; register randmtzig_uint32_t lo, hi; randmtzig_int64_t rabs; randmtzig_uint32_t *p = (randmtzig_uint32_t *)&rabs; lo = dsfmt_gv_genrand_uint32(); idx = lo&0xFF; hi = dsfmt_gv_genrand_uint32(); si = hi&UMASK; p[0] = lo; p[1] = hi&0x1FFFFF; x = ( si ? -rabs : rabs ) * wi[idx]; # endif if (rabs < (randmtzig_int64_t)ki[idx]) return x; /* 99.3% of the time we return here 1st try */ else if (idx == 0) { /* As stated in Marsaglia and Tsang * * For the normal tail, the method of Marsaglia[5] provides: * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, * then return r+x. Except that r+x is always in the positive * tail!!!! Any thing random might be used to determine the * sign, but as we already have r we might as well use it * * [PAK] but not the bottom 8 bits, since they are all 0 here! */ double xx, yy; do { xx = - ZIGGURAT_NOR_INV_R * log (RANDU); yy = - log (RANDU); } while ( yy+yy <= xx*xx); return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); } else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp(-0.5*x*x)) return x; } }
/*
 * Build a 54-bit random integer from two calls to dsfmt_gv_genrand_uint32():
 * the first call supplies the low 32 bits, the second call, masked down to
 * 22 bits, supplies the bits above them.
 */
inline static randmtzig_uint64_t randi54 (void)
{
    /* The low word is drawn before the high word; keep this order. */
    const randmtzig_uint32_t low_word = dsfmt_gv_genrand_uint32();
    const randmtzig_uint32_t high_word = dsfmt_gv_genrand_uint32() & 0x3FFFFF;
//#ifndef __LP64__
#if 0
    /* Disabled variant: store the two words directly into the result. */
    randmtzig_uint64_t packed;
    randmtzig_uint32_t *words = (randmtzig_uint32_t *)&packed;
    words[0] = low_word;
    words[1] = high_word;
    return packed;
#else
    /* Widen first so the shift happens in 64 bits. */
    randmtzig_uint64_t packed = high_word;
    packed <<= 32;
    packed |= low_word;
    return packed;
#endif
}
/*
 * Benchmark driver for the random number generators.
 *
 * Usage: randmtzig <n>
 *
 * Times, for the sample count n given on the command line:
 *   - one bulk uniform fill of n doubles,
 *   - n single uniform draws,
 *   - 2*n 32-bit integer draws,
 *   - n normal draws,
 * and finally prints up to the first 10 normal samples.
 *
 * Returns 0 on success and -1 when the argument is missing or invalid or
 * when a buffer cannot be allocated.
 */
int main(int ac, char *av[])
{
    enum {
        /* Largest n for which the int expression 2*n cannot overflow
         * (POSIX guarantees int is at least 32 bits wide). */
        MAX_SAMPLES = 0x3FFFFFFF,
        /* Number of normal samples echoed at the end. */
        PREVIEW_COUNT = 10
    };

    if (ac < 2) {
        printf("Usage: randmtzig <n>\n");
        return (-1);
    }

    /* strtol instead of atoi so that garbage and out-of-range values are
     * detected rather than silently turned into a bogus count. */
    char *end = NULL;
    const long requested = strtol(av[1], &end, 10);
    if (end == av[1] || *end != '\0' || requested < 1 || requested > MAX_SAMPLES) {
        fprintf(stderr, "randmtzig: <n> must be an integer between 1 and %d\n",
                (int)MAX_SAMPLES);
        return (-1);
    }
    const int n = (int)requested;

    dsfmt_gv_init_gen_rand(0);

    /* posix_memalign leaves the pointer unspecified on failure, so its
     * return value has to be checked before the buffers are used. */
    void *raw = NULL;
    if (posix_memalign(&raw, 16, (size_t)n * sizeof(double)) != 0) {
        fprintf(stderr, "randmtzig: cannot allocate %d doubles\n", n);
        return (-1);
    }
    double *p = raw;

    raw = NULL;
    if (posix_memalign(&raw, 16, 2 * (size_t)n * sizeof(uint32_t)) != 0) {
        fprintf(stderr, "randmtzig: cannot allocate %d 32-bit integers\n", 2 * n);
        free(p);
        return (-1);
    }
    uint32_t *u = raw;

    /* clock() returns clock_t, so keep the start stamp in that type. */
    clock_t t1 = clock();
    /* NOTE(review): the bulk fill may impose its own constraints on n
     * (minimum size, evenness) -- confirm against the dSFMT documentation. */
    dsfmt_gv_fill_array_close_open(p, n);
    printf("Uniform fill (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);

    t1 = clock();
    for (int i = 0; i < n; i++) {
        p[i] = dsfmt_gv_genrand_close_open();
    }
    printf("Uniform (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);

    t1 = clock();
    for (int i = 0; i < 2*n; i++) {
        u[i] = dsfmt_gv_genrand_uint32();
    }
    printf("Uniform 32-bit ints (2*n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);

    memset((void *)p, 0, (size_t)n * sizeof(double));
    t1 = clock();
    for (int i = 0; i < n; i++) {
        p[i] = randmtzig_gv_randn();
    }
    printf("Normal (n): %f\n", (clock() - t1) / (double) CLOCKS_PER_SEC);

    /* Never read past the n samples that were actually generated. */
    for (int i = 0; i < PREVIEW_COUNT && i < n; i++) {
        printf("%lf\n", p[i]);
    }

    free(u);
    free(p);
    return 0;
}
int main() { // Initialize RNG dsfmt_gv_init_gen_rand(0); double t, tmin; // fib(20) assert(fib(20) == 6765); int f = 0; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); f += fib(20); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("fib", tmin); // parse_bin tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); char s[11]; for (int k=0; k<1000; ++k) { uint32_t n = dsfmt_gv_genrand_uint32(); sprintf(s, "%x", n); uint32_t m = (uint32_t)parse_int(s, 16); assert(m == n); } t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("parse_int", tmin); // // array constructor // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *a = ones(200,200); // free(a); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // print_perf("ones", tmin); // // // A*A' // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) // double *b = ones(200, 200); // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *c = matmul_aat(200, b); // free(c); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // free(b); // print_perf("AtA", tmin); // mandel int mandel_sum; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); mandel_sum = mandelperf(); t = clock_now()-t; if (t < tmin) tmin = t; } assert(mandel_sum == 14719); print_perf("mandel", tmin); // sort tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *d = myrand(5000); quicksort(d, 0, 5000-1); free(d); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("quicksort", tmin); // pi sum double pi; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); pi = pisum(); t = clock_now()-t; if (t < tmin) tmin = t; } assert(fabs(pi-1.644834071848065) < 1e-12); print_perf("pi_sum", tmin); // rand mat stat struct double_pair r; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); r = randmatstat(1000); t = clock_now()-t; if (t < tmin) tmin = t; } // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 
&& r.s2 < 1.0); print_perf("rand_mat_stat", tmin); // rand mat mul tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *C = randmatmul(1000); assert(0 <= C[0]); free(C); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("rand_mat_mul", tmin); // printfd tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); printfd(100000); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("printfd", tmin); return 0; }
int main() { // Initialize RNG dsfmt_gv_init_gen_rand(0); double t, tmin; // fib(20) assert(fib(20) == 6765); int f = 0; tmin = INFINITY; volatile int fibarg = 20; // prevent constant propagation for (int i=0; i<NITER; ++i) { t = clock_now(); for (int j = 0; j < 1000; j++) f += fib(fibarg); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("fib", tmin / 1000); // parse_bin tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); char s[11]; for (int k=0; k<1000 * 100; ++k) { uint32_t n = dsfmt_gv_genrand_uint32(); sprintf(s, "%x", n); uint32_t m = (uint32_t)parse_int(s, 16); assert(m == n); } t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("parse_int", tmin / 100); // // array constructor // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *a = ones(200,200); // free(a); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // print_perf("ones", tmin); // // // A*A' // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) // double *b = ones(200, 200); // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *c = matmul_aat(200, b); // free(c); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // free(b); // print_perf("AtA", tmin); // mandel /* The initialization on the next line is deliberately volatile to * prevent gcc from optimizing away the entire loop. 
* (First observed in gcc 4.9.2) */ static volatile int mandel_sum_init = 0; int mandel_sum2 = mandel_sum_init; tmin = INFINITY; for (int i=0; i<NITER; ++i) { int *M; t = clock_now(); for (int j = 0; j < 100; j++) { M = mandelperf(); if (j == 0) { int mandel_sum = 0; // for (int ii = 0; ii < 21; ii++) { // for (int jj = 0; jj < 26; jj++) { // printf("%4d", M[26*ii + jj]); // } // printf("\n"); // } for (int k = 0; k < 21*26; k++) { mandel_sum += M[k]; } assert(mandel_sum == 14791); mandel_sum2 += mandel_sum; } free(M); } t = clock_now()-t; if (t < tmin) tmin = t; } assert(mandel_sum2 == 14791 * NITER); print_perf("mandel", tmin / 100); // sort tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *d = myrand(5000); quicksort(d, 0, 5000-1); free(d); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("quicksort", tmin); // pi sum double pi; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); pi = pisum(); t = clock_now()-t; if (t < tmin) tmin = t; } assert(fabs(pi-1.644834071848065) < 1e-12); print_perf("pi_sum", tmin); // rand mat stat struct double_pair r; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); r = randmatstat(1000); t = clock_now()-t; if (t < tmin) tmin = t; } // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 && r.s2 < 1.0); print_perf("rand_mat_stat", tmin); // rand mat mul tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *C = randmatmul(1000); assert(0 <= C[0]); free(C); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("rand_mat_mul", tmin); // printfd tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); printfd(100000); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("printfd", tmin); return 0; }