/*
 * Sum the operation counters of all nodes and report the run performance.
 *
 * The counters in nrnb[0 .. nsb->nnodes-1] are added up per counter index.
 * When bWriteStat is set, the master prints the performance summary to
 * stderr, while every other node prints its own counters to the log.
 * The master always writes the summary to the log and, when more than one
 * node took part, the load information as well.
 *
 * confout, top and step are not used by this function.
 */
void finish_run(FILE *log,t_commrec *cr,char *confout,
                t_nsborder *nsb,t_topology *top,t_parm *parm,
                t_nrnb nrnb[],double nodetime,double realtime,int step,
                bool bWriteStat)
{
  t_nrnb totals;
  real   sim_time = 0;
  int    node,counter;

  /* Accumulate each counter over the nodes, in node order */
  for(counter=0; (counter<eNRNB); counter++) {
    totals.n[counter] = 0;
    for(node=0; (node<nsb->nnodes); node++) {
      totals.n[counter] += nrnb[node].n[counter];
    }
  }

  if (bWriteStat) {
    sim_time = parm->ir.nsteps*parm->ir.delta_t;

    if (!MASTER(cr)) {
      /* Non-master nodes only log their own counters */
      print_nrnb(log,&(nrnb[nsb->nodeid]));
    }
    else {
      fprintf(stderr,"\n\n");
      print_perf(stderr,nodetime,realtime,sim_time,&totals,nsb->nnodes);
    }
  }

  /* Everything below is written by the master only */
  if (!MASTER(cr)) {
    return;
  }

  print_perf(log,nodetime,realtime,sim_time,&totals,nsb->nnodes);
  if (nsb->nnodes > 1) {
    pr_load(log,nsb->nnodes,nrnb);
  }
}
int main(int argc, char* argv[]) { int debug_flag; int n; int t; if (parse_args(argc, argv, &debug_flag, "input size", &n, "threads", &t, NULL, NULL, NULL, NULL) != 0) { fail("Error while parsing args."); } if ((n < 1) || (t < 1)) { fail("Bad input"); } omp_set_num_threads(t); ATYPE* data = geninput(n); int plus_ops = 0; int acc_ops = 0; if (debug_flag) print_array("input", data, n); double dtime; benchmark(data, n, &plus_ops, &acc_ops, &dtime); if (debug_flag) { /* ========== OUTPUT =========== */ print_array("output", data, n); print_perf_debug(n, t, plus_ops, acc_ops, dtime); ATYPE* ref = reference(n); if (array_equal(data, ref, n)) { printf("SUCCESS\n"); } else { printf("EPIC FAILURE\n"); } free(ref); } else { print_perf(n, t, plus_ops, acc_ops, dtime); } free(data); }
/*
 * Program entry point: parse the arguments, initialize, then run the
 * input/redraw loop until the key handler asks to quit.
 *
 * Returns 1 when argument parsing or initialization fails, 0 otherwise.
 * The mouse position is kept in mx/my, which are not declared in this
 * function.
 */
int main(int argc, char **argv)
{
	int buttons;
	int last_buttons = 0;
	int last_x = -1, last_y = -1;

	/* init() is only attempted when the arguments parsed correctly */
	if(parse_args(argc, argv) == -1 || init() == -1) {
		return 1;
	}

	reset_timer();

	while(1) {
		/* keyb() returning 0 ends the main loop */
		if(kbhit() && keyb(getch()) == 0) {
			break;
		}

		buttons = read_mouse(&mx, &my);

		/* report button state changes */
		if(buttons != last_buttons) {
			mouse_button(buttons, mx, my);
			last_buttons = buttons;
		}

		/* report motion only while a button is held down */
		if(!(mx == last_x && my == last_y)) {
			if(buttons) {
				mouse_motion(mx, my);
			}
			last_x = mx;
			last_y = my;
		}

		redraw();
	}

	shutdown();
	print_perf();
	return 0;
}
int main() { // Initialize RNG dsfmt_gv_init_gen_rand(0); double t, tmin; // fib(20) assert(fib(20) == 6765); int f = 0; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); f += fib(20); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("fib", tmin); // parse_bin tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); char s[11]; for (int k=0; k<1000; ++k) { uint32_t n = dsfmt_gv_genrand_uint32(); sprintf(s, "%x", n); uint32_t m = (uint32_t)parse_int(s, 16); assert(m == n); } t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("parse_int", tmin); // // array constructor // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *a = ones(200,200); // free(a); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // print_perf("ones", tmin); // // // A*A' // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) // double *b = ones(200, 200); // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *c = matmul_aat(200, b); // free(c); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // free(b); // print_perf("AtA", tmin); // mandel int mandel_sum; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); mandel_sum = mandelperf(); t = clock_now()-t; if (t < tmin) tmin = t; } assert(mandel_sum == 14719); print_perf("mandel", tmin); // sort tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *d = myrand(5000); quicksort(d, 0, 5000-1); free(d); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("quicksort", tmin); // pi sum double pi; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); pi = pisum(); t = clock_now()-t; if (t < tmin) tmin = t; } assert(fabs(pi-1.644834071848065) < 1e-12); print_perf("pi_sum", tmin); // rand mat stat struct double_pair r; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); r = randmatstat(1000); t = clock_now()-t; if (t < tmin) tmin = t; } // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 
&& r.s2 < 1.0); print_perf("rand_mat_stat", tmin); // rand mat mul tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *C = randmatmul(1000); assert(0 <= C[0]); free(C); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("rand_mat_mul", tmin); // printfd tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); printfd(100000); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("printfd", tmin); return 0; }
void do_times(const size_t m, const size_t n, const size_t p, const size_t nloop, const bool value, const bool analytic, const bool sfad, const bool slfad, const bool dfad, const bool check) { Perf perf_analytic; perf_analytic.time = 1.0; // Run analytic if (analytic) { perf_analytic = do_time_analytic<ViewArgs...>(m,n,p,nloop,check); } // Run value if (value) { Perf perf = do_time_val<ViewArgs...>(m,n,nloop,check); print_perf(perf, perf_analytic, "Value "); } if (analytic) { print_perf(perf_analytic, perf_analytic, "Analytic "); } if(analytic && p == SFadSize) { Perf perf = do_time_analytic_s<SFadSize, ViewArgs...>(m,n,nloop,check); print_perf(perf, perf_analytic, "Analytic-s"); } if(analytic && p <= SLFadSize) { Perf perf = do_time_analytic_sl<SLFadSize, ViewArgs...>(m,n,p,nloop,check); print_perf(perf, perf_analytic, "Analytic-sl"); } // Run SFad if (sfad && p == SFadSize) { Perf perf = do_time_fad<Sacado::Fad::SFad<double,SFadSize>, ViewArgs...>(m,n,p,nloop,check); print_perf(perf, perf_analytic, "SFad "); } // Run SLFad if (slfad && p <= SLFadSize) { Perf perf = do_time_fad<Sacado::Fad::SLFad<double,SLFadSize>, ViewArgs...>(m,n,p,nloop,check); print_perf(perf, perf_analytic, "SLFad "); } // Run DFad if (dfad) { Perf perf = do_time_fad<Sacado::Fad::DFad<double>, ViewArgs...>(m,n,p,nloop,check); print_perf(perf, perf_analytic, "DFad "); } }
/*------------------------------------------------------------------------- * Function: create_attrs_3 * * Purpose: Attempts to create some attributes for each dataset in a * loop. * * Return: Success: 0 * * Failure: -1 * * Programmer: Raymond Lu * Friday, Oct 3, 2003 * * Modifications: * *------------------------------------------------------------------------- */ static herr_t create_attrs_3(void) { hid_t file, dataset, attr; char filename[128]; char dset_name[64]; char attr_name[128]; int loop_num; int i, j, k; p_time attr_t = {0, 0, 0, 1000000, 0, ""}; p_time open_t = {0, 0, 0, 1000000, 0, "H5Dopen2"}; p_time close_t = {0, 0, 0, 1000000, 0, ""}; #ifdef H5_HAVE_PARALLEL /* need the rank for printing data */ int mpi_rank; if(facc_type == FACC_MPIO) MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); #endif /*H5_HAVE_PARALLEL*/ h5_fixname(FILENAME[2], fapl, filename, sizeof filename); if ((file=H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl)) < 0) goto error; if(create_dsets(file) < 0) goto error; /* * Create some(user specifies the number) attributes for each dataset * in a loop */ loop_num = NUM_ATTRS/BATCH_ATTRS; for(i = 0; i < loop_num; i++) { for(j = 0; j < NUM_DSETS; j++) { sprintf(dset_name, "dataset %d", j); open_t.start = retrieve_time(); if((dataset = H5Dopen2(file, dset_name, H5P_DEFAULT)) < 0) goto error; perf(&open_t, open_t.start, retrieve_time()); for(k = 0; k < BATCH_ATTRS; k++) { sprintf(attr_name, "some attrs for each dset %d %d", i, k); attr_t.start = retrieve_time(); if((attr = H5Acreate2(dataset, attr_name, H5T_NATIVE_DOUBLE, small_space, H5P_DEFAULT, H5P_DEFAULT)) < 0) goto error; if(H5Aclose(attr) < 0) goto error; perf(&attr_t, attr_t.start, retrieve_time()); if(flush_attr && H5Fflush(file, H5F_SCOPE_LOCAL) < 0) goto error; } /* end for */ close_t.start = retrieve_time(); if(H5Dclose(dataset) < 0) goto error; perf(&close_t, close_t.start, retrieve_time()); if(flush_dset && H5Fflush(file, H5F_SCOPE_LOCAL) < 0) goto error; } /* end for */ } /* end 
for */ #ifdef H5_HAVE_PARALLEL if(facc_type == FACC_MPIO) MPI_Barrier(MPI_COMM_WORLD); #endif /*H5_HAVE_PARALLEL*/ #ifdef H5_HAVE_PARALLEL /* only process 0 reports if parallel */ if (facc_type == FACC_DEFAULT || (facc_type != FACC_DEFAULT && MAINPROCESS)) #endif /*H5_HAVE_PARALLEL*/ { /* Calculate the average time */ open_t.avg = open_t.total / (loop_num*NUM_DSETS); close_t.avg = close_t.total / (loop_num*NUM_DSETS); attr_t.avg = attr_t.total / (NUM_ATTRS*NUM_DSETS); /* Print out the performance result */ fprintf(stderr, "3. Create %d attributes for each of %d existing datasets for %d times\n", BATCH_ATTRS, NUM_DSETS, loop_num); print_perf(open_t, close_t, attr_t); } if (H5Fclose(file) < 0) goto error; return 0; error: return -1; }
/*
 * Collect the operation counters of all nodes and print the end-of-run
 * statistics.
 *
 * With more than one node, the per-node counters are gathered on the
 * simulation master into a temporary array; with a single node the
 * caller's nrnb array is used directly.  The master sums the counters,
 * prints the flop accounting, the load (with particle decomposition), the
 * wall-cycle accounting and the performance summary.  The summary goes to
 * fplog when it is non-NULL and to stderr when bWriteStat is set.
 * Domain-decomposition statistics are printed by the nodes that have the
 * DUTY_PP duty.
 *
 * confout is not used by this function.
 */
void finish_run(FILE *fplog,t_commrec *cr,char *confout,
                t_inputrec *inputrec,
                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
                double nodetime,double realtime,int nsteps_done,
                bool bWriteStat)
{
  int    i,j;
  t_nrnb *nrnb_all=NULL,ntot;
  real   delta_t;
  double nbfs,mflop;
  double cycles[ewcNR];

  wallcycle_sum(cr,wcycle,cycles);

  if (cr->nnodes > 1) {
    /* Only the master needs room for the counters of all nodes */
    if (SIMMASTER(cr))
      snew(nrnb_all,cr->nnodes);
#ifdef GMX_MPI
    MPI_Gather(nrnb,sizeof(t_nrnb),MPI_BYTE,
               nrnb_all,sizeof(t_nrnb),MPI_BYTE,
               0,cr->mpi_comm_mysim);
#endif
  } else {
    /* Single node: alias the caller's array, which is NOT owned here */
    nrnb_all = nrnb;
  }

  if (SIMMASTER(cr)) {
    for(i=0; (i<eNRNB); i++)
      ntot.n[i]=0;
    for(i=0; (i<cr->nnodes); i++)
      for(j=0; (j<eNRNB); j++)
        ntot.n[j] += nrnb_all[i].n[j];

    print_flop(fplog,&ntot,&nbfs,&mflop);
  }

  if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
    print_dd_statistics(cr,inputrec,fplog);
  }

  if (SIMMASTER(cr)) {
    if (PARTDECOMP(cr)) {
      pr_load(fplog,cr,nrnb_all);
    }

    /* nrnb_all must stay valid until pr_load() is done with it.  Only the
     * array allocated above for the gather is released; in the single-node
     * case nrnb_all is the caller's nrnb and has to be left alone.
     */
    if (cr->nnodes > 1) {
      sfree(nrnb_all);
      nrnb_all = NULL;
    }

    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);

    /* A time step is only passed on for dynamics integrators */
    if (EI_DYNAMICS(inputrec->eI)) {
      delta_t = inputrec->delta_t;
    } else {
      delta_t = 0;
    }

    if (fplog) {
      print_perf(fplog,nodetime,realtime,cr->nnodes-cr->npmenodes,
                 nsteps_done,delta_t,nbfs,mflop);
    }
    if (bWriteStat) {
      print_perf(stderr,nodetime,realtime,cr->nnodes-cr->npmenodes,
                 nsteps_done,delta_t,nbfs,mflop);
    }
  }
}
int main() { // Initialize RNG dsfmt_gv_init_gen_rand(0); double t, tmin; // fib(20) assert(fib(20) == 6765); int f = 0; tmin = INFINITY; volatile int fibarg = 20; // prevent constant propagation for (int i=0; i<NITER; ++i) { t = clock_now(); for (int j = 0; j < 1000; j++) f += fib(fibarg); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("fib", tmin / 1000); // parse_bin tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); char s[11]; for (int k=0; k<1000 * 100; ++k) { uint32_t n = dsfmt_gv_genrand_uint32(); sprintf(s, "%x", n); uint32_t m = (uint32_t)parse_int(s, 16); assert(m == n); } t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("parse_int", tmin / 100); // // array constructor // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *a = ones(200,200); // free(a); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // print_perf("ones", tmin); // // // A*A' // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) // double *b = ones(200, 200); // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *c = matmul_aat(200, b); // free(c); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // free(b); // print_perf("AtA", tmin); // mandel /* The initialization on the next line is deliberately volatile to * prevent gcc from optimizing away the entire loop. 
* (First observed in gcc 4.9.2) */ static volatile int mandel_sum_init = 0; int mandel_sum2 = mandel_sum_init; tmin = INFINITY; for (int i=0; i<NITER; ++i) { int *M; t = clock_now(); for (int j = 0; j < 100; j++) { M = mandelperf(); if (j == 0) { int mandel_sum = 0; // for (int ii = 0; ii < 21; ii++) { // for (int jj = 0; jj < 26; jj++) { // printf("%4d", M[26*ii + jj]); // } // printf("\n"); // } for (int k = 0; k < 21*26; k++) { mandel_sum += M[k]; } assert(mandel_sum == 14791); mandel_sum2 += mandel_sum; } free(M); } t = clock_now()-t; if (t < tmin) tmin = t; } assert(mandel_sum2 == 14791 * NITER); print_perf("mandel", tmin / 100); // sort tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *d = myrand(5000); quicksort(d, 0, 5000-1); free(d); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("quicksort", tmin); // pi sum double pi; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); pi = pisum(); t = clock_now()-t; if (t < tmin) tmin = t; } assert(fabs(pi-1.644834071848065) < 1e-12); print_perf("pi_sum", tmin); // rand mat stat struct double_pair r; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); r = randmatstat(1000); t = clock_now()-t; if (t < tmin) tmin = t; } // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 && r.s2 < 1.0); print_perf("rand_mat_stat", tmin); // rand mat mul tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *C = randmatmul(1000); assert(0 <= C[0]); free(C); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("rand_mat_mul", tmin); // printfd tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); printfd(100000); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("printfd", tmin); return 0; }