示例#1
0
/*
 * Sum the per-node operation counters and report the run performance.
 *
 * The counters in nrnb[0 .. nsb->nnodes-1] are added up into one total.
 * When bWriteStat is set, the master prints a performance summary to
 * stderr while every other node dumps its own counters to log.
 * The master always writes the summary to log and, when more than one
 * node took part, the per-node load through pr_load().
 *
 * confout, top and step are not used by this function.
 */
void finish_run(FILE *log,t_commrec *cr,char *confout,
		t_nsborder *nsb,t_topology *top,t_parm *parm,
		t_nrnb nrnb[],double nodetime,double realtime,int step,
		bool bWriteStat)
{
  t_nrnb totals;
  real   sim_time = 0;
  int    node,idx;

  /* Total every counter over all nodes (nodes are added in index order) */
  for (idx = 0; idx < eNRNB; idx++) {
    totals.n[idx] = 0;
    for (node = 0; node < nsb->nnodes; node++) {
      totals.n[idx] += nrnb[node].n[idx];
    }
  }

  if (bWriteStat) {
    /* nsteps * delta_t; stays 0 when no statistics were requested */
    sim_time = parm->ir.nsteps*parm->ir.delta_t;
    if (!MASTER(cr)) {
      print_nrnb(log,&(nrnb[nsb->nodeid]));
    }
    else {
      fprintf(stderr,"\n\n");
      print_perf(stderr,nodetime,realtime,sim_time,&totals,nsb->nnodes);
    }
  }

  if (MASTER(cr)) {
    print_perf(log,nodetime,realtime,sim_time,&totals,nsb->nnodes);
    if (nsb->nnodes > 1) {
      pr_load(log,nsb->nnodes,nrnb);
    }
  }
}
示例#2
0
/*
 * Entry point of the benchmark program.
 *
 * Reads the debug flag, the input size and the thread count from the
 * command line, runs benchmark() on generated input and prints the
 * measurements.  With the debug flag set, the input and output arrays
 * are printed as well and the output is compared against reference().
 */
int main(int argc, char* argv[])
{
	int debug_flag;
	int size;
	int threads;
	int plus_ops = 0;
	int acc_ops = 0;
	double elapsed;
	ATYPE* values;
	ATYPE* expected;

	if (0 != parse_args(argc, argv, &debug_flag, "input size", &size, "threads", &threads, NULL, NULL, NULL, NULL))
	{
		fail("Error while parsing args.");
	}

	/* both the input size and the thread count must be positive */
	if (!(size >= 1 && threads >= 1))
	{
		fail("Bad input");
	}

	omp_set_num_threads(threads);

	values = geninput(size);

	if (debug_flag)
	{
		print_array("input", values, size);
	}

	benchmark(values, size, &plus_ops, &acc_ops, &elapsed);

	if (!debug_flag)
	{
		print_perf(size, threads, plus_ops, acc_ops, elapsed);
	}
	else
	{
		/* ========== OUTPUT =========== */

		print_array("output", values, size);
		print_perf_debug(size, threads, plus_ops, acc_ops, elapsed);

		/* verify the benchmark result against the reference array */
		expected = reference(size);
		printf("%s", array_equal(values, expected, size) ? "SUCCESS\n" : "EPIC FAILURE\n");
		free(expected);
	}

	free(values);
}
示例#3
0
文件: test.c 项目: mdurrer/cgd
/*
 * Program entry point: parses the arguments, initializes, then runs the
 * event loop until a key handler asks to stop.
 *
 * Every loop iteration polls the keyboard and the mouse, forwards button
 * changes and (while a button is held) pointer motion to their handlers,
 * and redraws.  mx and my are declared outside this function.
 *
 * Returns 1 when argument parsing or initialization fails, 0 otherwise.
 */
int main(int argc, char **argv)
{
	int buttons;
	int last_x = -1, last_y = -1, last_buttons = 0;

	/* init() is only attempted when the arguments parsed cleanly */
	if(parse_args(argc, argv) == -1 || init() == -1) {
		return 1;
	}

	reset_timer();

	while(1) {
		/* a key is only fetched when one is pending; keyb() == 0 quits */
		if(kbhit() && keyb(getch()) == 0) {
			break;
		}

		buttons = read_mouse(&mx, &my);

		/* report button state changes */
		if(buttons != last_buttons) {
			mouse_button(buttons, mx, my);
			last_buttons = buttons;
		}

		/* report motion only while some button is down */
		if(mx != last_x || my != last_y) {
			if(buttons) {
				mouse_motion(mx, my);
			}
			last_x = mx;
			last_y = my;
		}

		redraw();
	}

	shutdown();
	print_perf();
	return 0;
}
示例#4
0
文件: perf.c 项目: EvanMisshula/julia
/*
 * Micro-benchmark driver.
 *
 * Each benchmark below is repeated NITER times.  The elapsed time of
 * every repetition is measured as the difference of two clock_now()
 * readings, and the smallest one (tmin) is handed to print_perf()
 * together with the benchmark's name.
 *
 * Returns 0 once all benchmarks have been reported.
 */
int main() {
    // Initialize RNG
    dsfmt_gv_init_gen_rand(0);

    // t: elapsed time of the current repetition; tmin: best time so far
    double t, tmin;

    // fib(20)
    assert(fib(20) == 6765);
    // f only accumulates the results; it is not read afterwards
    int f = 0;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        f += fib(20);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("fib", tmin);

    // parse_int: format 1000 random 32-bit values as hex and parse them back
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        // a 32-bit value printed with %x needs at most 8 digits plus the NUL
        char s[11];
        for (int k=0; k<1000; ++k) {
            uint32_t n = dsfmt_gv_genrand_uint32();
            sprintf(s, "%x", n);
            uint32_t m = (uint32_t)parse_int(s, 16);
            // the round trip must reproduce the original value
            assert(m == n);
        }
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("parse_int", tmin);

    // The two benchmarks below ("ones" and "AtA") are disabled.
    // // array constructor
    // tmin = INFINITY;
    // for (int i=0; i<NITER; ++i) {
    //     t = clock_now();
    //     double *a = ones(200,200);
    //     free(a);
    //     t = clock_now()-t;
    //     if (t < tmin) tmin = t;
    // }
    // print_perf("ones", tmin);
    // 
    // // A*A'
    // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
    // double *b = ones(200, 200);
    // tmin = INFINITY;
    // for (int i=0; i<NITER; ++i) {
    //     t = clock_now();
    //     double *c = matmul_aat(200, b);
    //     free(c);
    //     t = clock_now()-t;
    //     if (t < tmin) tmin = t;
    // }
    // free(b);
    // print_perf("AtA", tmin);

    // mandel
    int mandel_sum;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        mandel_sum = mandelperf();
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // checks the value produced by the last repetition
    assert(mandel_sum == 14719);
    print_perf("mandel", tmin);

    // sort: generating the 5000 values and freeing them is part of the timed region
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        double *d = myrand(5000);
        quicksort(d, 0, 5000-1);
        free(d);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("quicksort", tmin);

    // pi sum
    double pi;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        pi = pisum();
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // checks the value produced by the last repetition
    assert(fabs(pi-1.644834071848065) < 1e-12);
    print_perf("pi_sum", tmin);

    // rand mat stat
    // r is stored but only referenced by the disabled assertion below
    struct double_pair r;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        r = randmatstat(1000);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 && r.s2 < 1.0);
    print_perf("rand_mat_stat", tmin);

    // rand mat mul: the result check and the free are inside the timed region
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        double *C = randmatmul(1000);
        assert(0 <= C[0]);
        free(C);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("rand_mat_mul", tmin);

    // printfd
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        printfd(100000);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("printfd", tmin);

    return 0;
}
void
do_times(const size_t m,
         const size_t n,
         const size_t p,
         const size_t nloop,
         const bool value,
         const bool analytic,
         const bool sfad,
         const bool slfad,
         const bool dfad,
         const bool check)
{
    Perf perf_analytic;
    perf_analytic.time = 1.0;

    // Run analytic
    if (analytic) {
        perf_analytic = do_time_analytic<ViewArgs...>(m,n,p,nloop,check);
    }

    // Run value
    if (value) {
        Perf perf = do_time_val<ViewArgs...>(m,n,nloop,check);
        print_perf(perf, perf_analytic, "Value     ");
    }

    if (analytic) {
        print_perf(perf_analytic, perf_analytic, "Analytic  ");
    }

    if(analytic && p == SFadSize) {
        Perf perf =
            do_time_analytic_s<SFadSize, ViewArgs...>(m,n,nloop,check);
        print_perf(perf, perf_analytic, "Analytic-s");
    }

    if(analytic && p <= SLFadSize) {
        Perf perf =
            do_time_analytic_sl<SLFadSize, ViewArgs...>(m,n,p,nloop,check);
        print_perf(perf, perf_analytic, "Analytic-sl");
    }

    // Run SFad
    if (sfad && p == SFadSize) {
        Perf perf =
            do_time_fad<Sacado::Fad::SFad<double,SFadSize>, ViewArgs...>(m,n,p,nloop,check);
        print_perf(perf, perf_analytic, "SFad      ");
    }

    // Run SLFad
    if (slfad && p <= SLFadSize) {
        Perf perf =
            do_time_fad<Sacado::Fad::SLFad<double,SLFadSize>, ViewArgs...>(m,n,p,nloop,check);
        print_perf(perf, perf_analytic, "SLFad     ");
    }

    // Run DFad
    if (dfad) {
        Perf perf =
            do_time_fad<Sacado::Fad::DFad<double>, ViewArgs...>(m,n,p,nloop,check);
        print_perf(perf, perf_analytic, "DFad      ");
    }

}
示例#6
0
文件: perf_meta.c 项目: Starlink/hdf5
/*-------------------------------------------------------------------------
 * Function:	create_attrs_3
 *
 * Purpose:	Creates a new file, populates it through create_dsets(),
 *		then repeatedly reopens each dataset, adds a batch of
 *		BATCH_ATTRS attributes to it and closes it again.  The
 *		dataset open, the attribute create+close and the dataset
 *		close calls are timed and their averages are printed
 *		through print_perf().
 *
 * Return:	Success:	0
 *
 *		Failure:	-1
 *
 * Programmer:	Raymond Lu
 *		Friday, Oct 3, 2003
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static herr_t
create_attrs_3(void)
{
    hid_t	file, dataset, attr;
    char	filename[128];
    char	dset_name[64];
    char	attr_name[128];
    int		loop_num;
    int		i, j, k;
    /* Timing accumulators for attribute creation, dataset open and dataset
     * close.  NOTE(review): the meaning of the individual initializer
     * fields is defined by p_time, which is not visible here. */
    p_time      attr_t  = {0, 0, 0, 1000000, 0, ""};
    p_time      open_t  = {0, 0, 0, 1000000, 0, "H5Dopen2"};
    p_time      close_t = {0, 0, 0, 1000000, 0, ""};

#ifdef H5_HAVE_PARALLEL
    /* need the rank for printing data */
    /* NOTE(review): mpi_rank is written here but not read again in this
     * function. */
    int         mpi_rank;
    if(facc_type == FACC_MPIO)
        MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
#endif /*H5_HAVE_PARALLEL*/

    h5_fixname(FILENAME[2], fapl, filename, sizeof filename);

    if ((file=H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT,
	fapl)) < 0)
	goto error;

    if(create_dsets(file) < 0)
	goto error;

    /*
     * Create some(user specifies the number) attributes for each dataset
     * in a loop
     */
    /* Number of passes over the datasets; each pass adds BATCH_ATTRS
     * attributes per dataset. */
    loop_num = NUM_ATTRS/BATCH_ATTRS;

    for(i = 0; i < loop_num; i++) {
    	for(j = 0; j < NUM_DSETS; j++) {
            sprintf(dset_name, "dataset %d", j);
            /* Time the dataset open */
            open_t.start = retrieve_time();
            if((dataset = H5Dopen2(file, dset_name, H5P_DEFAULT)) < 0)
                goto error;
            perf(&open_t, open_t.start, retrieve_time());

            for(k = 0; k < BATCH_ATTRS; k++) {
                /* The pass and batch indices are both part of the name */
                sprintf(attr_name, "some attrs for each dset %d %d", i, k);
                /* Time attribute create + close together; the optional
                 * flush below is outside the timed region */
                attr_t.start = retrieve_time();
                if((attr = H5Acreate2(dataset, attr_name, H5T_NATIVE_DOUBLE,
                        small_space, H5P_DEFAULT, H5P_DEFAULT)) < 0)
                    goto error;
                if(H5Aclose(attr) < 0)
                    goto error;
                perf(&attr_t, attr_t.start, retrieve_time());
                if(flush_attr && H5Fflush(file,  H5F_SCOPE_LOCAL) < 0)
                    goto error;
            } /* end for */

            /* Time the dataset close; the optional flush is not timed */
            close_t.start = retrieve_time();
            if(H5Dclose(dataset) < 0)
                goto error;
            perf(&close_t, close_t.start, retrieve_time());
            if(flush_dset && H5Fflush(file,  H5F_SCOPE_LOCAL) < 0)
                goto error;
    	} /* end for */
    } /* end for */

#ifdef H5_HAVE_PARALLEL
    if(facc_type == FACC_MPIO)
        MPI_Barrier(MPI_COMM_WORLD);
#endif /*H5_HAVE_PARALLEL*/

#ifdef H5_HAVE_PARALLEL
    /* only process 0 reports if parallel */
    if (facc_type == FACC_DEFAULT || (facc_type != FACC_DEFAULT && MAINPROCESS))
#endif /*H5_HAVE_PARALLEL*/
    {
        /* Calculate the average time */
        /* NOTE(review): the attribute average divides by NUM_ATTRS*NUM_DSETS,
         * while loop_num*BATCH_ATTRS*NUM_DSETS attributes were created; the
         * two presumably agree only when NUM_ATTRS is a multiple of
         * BATCH_ATTRS -- confirm. */
        open_t.avg = open_t.total / (loop_num*NUM_DSETS);
        close_t.avg = close_t.total / (loop_num*NUM_DSETS);
        attr_t.avg = attr_t.total / (NUM_ATTRS*NUM_DSETS);

        /* Print out the performance result */
        fprintf(stderr, "3.  Create %d attributes for each of %d existing datasets for %d times\n",
            BATCH_ATTRS, NUM_DSETS, loop_num);
        print_perf(open_t, close_t, attr_t);
    }

    if (H5Fclose(file) < 0) goto error;

    return 0;

error:
    /* NOTE(review): any file, dataset or attribute handle that is still
     * open when an error occurs is not closed on this path. */
    return -1;
}
示例#7
0
/*
 * Gather the operation counters of all nodes at the end of a run and
 * print the flop count, load, wall-cycle and performance summaries.
 *
 * With more than one node the simulation master allocates a buffer and
 * (when built with GMX_MPI) gathers every node's counters into it; with a
 * single node the caller's nrnb array is used directly.  All reporting
 * except print_dd_statistics() happens on the simulation master only.
 *
 * fplog        log file; print_perf() is only called on it when non-NULL
 * cr           communication record (node counts, duty, communicator)
 * confout      not used by this function
 * inputrec     supplies the integrator type and delta_t
 * nrnb         this node's counters; never freed here
 * wcycle       wall-cycle counters handed to wallcycle_sum()/_print()
 * nodetime, realtime, nsteps_done
 *              passed through to print_perf()
 * bWriteStat   also print the performance summary to stderr
 */
void finish_run(FILE *fplog,t_commrec *cr,char *confout,
		t_inputrec *inputrec,
		t_nrnb nrnb[],gmx_wallcycle_t wcycle,
		double nodetime,double realtime,int nsteps_done,
		bool bWriteStat)
{
  int    i,j;
  t_nrnb *nrnb_all=NULL,ntot;
  real   delta_t;
  double nbfs,mflop;
  double cycles[ewcNR];

  wallcycle_sum(cr,wcycle,cycles);

  if (cr->nnodes > 1) {
    /* Only the master needs a receive buffer for the gathered counters */
    if (SIMMASTER(cr))
      snew(nrnb_all,cr->nnodes);
#ifdef GMX_MPI
    MPI_Gather(nrnb,sizeof(t_nrnb),MPI_BYTE,
	       nrnb_all,sizeof(t_nrnb),MPI_BYTE,
	       0,cr->mpi_comm_mysim);
#endif  
  } else {
    /* Single node: alias the caller's array, which this function
     * does not own
     */
    nrnb_all = nrnb;
  }
    
  if (SIMMASTER(cr)) {
    for(i=0; (i<eNRNB); i++)
      ntot.n[i]=0;
    for(i=0; (i<cr->nnodes); i++)
      for(j=0; (j<eNRNB); j++)
	ntot.n[j] += nrnb_all[i].n[j];

    print_flop(fplog,&ntot,&nbfs,&mflop);
  }

  if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
    print_dd_statistics(cr,inputrec,fplog);
  }

  if (SIMMASTER(cr)) {
    if (PARTDECOMP(cr)) {
      pr_load(fplog,cr,nrnb_all);
    }

    /* Release the gather buffer only after its last use above, and only
     * when it was allocated here: in the single-node case nrnb_all is the
     * caller's nrnb and must not be freed.
     */
    if (cr->nnodes > 1) {
      sfree(nrnb_all);
    }

    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);

    /* A time step is only reported for dynamics integrators */
    if (EI_DYNAMICS(inputrec->eI)) {
      delta_t = inputrec->delta_t;
    } else {
      delta_t = 0;
    }
    
    if (fplog) {
      print_perf(fplog,nodetime,realtime,cr->nnodes-cr->npmenodes,
		 nsteps_done,delta_t,nbfs,mflop);
    }
    if (bWriteStat) {
      print_perf(stderr,nodetime,realtime,cr->nnodes-cr->npmenodes,
		 nsteps_done,delta_t,nbfs,mflop);
    }
  }
}
示例#8
0
文件: perf.c 项目: ArchieCall/julia
/*
 * Micro-benchmark driver.
 *
 * Each benchmark below is repeated NITER times.  The elapsed time of
 * every repetition is measured as the difference of two clock_now()
 * readings, and the smallest one (tmin) is handed to print_perf()
 * together with the benchmark's name.  Benchmarks that run an inner loop
 * inside the timed region scale tmin down before reporting it.
 *
 * Returns 0 once all benchmarks have been reported.
 */
int main() {
    // Initialize RNG
    dsfmt_gv_init_gen_rand(0);

    // t: elapsed time of the current repetition; tmin: best time so far
    double t, tmin;

    // fib(20)
    assert(fib(20) == 6765);
    // f only accumulates the results; it is not read afterwards
    int f = 0;
    tmin = INFINITY;
    volatile int fibarg = 20; // prevent constant propagation
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        for (int j = 0; j < 1000; j++)
                f += fib(fibarg);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // 1000 calls per timed region: report the time of a single call
    print_perf("fib", tmin / 1000);

    // parse_int: format random 32-bit values as hex and parse them back
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        // a 32-bit value printed with %x needs at most 8 digits plus the NUL
        char s[11];
        for (int k=0; k<1000 * 100; ++k) {
            uint32_t n = dsfmt_gv_genrand_uint32();
            sprintf(s, "%x", n);
            uint32_t m = (uint32_t)parse_int(s, 16);
            // the round trip must reproduce the original value
            assert(m == n);
        }
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // 1000 * 100 conversions per timed region: report the time per 1000
    print_perf("parse_int", tmin / 100);

    // The two benchmarks below ("ones" and "AtA") are disabled.
    // // array constructor
    // tmin = INFINITY;
    // for (int i=0; i<NITER; ++i) {
    //     t = clock_now();
    //     double *a = ones(200,200);
    //     free(a);
    //     t = clock_now()-t;
    //     if (t < tmin) tmin = t;
    // }
    // print_perf("ones", tmin);
    //
    // // A*A'
    // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
    // double *b = ones(200, 200);
    // tmin = INFINITY;
    // for (int i=0; i<NITER; ++i) {
    //     t = clock_now();
    //     double *c = matmul_aat(200, b);
    //     free(c);
    //     t = clock_now()-t;
    //     if (t < tmin) tmin = t;
    // }
    // free(b);
    // print_perf("AtA", tmin);

    // mandel
    /* The initialization on the next line is deliberately volatile to
     * prevent gcc from optimizing away the entire loop.
     * (First observed in gcc 4.9.2)
     */
    static volatile int mandel_sum_init = 0;
    int mandel_sum2 = mandel_sum_init;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        int *M;
        t = clock_now();
        for (int j = 0; j < 100; j++) {
            M = mandelperf();
            // Only the first of the 100 results is summed and verified;
            // this check runs inside the timed region
            if (j == 0) {
                int mandel_sum = 0;
                // for (int ii = 0; ii < 21; ii++) {
                //     for (int jj = 0; jj < 26; jj++) {
                //         printf("%4d", M[26*ii + jj]);
                //     }
                //     printf("\n");
                // }
                for (int k = 0; k < 21*26; k++) {
                    mandel_sum += M[k];
                }
                assert(mandel_sum == 14791);
                mandel_sum2 += mandel_sum;
            }
            // every result array is released by the caller
            free(M);
        }
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // one verified sum was added per repetition
    assert(mandel_sum2 == 14791 * NITER);
    // 100 calls per timed region: report the time of a single call
    print_perf("mandel", tmin / 100);

    // sort: generating the 5000 values and freeing them is part of the timed region
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        double *d = myrand(5000);
        quicksort(d, 0, 5000-1);
        free(d);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("quicksort", tmin);

    // pi sum
    double pi;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        pi = pisum();
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // checks the value produced by the last repetition
    assert(fabs(pi-1.644834071848065) < 1e-12);
    print_perf("pi_sum", tmin);

    // rand mat stat
    // r is stored but only referenced by the disabled assertion below
    struct double_pair r;
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        r = randmatstat(1000);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 && r.s2 < 1.0);
    print_perf("rand_mat_stat", tmin);

    // rand mat mul: the result check and the free are inside the timed region
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        double *C = randmatmul(1000);
        assert(0 <= C[0]);
        free(C);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("rand_mat_mul", tmin);

    // printfd
    tmin = INFINITY;
    for (int i=0; i<NITER; ++i) {
        t = clock_now();
        printfd(100000);
        t = clock_now()-t;
        if (t < tmin) tmin = t;
    }
    print_perf("printfd", tmin);

    return 0;
}