// Runs the history-based lookup simulation.
//
// Prints a start banner, then runs an OpenMP parallel loop over particle
// histories p = 0 .. input.particles-1. Each history performs input.lookups
// consecutive calls to calculate_macro_xs(), re-sampling energy and material
// between calls. Per-thread counters and the verification hash are combined
// with a reduction(+) clause and written to the three output pointers.
//
// Parameters:
//   input          - run configuration, passed by value; this function reads
//                    numL, particles, lookups and nthreads from it.
//   data           - forwarded unchanged to calculate_macro_xs().
//   abrarov_result - out: receives the summed abrarov counter.
//   alls_result    - out: receives the summed alls counter.
//   vhash_result   - out: receives the summed verification hash.
//
// NOTE(review): this listing contains no brace lines and no #endif/#else
// lines, and the sprintf() call below stops after "E, mat," without its
// remaining arguments or closing parenthesis. Block structure can only be
// inferred from indentation here -- confirm against the upstream file.
// NOTE(review): no free() for xs or sigTfactors is visible in this listing;
// confirm whether they are released upstream.
void run_history_based_simulation(Input input, CalcDataPtrs data, long * abrarov_result, long * alls_result, unsigned long * vhash_result )
	printf("Beginning history based simulation...\n");
	// Totals that the reduction(+) clause below combines across threads.
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;
	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
		// Heap destination for the memcpy() of every lookup result.
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		// Local counters, handed by address to calculate_macro_xs().
		long abrarov = 0; 
		long alls = 0;

		// Optional PAPI setup: counter_init() is called under a critical
		// directive.
		#ifdef PAPI
		int eventset = PAPI_NULL; 
		int num_papi_events;
		#pragma omp critical
			counter_init(&eventset, &num_papi_events);
		// Scratch buffer of input.numL complex doubles, passed to
		// calculate_macro_xs() on every lookup.
		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., particle histories can be executed in any order
		#pragma omp for schedule(guided)
		for( int p = 0; p < input.particles; p++ )
			// Each history gets its own seed derived from its particle ID p
            unsigned long seed = ((unsigned long) p+ (unsigned long)1)* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
            double E = rn(&seed);
            int mat  = pick_mat(&seed);

			// Progress line, printed by thread 0 for every 35th particle ID
			#ifdef STATUS
			if( thread == 0 && p % 35 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(p / ( (double)input.particles /
							   (double) input.nthreads )) /
						(double) input.nthreads * 100.0);

			// This loop is dependent!
			// I.e., This loop must be executed sequentially,
			// as each lookup depends on results from the previous lookup.
			for( int i = 0; i < input.lookups; i++ )
				double macro_xs[4] = {0};

				calculate_macro_xs( macro_xs, mat, E, input, data, sigTfactors, &abrarov, &alls ); 

				// Results are copied onto heap to avoid some compiler
				// flags (-flto) from optimizing out function call
				memcpy(xs, macro_xs, 4*sizeof(double));

				// Verification hash calculation
                // This method provides a consistent hash across
                // architectures and compilers.
                // NOTE(review): the format string expects four more double
                // arguments after "E, mat," that are not in this listing;
                // presumably macro_xs[0..3] -- confirm.
                #ifdef VERIFICATION
                char line[256];
                sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
                       E, mat,
                unsigned long long vhash_local = hash(line, 10000);

                vhash += vhash_local;

                // Randomly pick next energy and material for the particle
                // Also incorporates results from macro_xs lookup to
                // enforce loop dependency.
                // In a real MC app, this dependency is expressed in terms
                // of branching physics sampling, whereas here we are just
                // artificially enforcing this dependence based on altering
                // the seed
                // NOTE(review): the two seed updates below are indented
                // differently and no else/braces are visible; confirm the
                // intended branching against the upstream file.
                for( int x = 0; x < 4; x++ )
					if( macro_xs[x] > 0 )
                    	seed += 1337*p;
						seed += 42;

                E   = rn(&seed);
                mat = pick_mat(&seed);


		// Accumulate global counters: each thread stores its own totals in
		// its private reduction copies; the reduction(+) clause adds those
		// copies together when the parallel region ends.
		g_abrarov = abrarov; 
		g_alls = alls;

		// Optional PAPI report header, then counter_stop().
		// NOTE(review): the barrier appears indented under the thread == 0
		// test, presumably an artifact of the missing braces -- confirm it
		// is reached by every thread.
		#ifdef PAPI
		if( thread == 0 )
			center_print("PAPI COUNTER RESULTS", 79);
			printf("Count          \tSmybol      \tDescription\n");
			#pragma omp barrier
		counter_stop(&eventset, num_papi_events);
	// Hand the combined totals back to the caller.
	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;
// File: Main.c -- Project: shamouda/ocr-apps
// Program entry point.
//
// Visible phases: read the command-line inputs, build (or read from
// "XS_data.dat") the nuclide and unionized energy grids, load material data,
// run the OpenMP cross-section lookup loop while accumulating a verification
// hash, and report timing and results through print_results().
//
// NOTE(review): this listing contains no brace lines and no #endif/#else
// lines, and several statements appear twice in alternative forms (for
// example the two declarations of energy_grid and of concs). These look like
// branches of stripped preprocessor conditionals -- confirm against the
// upstream file before relying on the control flow shown here.
int main( int argc, char* argv[] )
	// =====================================================================
	// Initialization & Command Line Read-In
	// =====================================================================
	// version is passed to print_inputs() below.
	int version = 13;
	int mype = 0;
	int max_procs = omp_get_num_procs();
	int i, thread, mat;
	unsigned long seed;
	double omp_start, omp_end, p_energy;
	// Verification hash shared by all threads; updated atomically below.
	unsigned long long vhash = 0;
	// NOTE(review): nprocs is only assigned by MPI_Comm_size() in this
	// listing; confirm it is initialized in non-MPI builds.
	int nprocs;

	#ifdef MPI
	MPI_Status stat;
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &mype);

	// rand() is only used in the serial initialization stages.
	// A custom RNG is used in parallel portions.

	// Process CLI Fields -- store in "Inputs" structure
	Inputs in = read_CLI( argc, argv );

	// Set number of OpenMP Threads

	// Print-out of Input Summary
	if( mype == 0 )
		print_inputs( in, nprocs, version );

	// =====================================================================
	// Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
	// =====================================================================

	// Allocate & fill energy grids
	#ifndef BINARY_READ
	if( mype == 0) printf("Generating Nuclide Energy Grids...\n");

	NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes,in.n_gridpoints);

	// NOTE(review): both generators are called back to back here;
	// presumably alternatives selected by a stripped conditional -- confirm.
	generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );

	// Sort grids by energy
	#ifndef BINARY_READ
	if( mype == 0) printf("Sorting Nuclide Energy Grids...\n");
	sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );

	// Prepare Unionized Energy Grid Framework
	// First form: the grid is generated from the nuclide grids.
	// Second form: the grid and one flat index array are allocated, and each
	// grid point's xs_ptrs is pointed at its own run of n_isotopes ints.
	// NOTE(review): energy_grid is declared twice; presumably the two forms
	// are #ifndef BINARY_READ / #else alternatives -- confirm.
	#ifndef BINARY_READ
	GridPoint * energy_grid = generate_energy_grid( in.n_isotopes,
	                          in.n_gridpoints, nuclide_grids );
	GridPoint * energy_grid = (GridPoint *)malloc( in.n_isotopes *
	                           in.n_gridpoints * sizeof( GridPoint ) );
	int * index_data = (int *) malloc( in.n_isotopes * in.n_gridpoints
	                   * in.n_isotopes * sizeof(int));
	for( i = 0; i < in.n_isotopes*in.n_gridpoints; i++ )
		energy_grid[i].xs_ptrs = &index_data[i*in.n_isotopes];

	// Double Indexing. Filling in energy_grid with pointers to the
	// nuclide_energy_grids.
	#ifndef BINARY_READ
	set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints );

	if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n");
	binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);

	// Get material data
	if( mype == 0 )
		printf("Loading Mats...\n");
	int *num_nucs  = load_num_nucs(in.n_isotopes);
	int **mats     = load_mats(num_nucs, in.n_isotopes);

	// NOTE(review): concs is declared twice; presumably alternatives
	// selected by a stripped conditional -- confirm.
	double **concs = load_concs_v(num_nucs);
	double **concs = load_concs(num_nucs);

	// NOTE(review): as listed, this return would end main before the
	// simulation runs; presumably the dump-and-exit path is guarded by a
	// stripped conditional -- confirm.
	if( mype == 0 ) printf("Dumping data to binary file...\n");
	binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! Exiting...\n");
	return 0;

	// =====================================================================
	// Cross Section (XS) Parallel Lookup Simulation Begins
	// =====================================================================

	// Outer benchmark loop can loop through all possible # of threads
	for( int bench_n = 1; bench_n <=omp_get_num_procs(); bench_n++ )
		in.nthreads = bench_n;

	if( mype == 0 )
		center_print("SIMULATION", 79);

	// Wall-clock start of the timed region; the matching omp_get_wtime()
	// call follows the parallel region.
	omp_start = omp_get_wtime();

	//initialize papi with one thread (master) here
	// NOTE(review): the condition guarding this error message is not in
	// this listing.
	#ifdef PAPI
		fprintf(stderr, "PAPI library init error!\n");

	// OpenMP compiler directives - declaring variables as shared or private
	#pragma omp parallel default(none) \
	private(i, thread, p_energy, mat, seed) \
	shared( max_procs, in, energy_grid, nuclide_grids, \
	        mats, concs, num_nucs, mype, vhash)
		// Initialize parallel PAPI counters
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
			counter_init(&eventset, &num_papi_events);

		// Output buffer for calculate_macro_xs() and the heap copy that
		// receives it after every lookup.
		double macro_xs_vector[5];
		double * xs = (double *) calloc(5, sizeof(double));

		// Initialize RNG seeds for threads
		thread = omp_get_thread_num();
		seed   = (thread+1)*19+17;

		// XS Lookup Loop
		#pragma omp for schedule(dynamic)
		for( i = 0; i < in.lookups; i++ )
			// Status text
			if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)in.lookups / (double) in.nthreads ))
						/ (double) in.nthreads * 100.0);

			// Randomly pick an energy and material for the particle
			// NOTE(review): two sampling variants appear back to back (rn_v()
			// under a critical directive, then rn(&seed)); presumably
			// alternatives selected by a stripped conditional -- confirm.
			#pragma omp critical
				p_energy = rn_v();
				mat      = pick_mat(&seed);
			p_energy = rn(&seed);
			mat      = pick_mat(&seed);

			// debugging
			//printf("E = %lf mat = %d\n", p_energy, mat);

			// This returns the macro_xs_vector, but we're not going
			// to do anything with it in this program, so return value
			// is written over.
			calculate_macro_xs( p_energy, mat, in.n_isotopes,
			                    in.n_gridpoints, num_nucs, concs,
			                    energy_grid, nuclide_grids, mats,
                                macro_xs_vector );

			// Copy results from above function call onto heap
			// so that compiler cannot optimize function out
			// (only occurs if -flto flag is used)
			memcpy(xs, macro_xs_vector, 5*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			// NOTE(review): the format string expects five more double
			// arguments after "p_energy, mat," that are not in this listing;
			// presumably macro_xs_vector[0..4] -- confirm.
			char line[256];
			sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf",
			       p_energy, mat,
			unsigned long long vhash_local = hash(line, 10000);
			#pragma omp atomic
			vhash += vhash_local;

		// Prints out thread local PAPI counters
		#ifdef PAPI
		if( mype == 0 && thread == 0 )
			center_print("PAPI COUNTER RESULTS", 79);
			printf("Count          \tSmybol      \tDescription\n");
		#pragma omp barrier
		counter_stop(&eventset, num_papi_events);


	#ifndef PAPI
	if( mype == 0)
		printf("\n" );
		printf("Simulation complete.\n" );

	omp_end = omp_get_wtime();

	// Print / Save Results and Exit
	print_results( in, mype, omp_end-omp_start, nprocs, vhash );


	// NOTE(review): nothing follows this #ifdef in the listing; presumably
	// the MPI shutdown call was stripped -- confirm.
	#ifdef MPI

	return 0;
// Runs the event-based lookup simulation.
//
// Prints a start banner, then runs an OpenMP parallel loop over
// i = 0 .. input.lookups-1. Each iteration seeds its own RNG state from i,
// samples one energy and material, and calls calculate_macro_xs() once.
// Per-thread counters and the verification hash are combined with a
// reduction(+) clause and written to the three output pointers.
//
// Parameters:
//   input          - run configuration, passed by value; this function reads
//                    numL, lookups and nthreads from it.
//   data           - forwarded unchanged to calculate_macro_xs().
//   abrarov_result - out: receives the summed abrarov counter.
//   alls_result    - out: receives the summed alls counter.
//   vhash_result   - out: receives the summed verification hash.
//
// NOTE(review): this listing contains no brace lines and no #endif/#else
// lines, and the sprintf() call below stops after "E, mat," without its
// remaining arguments or closing parenthesis. Block structure can only be
// inferred from indentation here -- confirm against the upstream file.
// NOTE(review): no free() for xs or sigTfactors is visible in this listing;
// confirm whether they are released upstream.
void run_event_based_simulation(Input input, CalcDataPtrs data, long * abrarov_result, long * alls_result, unsigned long * vhash_result )
	printf("Beginning event based simulation...\n");
	// Totals that the reduction(+) clause below combines across threads.
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;
	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
		// Heap destination for the memcpy() of every lookup result.
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		// Local counters, handed by address to calculate_macro_xs().
		long abrarov = 0; 
		long alls = 0;

		// Optional PAPI setup: counter_init() is called under a critical
		// directive.
		#ifdef PAPI
		int eventset = PAPI_NULL; 
		int num_papi_events;
		#pragma omp critical
			counter_init(&eventset, &num_papi_events);
		// Scratch buffer of input.numL complex doubles, passed to
		// calculate_macro_xs() on every lookup.
		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., macroscopic cross section lookups in the event based simulation
		// can be executed in any order.
		#pragma omp for schedule(guided)
		for( int i = 0; i < input.lookups; i++ )
			// Each lookup gets its own seed derived from the loop index i
			unsigned long seed = ((unsigned long) i+ (unsigned long)1)* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
			double E = rn(&seed);
			int mat  = pick_mat(&seed);

			// Progress line, printed by thread 0 for every 2000th index
			#ifdef STATUS
			if( thread == 0 && i % 2000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)input.lookups /
							   (double) input.nthreads )) /
						(double) input.nthreads * 100.0);

			double macro_xs[4] = {0};

			calculate_macro_xs( macro_xs, mat, E, input, data, sigTfactors, &abrarov, &alls ); 

			// Results are copied onto heap to avoid some compiler
			// flags (-flto) from optimizing out function call
			memcpy(xs, macro_xs, 4*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			// NOTE(review): the format string expects four more double
			// arguments after "E, mat," that are not in this listing;
			// presumably macro_xs[0..3] -- confirm.
			char line[256];
			sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
				   E, mat,
			unsigned long long vhash_local = hash(line, 10000);

			vhash += vhash_local;


		// Accumulate global counters: each thread stores its own totals in
		// its private reduction copies; the reduction(+) clause adds those
		// copies together when the parallel region ends.
		g_abrarov = abrarov; 
		g_alls = alls;

		// Optional PAPI report header, then counter_stop().
		// NOTE(review): the barrier appears indented under the thread == 0
		// test, presumably an artifact of the missing braces -- confirm it
		// is reached by every thread.
		#ifdef PAPI
		if( thread == 0 )
			center_print("PAPI COUNTER RESULTS", 79);
			printf("Count          \tSmybol      \tDescription\n");
			#pragma omp barrier
		counter_stop(&eventset, num_papi_events);
	// Hand the combined totals back to the caller.
	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;