コード例 #1
0
ファイル: io.c プロジェクト: ANL-CESAR/SimpleMOC-kernel
// Writes the "INPUT SUMMARY" table to stdout: a centered title, a border,
// one left-aligned label/value row per input field, and a closing border.
// Rows for the thread count and the PAPI event are only compiled in when
// OPENMP / PAPI are defined; the PAPI row is additionally only printed
// when I->papi_event_set equals -1.
void print_input_summary(Input * I)
{
	// Resolve the compile-time table option to its display string
	#ifdef TABLE
	const char * table_state = "ON";
	#else
	const char * table_state = "OFF";
	#endif

	// Byte count scaled down by 1024 twice for the "(MB)" row
	const double mem_mb = I->nbytes/1024.0/1024.0;

	center_print("INPUT SUMMARY", 79);
	border_print();

	#ifdef OPENMP
	printf("%-25s%d\n", "Number of Threads:", I->nthreads);
	#endif

	printf("%-25s%d\n", "Energy Groups:", I->egroups);
	printf("%-25s%d\n", "2D Source Regions:", I->source_2D_regions);
	printf("%-25s%d\n", "Coarse Axial Intervals:", I->coarse_axial_intervals);
	printf("%-25s%d\n", "Fine Axial Intervals:", I->fine_axial_intervals);
	printf("%-25s%d\n", "Axial Decomposition:", I->decomp_assemblies_ax);
	printf("%-25s%d\n", "3D Source Regions:", I->source_3D_regions);

	// The segment count is handed to fancy_int(), which finishes the row
	printf("%-25s", "Segments:");
	fancy_int(I->segments);

	printf("%-25s%.2f\n", "Memory Estimate (MB):", mem_mb);
	printf("%-25s%s\n", "Exponential Table:", table_state);

	#ifdef PAPI
	if( I->papi_event_set == -1 )
	{
		printf("%-25s%s\n", "PAPI event to count:", I->event_name);
	}
	#endif

	border_print();
}
コード例 #2
0
// Prints the program logo and the "INPUT SUMMARY" table, then opens the
// "INITIALIZATION" section header.
//   in      - run inputs; HM, n_isotopes, n_gridpoints, lookups and
//             nthreads are displayed
//   nprocs  - MPI rank count; only read when DOMPI is defined
//   version - forwarded to logo()
void print_inputs(Inputs in, int nprocs, int version )
{
  // The memory estimate is obtained before anything is printed
  const int mem_estimate = estimate_mem_usage( in );

  logo(version);
  center_print("INPUT SUMMARY", 79);
  border_print();

#ifdef VERIFICATION
  fputs("Verification Mode:            on\n", stdout);
#endif

  printf("Materials:                    %d\n", 12);
  printf("H-M Benchmark Size:           %s\n", in.HM);
  printf("Total Nuclides:               %ld\n", in.n_isotopes);

  // Label-only rows: the value is emitted by fancy_int()
  fputs("Gridpoints (per Nuclide):     ", stdout);
  fancy_int(in.n_gridpoints);

  fputs("Unionized Energy Gridpoints:  ", stdout);
  fancy_int(in.n_isotopes*in.n_gridpoints);

  fputs("XS Lookups:                   ", stdout);
  fancy_int(in.lookups);

#ifdef DOMPI
  printf("MPI Ranks:                    %d\n", nprocs);
  printf("OMP Threads per MPI Rank:     %d\n", in.nthreads);
  fputs("Mem Usage per MPI Rank (MB):  ", stdout);
  fancy_int(mem_estimate);
#else
  printf("Threads:                      %d\n", in.nthreads);
  fputs("Est. Memory Usage (MB):       ", stdout);
  fancy_int(mem_estimate);
#endif

  border_print();
  center_print("INITIALIZATION", 79);
  border_print();
}
コード例 #3
0
// Reports the outcome of a lookup run.
//   in      - run inputs; lookups and nthreads are read here
//   mype    - rank of the calling process; only rank 0 prints
//   runtime - elapsed time in seconds, used to derive the lookup rate
//   nprocs  - MPI rank count; only read when DOMPI is defined
//   vhash   - checksum printed alongside the results
// When SAVE is non-zero, rank 0 also appends "<threads>\t<lookups/s>" to
// results.txt. A failure to open that file is reported on stderr and the
// function continues instead of writing through a NULL stream.
void print_results( Inputs in, int mype, double runtime, int nprocs,
	unsigned long long vhash )
{
  // Calculate Lookups per sec
  int lookups_per_sec = (int) ((double) in.lookups / runtime);
	
  // If running in MPI, sum the per-rank rates onto rank 0
#ifdef DOMPI
  int total_lookups = 0;
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Reduce(&lookups_per_sec, &total_lookups, 1, MPI_INT,
	     MPI_SUM, 0, MPI_COMM_WORLD);
#endif
	
  // Print output
  if( mype == 0 )
    {
      border_print();
      center_print("RESULTS", 79);
      border_print();

      // Print the results
      printf("Threads:     %d\n", in.nthreads);
#ifdef DOMPI
      printf("MPI ranks:   %d\n", nprocs);
#endif
#ifdef DOMPI
      printf("Total Lookups/s:            ");
      fancy_int(total_lookups);
      printf("Avg Lookups/s per MPI rank: ");
      fancy_int(total_lookups / nprocs);
#else
      printf("Runtime:     %.3lf seconds\n", runtime);
      printf("Lookups:     "); fancy_int(in.lookups);
      printf("Lookups/s:   ");
      fancy_int(lookups_per_sec);
#endif
#ifndef VERIFICATION
      printf("Non-zero Check: %llu\n", vhash);
#else
      printf("Verification checksum: %llu\n", vhash);
#endif
      border_print();

      // For benchmarking, output lookup/s data to file
      if( SAVE )
	{
	  FILE * out = fopen( "results.txt", "a" );
	  if( out == NULL )
	    {
	      // fopen returns NULL on failure; passing that to
	      // fprintf/fclose would be undefined behavior
	      fprintf(stderr,
		      "Warning: could not open results.txt for appending\n");
	    }
	  else
	    {
	      fprintf(out, "%d\t%d\n", in.nthreads, lookups_per_sec);
	      fclose(out);
	    }
	}
    }
}
コード例 #4
0
ファイル: io.c プロジェクト: shamouda/ocr-apps
// Prints the program banner: a border, the ASCII-art title, a border,
// the "Developed at Argonne National Laboratory" credit, a centered
// "Version: <version>" line, and a closing border.
void logo(int version)
{
	// ASCII-art title, one entry per output row
	static const char * const banner_rows[] = {
"                    _____   _____ ____                  _     \n",
"                   |  __ \\ / ____|  _ \\                | |    \n",
"                   | |__) | (___ | |_) | ___ _ __   ___| |__  \n",
"                   |  _  / \\___ \\|  _ < / _ \\ '_ \\ / __| '_ \\ \n",
"                   | | \\ \\ ____) | |_) |  __/ | | | (__| | | |\n",
"                   |_|  \\_\\_____/|____/ \\___|_| |_|\\___|_| |_|\n"
	};
	const size_t row_count = sizeof banner_rows / sizeof banner_rows[0];

	border_print();
	for( size_t row = 0; row < row_count; row++ )
		fputs(banner_rows[row], stdout);
	border_print();

	center_print("Developed at Argonne National Laboratory", 79);

	// Format the version into a scratch buffer so it can be centered
	char version_line[100];
	sprintf(version_line, "Version: %d", version);
	center_print(version_line, 79);

	border_print();
}
コード例 #5
0
// Prints the program banner: a border, the ASCII-art title followed by a
// blank line, a border, the "Developed at Argonne National Laboratory"
// credit, a centered "Version: <version>" line, and a closing border.
void logo(int version)
{
	// ASCII-art title, one entry per output row (the last row also
	// carries the trailing blank line)
	static const char * const banner_rows[] = {
	"                   __   __ ___________                 _                        \n",
	"                   \\ \\ / //  ___| ___ \\               | |                       \n",
	"                    \\ V / \\ `--.| |_/ / ___ _ __   ___| |__                     \n",
	"                    /   \\  `--. \\ ___ \\/ _ \\ '_ \\ / __| '_ \\                    \n",
	"                   / /^\\ \\/\\__/ / |_/ /  __/ | | | (__| | | |                   \n",
	"                   \\/   \\/\\____/\\____/ \\___|_| |_|\\___|_| |_|                   \n\n"
	};
	const size_t row_count = sizeof banner_rows / sizeof banner_rows[0];

	border_print();
	for( size_t row = 0; row < row_count; row++ )
		fputs(banner_rows[row], stdout);
	border_print();

	center_print("Developed at Argonne National Laboratory", 79);

	// Format the version into a scratch buffer so it can be centered
	char version_line[100];
	sprintf(version_line, "Version: %d", version);
	center_print(version_line, 79);

	border_print();
}
コード例 #6
0
ファイル: io.c プロジェクト: ANL-CESAR/SimpleMOC-kernel
// Prints the program banner: a border, the ASCII-art title, a border, the
// four-line institutional credit, a centered "Version: <version>" line,
// and a closing border, with blank lines separating the sections.
void logo(int version)
{
	// ASCII-art title, one entry per output row
	static const char * const banner_rows[] = {
"   __           __        ___        __   __           ___  __        ___     \n",
"  /__` |  |\\/| |__) |    |__   |\\/| /  \\ /  ` __ |__/ |__  |__) |\\ | |__  |   \n",
"  .__/ |  |  | |    |___ |___  |  | \\__/ \\__,    |  \\ |___ |  \\ | \\| |___ |___\n"
	};
	const size_t row_count = sizeof banner_rows / sizeof banner_rows[0];

	border_print();
	for( size_t row = 0; row < row_count; row++ )
		fputs(banner_rows[row], stdout);
	putchar('\n');
	border_print();
	putchar('\n');

	// Credits
	center_print("Developed at", 79);
	center_print("The Massachusetts Institute of Technology", 79);
	center_print("and", 79);
	center_print("Argonne National Laboratory", 79);
	putchar('\n');

	// Format the version into a scratch buffer so it can be centered
	char version_line[100];
	sprintf(version_line, "Version: %d", version);
	center_print(version_line, 79);
	putchar('\n');

	border_print();
}
コード例 #7
0
ファイル: main.c プロジェクト: ANL-CESAR/SimpleMOC
// Program entry point. Reads the inputs, builds the tracks and the MPI
// grid, runs num_iters iterations of the solver loop (transport sweep,
// optional MPI boundary flux exchange, flux renormalization, source update,
// k-effective calculation) while timing each phase, and has rank 0 print a
// timing summary. Always returns 0.
int main( int argc, char * argv[] )
{
	int version = 4;
	// Rank of this process; stays 0 when MPI is not compiled in
	int mype = 0;

	#ifdef MPI
	int nranks;
	MPI_Status stat;
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nranks);
	MPI_Comm_rank(MPI_COMM_WORLD, &mype);
	#endif

	#ifdef PAPI
	papi_serial_init();
	#endif

	// Seed rand() with the wall-clock time scaled by (rank + 1), so
	// different ranks get different seeds
	srand(time(NULL) * (mype+1));

	// Defaults first, then command-line overrides, then derived values
	Input input = set_default_input();
	read_CLI( argc, argv, &input );
	calculate_derived_inputs( &input );

	// Only rank 0 writes to the console
	if( mype == 0 )
		logo(version);

	#ifdef OPENMP
	omp_set_num_threads(input.nthreads); 
	#endif

	Params params = build_tracks( &input );
	CommGrid grid = init_mpi_grid( input );

	if( mype == 0 )
		print_input_summary(input);

	// res receives the return value of update_sources(); it is not read
	// again within this function
	float res;
	float keff = 1.0;
	// Number of solver iterations performed by the loop below
	int num_iters = 1;


	// Per-phase wall-clock accumulators, summed over all iterations
	double time_transport = 0;
	double time_flux_exchange = 0;
	double time_renormalize_flux = 0;
	double time_update_sources = 0;
	double time_compute_keff = 0;
	double start, stop;

	if(mype==0)
	{
		center_print("SIMULATION", 79);
		border_print();
	}

	for( int i = 0; i < num_iters; i++)
	{
		// Transport Sweep
		start = get_time();
		transport_sweep(&params, &input);
		stop = get_time();
		time_transport += stop-start;

		// Domain Boundary Flux Exchange (MPI)
		// Without MPI this phase is skipped and its timer stays at 0
		#ifdef MPI
		start = get_time();
		fast_transfer_boundary_fluxes(params, input, grid);
		stop = get_time();
		time_flux_exchange += stop-start;
		#endif

		// Flux Renormalization
		start = get_time();
		renormalize_flux(params,input, grid);
		stop = get_time();
		time_renormalize_flux += stop-start;

		// Update Source Regions
		start = get_time();
		res = update_sources(params, input, keff);
		stop = get_time();
		time_update_sources += stop-start;

		// Calculate K-Effective
		start = get_time();
		keff = compute_keff(params, input, grid);
		stop = get_time();
		time_compute_keff += stop-start;
		if( mype == 0 )
			printf("keff = %f\n", keff);
	}

	// Denominator for the percentage column of the summary below
	double time_total = time_transport + time_flux_exchange 
		+ time_renormalize_flux + time_update_sources + time_compute_keff;

	if( mype == 0 )
	{
		border_print();
		center_print("RESULTS SUMMARY", 79);
		border_print();

		printf("Transport Sweep Time:         %6.2lf sec   (%4.1lf%%)\n", 
				time_transport, 100*time_transport/time_total);
		printf("Domain Flux Exchange Time:    %6.2lf sec   (%4.1lf%%)\n", 
				time_flux_exchange, 100*time_flux_exchange/time_total);
		printf("Flux Renormalization Time:    %6.2lf sec   (%4.1lf%%)\n", 
				time_renormalize_flux, 100*time_renormalize_flux/time_total);
		printf("Update Source Time:           %6.2lf sec   (%4.1lf%%)\n", 
				time_update_sources, 100*time_update_sources/time_total);
		printf("K-Effective Calc Time:        %6.2lf sec   (%4.1lf%%)\n", 
				time_compute_keff, 100*time_compute_keff/time_total);
		printf("Total Time:                   %6.2lf sec\n", time_total);
	}

	// Local track rate; with MPI it is replaced by the sum over all ranks.
	// NOTE(review): the value is not printed or otherwise used in this
	// function after it is computed.
	long tracks_per_second = 2 * input.ntracks/time_transport;

	#ifdef MPI
	MPI_Barrier(grid.cart_comm_3d);
	long global_tps = 0;
	MPI_Reduce( &tracks_per_second, // Send Buffer
			&global_tps,            // Receive Buffer
			1,                    	// Element Count
			MPI_LONG,           	// Element Type
			MPI_SUM,              	// Reduction Operation Type
			0,                    	// Master Rank
			grid.cart_comm_3d );  	// MPI Communicator
	MPI_Barrier(grid.cart_comm_3d);
	tracks_per_second = global_tps;
	#endif

	if( mype == 0 )
	{
		printf("Time per Intersection:          ");
		printf("%.2lf ns\n", time_per_intersection( input, time_transport ));
		border_print();
	}

	// Release the track data held in params
	free_2D_tracks( params.tracks_2D );
	free_tracks( params.tracks );

	#ifdef MPI
	MPI_Finalize();
	#endif

	return 0;
}
コード例 #8
0
ファイル: kernel.c プロジェクト: ANL-CESAR/SimpleMOC-kernel
// Runs the segment-attenuation kernel inside one OpenMP parallel region.
//   I     - run inputs (egroups, segments, source_3D_regions,
//           fine_axial_intervals are read here)
//   S     - source data forwarded to attenuate_segment()
//   table - table forwarded to attenuate_segment()
// Each thread seeds its own rand_r() state, allocates its own SIMD scratch
// vectors and flux vector, and processes a dynamically scheduled share of
// the I->segments iterations, picking a random source region and fine axial
// interval for each. The thread-local flux vector is released before the
// thread leaves the region.
void run_kernel( Input * I, Source * S, Table * table)
{
	// Enter Parallel Region
	#pragma omp parallel default(none) shared(I, S, table)
	{
		#ifdef OPENMP
		int thread = omp_get_thread_num();
		#else
		int thread = 0;
		#endif

		// Create Thread Local Random Seed
		unsigned int seed = time(NULL) * (thread+1);

		// Allocate Thread Local SIMD Vectors and Flux Vector
		// (aligned to 64 bytes if using intel compiler)
		#ifdef INTEL
		SIMD_Vectors simd_vecs = aligned_allocate_simd_vectors(I);
		float * state_flux = (float *) _mm_malloc(
				I->egroups * sizeof(float), 64);
		#else
		SIMD_Vectors simd_vecs = allocate_simd_vectors(I);
		float * state_flux = (float *) malloc(
				I->egroups * sizeof(float));
		#endif

		// Fill the Thread Local Flux Vector with random values in [0,1]
		for( int i = 0; i < I->egroups; i++ )
			state_flux[i] = (float) rand_r(&seed) / RAND_MAX;

		// Initialize PAPI Counters (if enabled)
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events, I);
		}
		#endif

		// Enter OMP For Loop over Segments
		#pragma omp for schedule(dynamic,100)
		for( long i = 0; i < I->segments; i++ )
		{
			// Pick Random QSR
			int QSR_id = rand_r(&seed) % I->source_3D_regions;

			// Pick Random Fine Axial Interval
			int FAI_id = rand_r(&seed) % I->fine_axial_intervals;

			// Attenuate Segment
			attenuate_segment( I, S, QSR_id, FAI_id, state_flux,
					&simd_vecs, table);
		}

		// Stop PAPI Counters
		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count          \tSymbol      \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events, I);
		#endif

		// Release the thread-local flux vector with the deallocator that
		// matches its allocator; previously it was leaked on every call.
		// NOTE(review): simd_vecs is not released here because its layout
		// is defined elsewhere - confirm whether a matching free helper
		// exists.
		#ifdef INTEL
		_mm_free(state_flux);
		#else
		free(state_flux);
		#endif
	}
}
コード例 #9
0
ファイル: Main.c プロジェクト: shamouda/ocr-apps
// Program entry point. Reads the command line, builds (or reads) the
// nuclide and unionized energy grids and the material data, runs the
// parallel cross-section lookup loop under OpenMP while timing it, and
// reports the results through print_results(). Returns 0.
//
// Build-time switches seen here: MPI, VERIFICATION, BINARY_READ,
// BINARY_DUMP (dump the grids and exit), BENCHMARK (repeat the simulation
// for 1..omp_get_num_procs() threads) and PAPI.
int main( int argc, char* argv[] )
{
	// =====================================================================
	// Initialization & Command Line Read-In
	// =====================================================================
	int version = 13;
	int mype = 0;
	int max_procs = omp_get_num_procs();
	int i, thread, mat;
	unsigned long seed;
	double omp_start, omp_end, p_energy;
	unsigned long long vhash = 0;
	// Defaults to a single process so the value handed to print_inputs()
	// and print_results() is defined when MPI is not compiled in
	int nprocs = 1;

	#ifdef MPI
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &mype);
	#endif

	// rand() is only used in the serial initialization stages.
	// A custom RNG is used in parallel portions.
	#ifdef VERIFICATION
	srand(26);
	#else
	srand(time(NULL));
	#endif

	// Process CLI Fields -- store in "Inputs" structure
	Inputs in = read_CLI( argc, argv );

	// Set number of OpenMP Threads
	omp_set_num_threads(in.nthreads);

	// Print-out of Input Summary
	if( mype == 0 )
		print_inputs( in, nprocs, version );

	// =====================================================================
	// Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
	// =====================================================================

	// Allocate & fill energy grids
	#ifndef BINARY_READ
	if( mype == 0) printf("Generating Nuclide Energy Grids...\n");
	#endif

	NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes,in.n_gridpoints);

	#ifdef VERIFICATION
	generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#else
	generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Sort grids by energy
	#ifndef BINARY_READ
	if( mype == 0) printf("Sorting Nuclide Energy Grids...\n");
	sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Prepare Unionized Energy Grid Framework
	#ifndef BINARY_READ
	GridPoint * energy_grid = generate_energy_grid( in.n_isotopes,
	                          in.n_gridpoints, nuclide_grids );
	#else
	// When reading from file, allocate the grid and one contiguous index
	// array, then point each grid entry at its slice of that array
	GridPoint * energy_grid = (GridPoint *)malloc( in.n_isotopes *
	                           in.n_gridpoints * sizeof( GridPoint ) );
	int * index_data = (int *) malloc( in.n_isotopes * in.n_gridpoints
	                   * in.n_isotopes * sizeof(int));
	for( i = 0; i < in.n_isotopes*in.n_gridpoints; i++ )
		energy_grid[i].xs_ptrs = &index_data[i*in.n_isotopes];
	#endif

	// Double Indexing. Filling in energy_grid with pointers to the
	// nuclide_energy_grids.
	#ifndef BINARY_READ
	set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	#ifdef BINARY_READ
	if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n");
	binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	#endif

	// Get material data
	if( mype == 0 )
		printf("Loading Mats...\n");
	int *num_nucs  = load_num_nucs(in.n_isotopes);
	int **mats     = load_mats(num_nucs, in.n_isotopes);

	#ifdef VERIFICATION
	double **concs = load_concs_v(num_nucs);
	#else
	double **concs = load_concs(num_nucs);
	#endif

	#ifdef BINARY_DUMP
	if( mype == 0 ) printf("Dumping data to binary file...\n");
	binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! Exiting...\n");
	// Shut MPI down on this early exit as well; MPI_Init was called above
	#ifdef MPI
	MPI_Finalize();
	#endif
	return 0;
	#endif

	// =====================================================================
	// Cross Section (XS) Parallel Lookup Simulation Begins
	// =====================================================================

	// Outer benchmark loop can loop through all possible # of threads
	#ifdef BENCHMARK
	for( int bench_n = 1; bench_n <=omp_get_num_procs(); bench_n++ )
	{
		in.nthreads = bench_n;
		omp_set_num_threads(in.nthreads);
 	#endif

	if( mype == 0 )
	{
		printf("\n");
		border_print();
		center_print("SIMULATION", 79);
		border_print();
	}

	omp_start = omp_get_wtime();

	//initialize papi with one thread (master) here
	#ifdef PAPI
	if ( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT){
		fprintf(stderr, "PAPI library init error!\n");
		exit(1);
	}
	#endif

	// OpenMP compiler directives - declaring variables as shared or private
	#pragma omp parallel default(none) \
	private(i, thread, p_energy, mat, seed) \
	shared( max_procs, in, energy_grid, nuclide_grids, \
	        mats, concs, num_nucs, mype, vhash)
	{
		// Initialize parallel PAPI counters
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		double macro_xs_vector[5];
		// Thread-local heap copy of the result vector (see memcpy below)
		double * xs = (double *) calloc(5, sizeof(double));

		// Initialize RNG seeds for threads
		thread = omp_get_thread_num();
		seed   = (thread+1)*19+17;

		// XS Lookup Loop
		#pragma omp for schedule(dynamic)
		for( i = 0; i < in.lookups; i++ )
		{
			// Status text
			if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)in.lookups / (double) in.nthreads ))
						/ (double) in.nthreads * 100.0);

			// Randomly pick an energy and material for the particle
			#ifdef VERIFICATION
			#pragma omp critical
			{
				p_energy = rn_v();
				mat      = pick_mat(&seed);
			}
			#else
			p_energy = rn(&seed);
			mat      = pick_mat(&seed);
			#endif

			// debugging
			//printf("E = %lf mat = %d\n", p_energy, mat);

			// This returns the macro_xs_vector, but we're not going
			// to do anything with it in this program, so return value
			// is written over.
			calculate_macro_xs( p_energy, mat, in.n_isotopes,
			                    in.n_gridpoints, num_nucs, concs,
			                    energy_grid, nuclide_grids, mats,
			                    macro_xs_vector );

			// Copy results from above function call onto heap
			// so that compiler cannot optimize function out
			// (only occurs if -flto flag is used)
			memcpy(xs, macro_xs_vector, 5*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			#ifdef VERIFICATION
			char line[256];
			sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf",
			       p_energy, mat,
				   macro_xs_vector[0],
				   macro_xs_vector[1],
				   macro_xs_vector[2],
				   macro_xs_vector[3],
				   macro_xs_vector[4]);
			unsigned long long vhash_local = hash(line, 10000);
			#pragma omp atomic
			vhash += vhash_local;
			#endif
		}

		// Prints out thread local PAPI counters
		#ifdef PAPI
		if( mype == 0 && thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count          \tSymbol      \tDescription\n");
		}
		{
		#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif

		// Release the thread-local result buffer; it was previously
		// leaked once per thread for every pass through this region
		free(xs);

	}

	#ifndef PAPI
	if( mype == 0)
	{
		printf("\n" );
		printf("Simulation complete.\n" );
	}
	#endif

	omp_end = omp_get_wtime();

	// Print / Save Results and Exit
	print_results( in, mype, omp_end-omp_start, nprocs, vhash );

	#ifdef BENCHMARK
	}
	#endif

	#ifdef MPI
	MPI_Finalize();
	#endif

	return 0;
}
コード例 #10
0
ファイル: main.c プロジェクト: wangsnowyin/monte-carlo
// Program entry point. Loads the parameters, sets up the RNG, output files,
// geometry, material, tallies and source bank, runs the eigenvalue
// simulation (only when compiled with OpenMP), prints the elapsed time and
// releases everything it allocated. Returns 0.
int main(int argc, char *argv[])
{
  Parameters *parameters; // user defined parameters
  Geometry *geometry; // homogenous cube geometry
  Material *material; // problem material
  Bank *source_bank; // array for particle source sites
  Tally *tally; // scalar flux tally
  double *keff; // effective multiplication factor
  // Timers. Zero-initialized so the "Simulation time" report below reads
  // defined values even when built without OpenMP, where they are never set.
  double t1 = 0.0, t2 = 0.0;

  #ifdef _OPENMP
    unsigned long counter = 0; //counter to decide the start pos of master bank copy from sub banks
    Bank *g_fission_bank; //global fission bank
  #endif

  // Get inputs: set parameters to default values, parse parameter file,
  // override with any command line inputs, and print parameters
  parameters = init_parameters();
  parse_parameters(parameters);
  read_CLI(argc, argv, parameters);
  print_parameters(parameters);


  // Set initial RNG seed
  set_initial_seed(parameters->seed);
  set_stream(STREAM_INIT);

  // Create files for writing results to
  init_output(parameters);

  // Set up geometry
  geometry = init_geometry(parameters);

  // Set up material
  material = init_material(parameters);

  // Set up tallies
  tally = init_tally(parameters);

  // Create source bank and initial source distribution
  source_bank = init_source_bank(parameters, geometry);

  // Create fission bank
  #ifdef _OPENMP
    omp_set_num_threads(parameters->n_threads); // Set number of openmp threads
    printf("threads num: %d\n", parameters->n_threads);
    // Allocate one master fission bank
    g_fission_bank = init_bank(2*parameters->n_particles);
  #endif

  // Set up array for k effective
  keff = calloc(parameters->n_active, sizeof(double));

  center_print("SIMULATION", 79);
  border_print();
  printf("%-15s %-15s %-15s\n", "BATCH", "KEFF", "MEAN KEFF");

  // The simulation itself is only compiled in for OpenMP builds
  #ifdef _OPENMP
    // Start time
    t1 = omp_get_wtime();

    // NOTE(review): fission_bank is not declared in this function;
    // presumably it is a file-scope variable defined elsewhere - confirm.
    run_eigenvalue(counter, g_fission_bank, parameters, geometry, material, source_bank, fission_bank, tally, keff);

    // Stop time
    t2 = omp_get_wtime();
  #endif

  printf("Simulation time: %f secs\n", t2-t1);

  // Free memory
  #ifdef _OPENMP
    free_bank(g_fission_bank);
  #endif

  free(keff);
  free_tally(tally);
  free_bank(source_bank);
  free_material(material);
  free(geometry);
  free(parameters);

  return 0;
}
コード例 #11
0
ファイル: papi.c プロジェクト: ANL-CESAR/SimpleMOC-kernel
// Stops the papi counters and prints results
//   eventset        - PAPI event set to stop and list
//   num_papi_events - number of events in the set; sizes the local buffers
//   I               - run inputs; vals_accum is allocated, used and freed
//                     here, and papi_event_set selects the summary printed
// The function contains OpenMP barriers, so it relies on every thread of
// the enclosing team calling it. Each thread prints its own counter values;
// the master thread then prints the totals and a performance summary.
void counter_stop( int * eventset, int num_papi_events, Input * I )
{
	// Look up which events are in the set so they can be named later
	int * events = malloc(num_papi_events * sizeof(int));
	int n = num_papi_events;
	PAPI_list_events( *eventset, events, &n );
	PAPI_event_info_t info;

	// Stop counting and collect this thread's counter values
	long_long * values = malloc( num_papi_events * sizeof(long_long));
	PAPI_stop(*eventset, values);
	int thread = omp_get_thread_num();
	int nthreads = omp_get_num_threads();

	// Accumulators shared by all threads (static storage). Being static,
	// they are not reset between calls to this function.
	static long LLC_cache_miss = 0;
	static long total_cycles = 0;
	static long FLOPS = 0;
	static long stall_any = 0;
	static long stall_SB = 0;
	static long stall_RS = 0;
	static long stall_OO = 0;
	static long tlb_load = 0;
	static long tlb_load_m = 0;
	static long tlb_store = 0;
	static long tlb_store_m = 0;

    // The master thread allocates and zeroes the per-event totals; the
    // barrier keeps the other threads from using them before that is done
    #pragma omp master
    {
        I->vals_accum = malloc( num_papi_events * sizeof(long long));
        for(int i=0; i < num_papi_events ; i ++)
            I->vals_accum[i] = 0;
    }
    #pragma omp barrier

	// One thread at a time: print this thread's values and add them to
	// the shared totals
	#pragma omp critical (papi)
	{
		printf("Thread %d\n", thread);
		for( int i = 0; i < num_papi_events; i++ )
		{
            I->vals_accum[i] += values[i];
			PAPI_get_event_info(events[i], &info);
			printf("%-15lld\t%s\t%s\n", values[i],info.symbol,info.long_descr);
			// Route the value to the matching named accumulator, keyed
			// on the event's symbol string
			if( strcmp(info.symbol, "PAPI_L3_TCM") == 0 )
				LLC_cache_miss += values[i];
			if( strcmp(info.symbol, "PAPI_TOT_CYC") == 0 )
				total_cycles += values[i];
			if( strcmp(info.symbol, "PAPI_SP_OPS") == 0 )
				FLOPS += values[i];
			if( strcmp(info.symbol, "RESOURCE_STALLS:ANY") == 0 )
				stall_any += values[i];
			if( strcmp(info.symbol, "RESOURCE_STALLS:SB") == 0 )
				stall_SB += values[i];
			if( strcmp(info.symbol, "RESOURCE_STALLS:RS") == 0 )
				stall_RS += values[i];
			if( strcmp(info.symbol, "RESOURCE_STALLS2:OOO_RSRC") == 0 )
				stall_OO += values[i];
			if( strcmp(info.symbol, "perf::DTLB-LOADS") == 0 )
				tlb_load += values[i];
			if( strcmp(info.symbol, "perf::DTLB-LOAD-MISSES") == 0 )
				tlb_load_m += values[i];
			if( strcmp(info.symbol, "perf::DTLB-STORES") == 0 )
				tlb_store += values[i];
			if( strcmp(info.symbol, "perf::DTLB-STORE-MISSES") == 0 )
				tlb_store_m += values[i];
		}
		free(values);	
	}
	// Wait until every thread has contributed before the totals are read
	{
		#pragma omp barrier
	}
	#pragma omp master
	{
        // Totals are only printed when more than one thread took part
        if( omp_get_num_threads() > 1){
            printf("Thread Totals:\n");
            for( int i = 0; i < num_papi_events; i++ )
            {
                PAPI_get_event_info(events[i], &info);
                printf("%-15lld\t%s\t%s\n", I->vals_accum[i],info.symbol,info.long_descr);
            }
        }
        free( I->vals_accum );

		border_print();
		center_print("PERFORMANCE SUMMARY", 79);
		border_print();
		// Average cycle count per thread
		long cycles = (long) (total_cycles / (double) nthreads);
		// NOTE(review): 64. is presumably the cache line size in bytes and
		// 2.8e9 / 2.8 the clock rate in Hz / GHz of the target machine -
		// confirm before relying on these figures on other hardware.
		double bw = LLC_cache_miss*64./cycles*2.8e9/1024./1024./1024.;
		// Which summary is printed depends on I->papi_event_set
		if( I->papi_event_set == 0 )
			printf("GFLOPs: %.3lf\n", FLOPS / (double) cycles * 2.8  );
		if( I->papi_event_set == 1 )
			printf("Bandwidth: %.3lf (GB/s)\n", bw);
		if( I->papi_event_set == 2 )
		{
			printf("%-30s %.2lf%%\n", "Store Buffer Full:",
					stall_SB / (double) stall_any * 100.);
			printf("%-30s %.2lf%%\n", "Reservation Station Full:",
					stall_RS / (double) stall_any * 100.);
			printf("%-30s %.2lf%%\n", "OO Pipeline Full:",
					stall_OO / (double) stall_any * 100.);
		}
		if( I->papi_event_set == 3 )
			printf("CPU Stalled Cycles: %.2lf%%\n",
					stall_any / (double) total_cycles * 100.);	
		if( I->papi_event_set == 7 )
		{
			printf("%-30s %.2lf%%\n", "Data TLB Load Miss Rate: ",
					tlb_load_m / (double) tlb_load * 100 );
			printf("%-30s %.2lf%%\n", "Data TLB Store Miss Rate: ",
					tlb_store_m / (double) tlb_store * 100 );
		}

		border_print();
	}
    // Every thread releases its own event list
    free(events);
}
コード例 #12
0
ファイル: simulation.c プロジェクト: ANL-CESAR/RSBench
// Runs the history-based lookup simulation under OpenMP.
//   input          - run inputs (particles, lookups, numL, nthreads read here)
//   data           - calculation data forwarded to calculate_macro_xs()
//   abrarov_result - receives the summed "abrarov" counter of all threads
//   alls_result    - receives the summed "alls" counter of all threads
//   vhash_result   - receives the summed verification hash (only
//                    accumulated when VERIFICATION is defined, else 0)
// Particles are distributed over the threads; for each particle
// input.lookups dependent lookups are performed sequentially. All
// thread-local heap buffers are released before the region ends.
void run_history_based_simulation(Input input, CalcDataPtrs data, long * abrarov_result, long * alls_result, unsigned long * vhash_result )
{
	printf("Beginning history based simulation...\n");
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;
	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
	{
		// Thread-local heap copy of the result vector (see memcpy below)
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		long abrarov = 0; 
		long alls = 0;

		#ifdef PAPI
		int eventset = PAPI_NULL; 
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif
		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., particle histories can be executed in any order
		#pragma omp for schedule(guided)
		for( int p = 0; p < input.particles; p++ )
		{
			// Particles are seeded by their particle ID
			unsigned long seed = ((unsigned long) p+ (unsigned long)1)* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
			double E = rn(&seed);
			int mat  = pick_mat(&seed);

			#ifdef STATUS
			if( thread == 0 && p % 35 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(p / ( (double)input.particles /
							   (double) input.nthreads )) /
						(double) input.nthreads * 100.0);
			#endif

			// This loop is dependent!
			// I.e., This loop must be executed sequentially,
			// as each lookup depends on results from the previous lookup.
			for( int i = 0; i < input.lookups; i++ )
			{
				double macro_xs[4] = {0};

				calculate_macro_xs( macro_xs, mat, E, input, data, sigTfactors, &abrarov, &alls ); 

				// Results are copied onto heap to avoid some compiler
				// flags (-flto) from optimizing out function call
				memcpy(xs, macro_xs, 4*sizeof(double));

				// Verification hash calculation
				// This method provides a consistent hash across
				// architectures and compilers.
				#ifdef VERIFICATION
				char line[256];
				sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
					   E, mat,
					   macro_xs[0],
					   macro_xs[1],
					   macro_xs[2],
					   macro_xs[3]);
				unsigned long long vhash_local = hash(line, 10000);

				vhash += vhash_local;
				#endif

				// Randomly pick next energy and material for the particle
				// Also incorporates results from macro_xs lookup to
				// enforce loop dependency.
				// In a real MC app, this dependency is expressed in terms
				// of branching physics sampling, whereas here we are just
				// artificially enforcing this dependence based on altering
				// the seed
				// NOTE(review): 1337*p is evaluated in int and overflows
				// for p > INT_MAX/1337; left unchanged so the generated
				// sequence stays the same.
				for( int x = 0; x < 4; x++ )
				{
					if( macro_xs[x] > 0 )
						seed += 1337*p;
					else
						seed += 42;
				}

				E   = rn(&seed);
				mat = pick_mat(&seed);
			}
		}

		// Release thread-local heap buffers; xs was previously leaked
		// once per thread
		free(sigTfactors);
		free(xs);

		// Accumulate global counters: each thread stores its tallies in
		// its private reduction copy, which OpenMP sums at region end
		g_abrarov = abrarov; 
		g_alls = alls;

		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count          \tSymbol      \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif
	}
	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;
}
コード例 #13
0
ファイル: simulation.c プロジェクト: ANL-CESAR/RSBench
// Runs the event-based lookup simulation under OpenMP.
//   input          - run inputs (lookups, numL, nthreads read here)
//   data           - calculation data forwarded to calculate_macro_xs()
//   abrarov_result - receives the summed "abrarov" counter of all threads
//   alls_result    - receives the summed "alls" counter of all threads
//   vhash_result   - receives the summed verification hash (only
//                    accumulated when VERIFICATION is defined, else 0)
// The input.lookups independent lookups are distributed over the threads.
// All thread-local heap buffers are released before the region ends.
void run_event_based_simulation(Input input, CalcDataPtrs data, long * abrarov_result, long * alls_result, unsigned long * vhash_result )
{
	printf("Beginning event based simulation...\n");
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;
	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
	{
		// Thread-local heap copy of the result vector (see memcpy below)
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		long abrarov = 0; 
		long alls = 0;

		#ifdef PAPI
		int eventset = PAPI_NULL; 
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif
		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., macroscopic cross section lookups in the event based simulation
		// can be executed in any order.
		#pragma omp for schedule(guided)
		for( int i = 0; i < input.lookups; i++ )
		{
			// Lookups are seeded by their loop index
			unsigned long seed = ((unsigned long) i+ (unsigned long)1)* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
			double E = rn(&seed);
			int mat  = pick_mat(&seed);

			#ifdef STATUS
			if( thread == 0 && i % 2000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)input.lookups /
							   (double) input.nthreads )) /
						(double) input.nthreads * 100.0);
			#endif

			double macro_xs[4] = {0};

			calculate_macro_xs( macro_xs, mat, E, input, data, sigTfactors, &abrarov, &alls ); 

			// Results are copied onto heap to avoid some compiler
			// flags (-flto) from optimizing out function call
			memcpy(xs, macro_xs, 4*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			#ifdef VERIFICATION
			char line[256];
			sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
				   E, mat,
				   macro_xs[0],
				   macro_xs[1],
				   macro_xs[2],
				   macro_xs[3]);
			unsigned long long vhash_local = hash(line, 10000);

			vhash += vhash_local;
			#endif
		}

		// Release thread-local heap buffers; xs was previously leaked
		// once per thread
		free(sigTfactors);
		free(xs);

		// Accumulate global counters: each thread stores its tallies in
		// its private reduction copy, which OpenMP sums at region end
		g_abrarov = abrarov; 
		g_alls = alls;

		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count          \tSymbol      \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif
	}
	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;
}
コード例 #14
0
ファイル: main.c プロジェクト: amandalund/simple-mc
// Program entry point: loads the parameters, prepares the RNG, output
// files and problem data, runs the eigenvalue simulation while timing it,
// prints the elapsed time and frees everything that was set up.
// Always returns 0.
int main(int argc, char *argv[])
{
  // Inputs: start from defaults, apply the parameter file, then let the
  // command line override, and finally echo the result
  Parameters *parameters = init_parameters();
  parse_parameters(parameters);
  read_CLI(argc, argv, parameters);
  print_parameters(parameters);

  // Seed the RNG and select the stream
  set_initial_seed(parameters->seed);
  set_stream(STREAM_OTHER);

  // Output files for the results
  init_output(parameters);

  // Problem description
  Geometry *geometry = init_geometry(parameters);   // homogenous cube geometry
  Material *material = init_material(parameters);   // problem material
  Tally *tally = init_tally(parameters);            // scalar flux tally

  // Particle banks: source sites (with the initial source distribution)
  // and fission sites
  Bank *source_bank = init_source_bank(parameters, geometry);
  Bank *fission_bank = init_fission_bank(parameters);

  // One k-effective slot per active batch
  double *keff = calloc(parameters->n_active, sizeof(double));

  center_print("SIMULATION", 79);
  border_print();
  printf("%-15s %-15s %-15s %-15s\n", "BATCH", "ENTROPY", "KEFF", "MEAN KEFF");

  // Time the simulation itself
  const double t1 = timer();
  run_eigenvalue(parameters, geometry, material, source_bank, fission_bank, tally, keff);
  const double t2 = timer();

  printf("Simulation time: %f secs\n", t2-t1);

  // Tear down
  free(keff);
  free_tally(tally);
  free_bank(fission_bank);
  free_bank(source_bank);
  free_material(material);
  free(geometry);
  free(parameters);

  return 0;
}