// Prints out the summary of User input void print_input_summary(Input * I) { center_print("INPUT SUMMARY", 79); border_print(); #ifdef OPENMP printf("%-25s%d\n", "Number of Threads:", I->nthreads); #endif printf("%-25s%d\n", "Energy Groups:", I->egroups); printf("%-25s%d\n", "2D Source Regions:", I->source_2D_regions); printf("%-25s%d\n", "Coarse Axial Intervals:", I->coarse_axial_intervals); printf("%-25s%d\n", "Fine Axial Intervals:", I->fine_axial_intervals); printf("%-25s%d\n", "Axial Decomposition:", I->decomp_assemblies_ax); printf("%-25s%d\n", "3D Source Regions:", I->source_3D_regions); printf("%-25s", "Segments:"); fancy_int(I->segments); printf("%-25s%.2f\n", "Memory Estimate (MB):", I->nbytes/1024.0/1024.0); #ifdef TABLE printf("%-25s%s\n", "Exponential Table:","ON"); #else printf("%-25s%s\n", "Exponential Table:","OFF"); #endif #ifdef PAPI if( I->papi_event_set == -1) printf("%-25s%s\n", "PAPI event to count:", I->event_name); #endif border_print(); }
// Prints the program logo, the input summary for this run, and the
// header of the initialization section.
//   in      - parsed inputs
//   nprocs  - MPI rank count (only printed when DOMPI is defined)
//   version - version number forwarded to logo()
void print_inputs(Inputs in, int nprocs, int version )
{
	// Memory estimate is computed up front, before anything is printed
	int mem_tot = estimate_mem_usage( in );

	logo(version);

	center_print("INPUT SUMMARY", 79);
	border_print();

	#ifdef VERIFICATION
	printf("Verification Mode: on\n");
	#endif

	printf("Materials: %d\n", 12);
	printf("H-M Benchmark Size: %s\n", in.HM);
	printf("Total Nuclides: %ld\n", in.n_isotopes);

	// fancy_int() prints the value; the label goes out first
	printf("Gridpoints (per Nuclide): ");
	fancy_int(in.n_gridpoints);

	printf("Unionized Energy Gridpoints: ");
	fancy_int(in.n_isotopes*in.n_gridpoints);

	printf("XS Lookups: ");
	fancy_int(in.lookups);

	// Parallel configuration and the label for the memory estimate
	#ifdef DOMPI
	printf("MPI Ranks: %d\n", nprocs);
	printf("OMP Threads per MPI Rank: %d\n", in.nthreads);
	printf("Mem Usage per MPI Rank (MB): ");
	#else
	printf("Threads: %d\n", in.nthreads);
	printf("Est. Memory Usage (MB): ");
	#endif
	fancy_int(mem_tot);

	border_print();
	center_print("INITIALIZATION", 79);
	border_print();
}
// Prints the results section of the report and, when SAVE is set,
// appends "<threads>\t<lookups/s>" to results.txt.
//   in      - inputs used for the run (thread count, lookup count)
//   mype    - rank of the caller; only rank 0 prints or writes
//   runtime - elapsed simulation time in seconds
//   nprocs  - MPI rank count (only used when DOMPI is defined)
//   vhash   - verification hash / non-zero check value to report
void print_results( Inputs in, int mype, double runtime, int nprocs,
	unsigned long long vhash )
{
	// Calculate Lookups per sec
	int lookups_per_sec = (int) ((double) in.lookups / runtime);

	// If running in MPI, sum the per-rank rates onto rank 0
	#ifdef DOMPI
	int total_lookups = 0;
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Reduce(&lookups_per_sec, &total_lookups, 1, MPI_INT,
	           MPI_SUM, 0, MPI_COMM_WORLD);
	#endif

	// Print output
	if( mype == 0 )
	{
		border_print();
		center_print("RESULTS", 79);
		border_print();

		// Print the results
		printf("Threads: %d\n", in.nthreads);
		#ifdef DOMPI
		printf("MPI ranks: %d\n", nprocs);
		#endif
		#ifdef DOMPI
		printf("Total Lookups/s: ");
		fancy_int(total_lookups);
		printf("Avg Lookups/s per MPI rank: ");
		fancy_int(total_lookups / nprocs);
		#else
		printf("Runtime: %.3lf seconds\n", runtime);
		printf("Lookups: ");
		fancy_int(in.lookups);
		printf("Lookups/s: ");
		fancy_int(lookups_per_sec);
		#endif
		#ifndef VERIFICATION
		printf("Non-zero Check: %llu\n", vhash);
		#else
		printf("Verification checksum: %llu\n", vhash);
		#endif
		border_print();

		// For benchmarking, output lookup/s data to file
		if( SAVE )
		{
			FILE * out = fopen( "results.txt", "a" );
			if( out == NULL )
			{
				// fopen can fail (permissions, read-only directory, ...);
				// report it instead of writing through a NULL stream.
				fprintf(stderr,
				        "Warning: could not open results.txt for appending\n");
			}
			else
			{
				fprintf(out, "%d\t%d\n", in.nthreads, lookups_per_sec);
				fclose(out);
			}
		}
	}
}
// Prints the program banner followed by the attribution and version lines
void logo(int version)
{
	// ASCII-art banner; contains no conversion specifiers
	static const char banner[] =
		" _____ _____ ____ _ \n"
		" | __ \\ / ____| _ \\ | | \n"
		" | |__) | (___ | |_) | ___ _ __ ___| |__ \n"
		" | _ / \\___ \\| _ < / _ \\ '_ \\ / __| '_ \\ \n"
		" | | \\ \\ ____) | |_) | __/ | | | (__| | | |\n"
		" |_| \\_\\_____/|____/ \\___|_| |_|\\___|_| |_|\n";
	char version_line[100];

	border_print();
	printf("%s", banner);
	border_print();

	center_print("Developed at Argonne National Laboratory", 79);

	// Format the version number so it can be centered like the other lines
	sprintf(version_line, "Version: %d", version);
	center_print(version_line, 79);

	border_print();
}
// Prints the program banner followed by the attribution and version lines
void logo(int version)
{
	// ASCII-art banner; contains no conversion specifiers
	static const char banner[] =
		" __ __ ___________ _ \n"
		" \\ \\ / // ___| ___ \\ | | \n"
		" \\ V / \\ `--.| |_/ / ___ _ __ ___| |__ \n"
		" / \\ `--. \\ ___ \\/ _ \\ '_ \\ / __| '_ \\ \n"
		" / /^\\ \\/\\__/ / |_/ / __/ | | | (__| | | | \n"
		" \\/ \\/\\____/\\____/ \\___|_| |_|\\___|_| |_| \n\n";
	char version_line[100];

	border_print();
	printf("%s", banner);
	border_print();

	center_print("Developed at Argonne National Laboratory", 79);

	// Format the version number so it can be centered like the other lines
	sprintf(version_line, "Version: %d", version);
	center_print(version_line, 79);

	border_print();
}
// Prints the program banner followed by the attribution and version lines
void logo(int version)
{
	// ASCII-art banner; contains no conversion specifiers
	static const char banner[] =
		" __ __ ___ __ __ ___ __ ___ \n"
		" /__` | |\\/| |__) | |__ |\\/| / \\ / ` __ |__/ |__ |__) |\\ | |__ | \n"
		" .__/ | | | | |___ |___ | | \\__/ \\__, | \\ |___ | \\ | \\| |___ |___\n";
	char version_line[100];

	border_print();
	printf("%s", banner);
	printf("\n");
	border_print();
	printf("\n");

	// Attribution, one centered line per entry
	center_print("Developed at", 79);
	center_print("The Massachusetts Institute of Technology", 79);
	center_print("and", 79);
	center_print("Argonne National Laboratory", 79);
	printf("\n");

	// Format the version number so it can be centered like the other lines
	sprintf(version_line, "Version: %d", version);
	center_print(version_line, 79);
	printf("\n");

	border_print();
}
int main( int argc, char * argv[] ) { int version = 4; int mype = 0; #ifdef MPI int nranks; MPI_Status stat; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nranks); MPI_Comm_rank(MPI_COMM_WORLD, &mype); #endif #ifdef PAPI papi_serial_init(); #endif srand(time(NULL) * (mype+1)); Input input = set_default_input(); read_CLI( argc, argv, &input ); calculate_derived_inputs( &input ); if( mype == 0 ) logo(version); #ifdef OPENMP omp_set_num_threads(input.nthreads); #endif Params params = build_tracks( &input ); CommGrid grid = init_mpi_grid( input ); if( mype == 0 ) print_input_summary(input); float res; float keff = 1.0; int num_iters = 1; double time_transport = 0; double time_flux_exchange = 0; double time_renormalize_flux = 0; double time_update_sources = 0; double time_compute_keff = 0; double start, stop; if(mype==0) { center_print("SIMULATION", 79); border_print(); } for( int i = 0; i < num_iters; i++) { // Transport Sweep start = get_time(); transport_sweep(¶ms, &input); stop = get_time(); time_transport += stop-start; // Domain Boundary Flux Exchange (MPI) #ifdef MPI start = get_time(); fast_transfer_boundary_fluxes(params, input, grid); stop = get_time(); time_flux_exchange += stop-start; #endif // Flux Renormalization start = get_time(); renormalize_flux(params,input, grid); stop = get_time(); time_renormalize_flux += stop-start; // Update Source Regions start = get_time(); res = update_sources(params, input, keff); stop = get_time(); time_update_sources += stop-start; // Calculate K-Effective start = get_time(); keff = compute_keff(params, input, grid); stop = get_time(); time_compute_keff += stop-start; if( mype == 0 ) printf("keff = %f\n", keff); } double time_total = time_transport + time_flux_exchange + time_renormalize_flux + time_update_sources + time_compute_keff; if( mype == 0 ) { border_print(); center_print("RESULTS SUMMARY", 79); border_print(); printf("Transport Sweep Time: %6.2lf sec (%4.1lf%%)\n", time_transport, 
100*time_transport/time_total); printf("Domain Flux Exchange Time: %6.2lf sec (%4.1lf%%)\n", time_flux_exchange, 100*time_flux_exchange/time_total); printf("Flux Renormalization Time: %6.2lf sec (%4.1lf%%)\n", time_renormalize_flux, 100*time_renormalize_flux/time_total); printf("Update Source Time: %6.2lf sec (%4.1lf%%)\n", time_update_sources, 100*time_update_sources/time_total); printf("K-Effective Calc Time: %6.2lf sec (%4.1lf%%)\n", time_compute_keff, 100*time_compute_keff/time_total); printf("Total Time: %6.2lf sec\n", time_total); } long tracks_per_second = 2 * input.ntracks/time_transport; #ifdef MPI MPI_Barrier(grid.cart_comm_3d); long global_tps = 0; MPI_Reduce( &tracks_per_second, // Send Buffer &global_tps, // Receive Buffer 1, // Element Count MPI_LONG, // Element Type MPI_SUM, // Reduciton Operation Type 0, // Master Rank grid.cart_comm_3d ); // MPI Communicator MPI_Barrier(grid.cart_comm_3d); tracks_per_second = global_tps; #endif if( mype == 0 ) { printf("Time per Intersection: "); printf("%.2lf ns\n", time_per_intersection( input, time_transport )); border_print(); } free_2D_tracks( params.tracks_2D ); free_tracks( params.tracks ); #ifdef MPI MPI_Finalize(); #endif return 0; }
// Runs the segment attenuation kernel inside an OpenMP parallel region.
// Each thread seeds its own RNG, allocates its own SIMD work vectors and
// flux vector, then attenuates I->segments randomly chosen
// (source region, fine axial interval) pairs from a shared loop.
//   I     - run inputs (egroups, segments, region/interval counts)
//   S     - source data forwarded to attenuate_segment()
//   table - table forwarded to attenuate_segment()
void run_kernel( Input * I, Source * S, Table * table)
{
	// Enter Parallel Region
	#pragma omp parallel default(none) shared(I, S, table)
	{
		#ifdef OPENMP
		int thread = omp_get_thread_num();
		#else
		int thread = 0;
		#endif

		// Create Thread Local Random Seed
		unsigned int seed = time(NULL) * (thread+1);

		// Allocate Thread Local SIMD Vectors (align if using intel compiler)
		#ifdef INTEL
		SIMD_Vectors simd_vecs = aligned_allocate_simd_vectors(I);
		float * state_flux = (float *) _mm_malloc(
				I->egroups * sizeof(float), 64);
		#else
		SIMD_Vectors simd_vecs = allocate_simd_vectors(I);
		float * state_flux = (float *) malloc(
				I->egroups * sizeof(float));
		#endif

		// Fill the Thread Local Flux Vector with random values
		for( int i = 0; i < I->egroups; i++ )
			state_flux[i] = (float) rand_r(&seed) / RAND_MAX;

		// Initialize PAPI Counters (if enabled)
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events, I);
		}
		#endif

		// Enter OMP For Loop over Segments
		#pragma omp for schedule(dynamic,100)
		for( long i = 0; i < I->segments; i++ )
		{
			// Pick Random QSR
			int QSR_id = rand_r(&seed) % I->source_3D_regions;

			// Pick Random Fine Axial Interval
			int FAI_id = rand_r(&seed) % I->fine_axial_intervals;

			// Attenuate Segment
			attenuate_segment( I, S, QSR_id, FAI_id, state_flux,
					&simd_vecs, table);
		}

		// Stop PAPI Counters
		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count \tSmybol \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events, I);
		#endif

		// Release the thread-local flux vector with the deallocator that
		// matches how it was obtained above.
		// NOTE(review): simd_vecs is not released here; no matching
		// deallocator is visible from this block.
		#ifdef INTEL
		_mm_free(state_flux);
		#else
		free(state_flux);
		#endif
	}
}
int main( int argc, char* argv[] ) { // ===================================================================== // Initialization & Command Line Read-In // ===================================================================== int version = 13; int mype = 0; int max_procs = omp_get_num_procs(); int i, thread, mat; unsigned long seed; double omp_start, omp_end, p_energy; unsigned long long vhash = 0; int nprocs; #ifdef MPI MPI_Status stat; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &mype); #endif // rand() is only used in the serial initialization stages. // A custom RNG is used in parallel portions. #ifdef VERIFICATION srand(26); #else srand(time(NULL)); #endif // Process CLI Fields -- store in "Inputs" structure Inputs in = read_CLI( argc, argv ); // Set number of OpenMP Threads omp_set_num_threads(in.nthreads); // Print-out of Input Summary if( mype == 0 ) print_inputs( in, nprocs, version ); // ===================================================================== // Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data // ===================================================================== // Allocate & fill energy grids #ifndef BINARY_READ if( mype == 0) printf("Generating Nuclide Energy Grids...\n"); #endif NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes,in.n_gridpoints); #ifdef VERIFICATION generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints ); #else generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints ); #endif // Sort grids by energy #ifndef BINARY_READ if( mype == 0) printf("Sorting Nuclide Energy Grids...\n"); sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints ); #endif // Prepare Unionized Energy Grid Framework #ifndef BINARY_READ GridPoint * energy_grid = generate_energy_grid( in.n_isotopes, in.n_gridpoints, nuclide_grids ); #else GridPoint * energy_grid = (GridPoint *)malloc( in.n_isotopes * in.n_gridpoints * sizeof( GridPoint ) ); int * 
index_data = (int *) malloc( in.n_isotopes * in.n_gridpoints * in.n_isotopes * sizeof(int)); for( i = 0; i < in.n_isotopes*in.n_gridpoints; i++ ) energy_grid[i].xs_ptrs = &index_data[i*in.n_isotopes]; #endif // Double Indexing. Filling in energy_grid with pointers to the // nuclide_energy_grids. #ifndef BINARY_READ set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints ); #endif #ifdef BINARY_READ if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n"); binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid); #endif // Get material data if( mype == 0 ) printf("Loading Mats...\n"); int *num_nucs = load_num_nucs(in.n_isotopes); int **mats = load_mats(num_nucs, in.n_isotopes); #ifdef VERIFICATION double **concs = load_concs_v(num_nucs); #else double **concs = load_concs(num_nucs); #endif #ifdef BINARY_DUMP if( mype == 0 ) printf("Dumping data to binary file...\n"); binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid); if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! 
Exiting...\n"); return 0; #endif // ===================================================================== // Cross Section (XS) Parallel Lookup Simulation Begins // ===================================================================== // Outer benchmark loop can loop through all possible # of threads #ifdef BENCHMARK for( int bench_n = 1; bench_n <=omp_get_num_procs(); bench_n++ ) { in.nthreads = bench_n; omp_set_num_threads(in.nthreads); #endif if( mype == 0 ) { printf("\n"); border_print(); center_print("SIMULATION", 79); border_print(); } omp_start = omp_get_wtime(); //initialize papi with one thread (master) here #ifdef PAPI if ( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT){ fprintf(stderr, "PAPI library init error!\n"); exit(1); } #endif // OpenMP compiler directives - declaring variables as shared or private #pragma omp parallel default(none) \ private(i, thread, p_energy, mat, seed) \ shared( max_procs, in, energy_grid, nuclide_grids, \ mats, concs, num_nucs, mype, vhash) { // Initialize parallel PAPI counters #ifdef PAPI int eventset = PAPI_NULL; int num_papi_events; #pragma omp critical { counter_init(&eventset, &num_papi_events); } #endif double macro_xs_vector[5]; double * xs = (double *) calloc(5, sizeof(double)); // Initialize RNG seeds for threads thread = omp_get_thread_num(); seed = (thread+1)*19+17; // XS Lookup Loop #pragma omp for schedule(dynamic) for( i = 0; i < in.lookups; i++ ) { // Status text if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 ) printf("\rCalculating XS's... 
(%.0lf%% completed)", (i / ( (double)in.lookups / (double) in.nthreads )) / (double) in.nthreads * 100.0); // Randomly pick an energy and material for the particle #ifdef VERIFICATION #pragma omp critical { p_energy = rn_v(); mat = pick_mat(&seed); } #else p_energy = rn(&seed); mat = pick_mat(&seed); #endif // debugging //printf("E = %lf mat = %d\n", p_energy, mat); // This returns the macro_xs_vector, but we're not going // to do anything with it in this program, so return value // is written over. calculate_macro_xs( p_energy, mat, in.n_isotopes, in.n_gridpoints, num_nucs, concs, energy_grid, nuclide_grids, mats, macro_xs_vector ); // Copy results from above function call onto heap // so that compiler cannot optimize function out // (only occurs if -flto flag is used) memcpy(xs, macro_xs_vector, 5*sizeof(double)); // Verification hash calculation // This method provides a consistent hash accross // architectures and compilers. #ifdef VERIFICATION char line[256]; sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf", p_energy, mat, macro_xs_vector[0], macro_xs_vector[1], macro_xs_vector[2], macro_xs_vector[3], macro_xs_vector[4]); unsigned long long vhash_local = hash(line, 10000); #pragma omp atomic vhash += vhash_local; #endif } // Prints out thread local PAPI counters #ifdef PAPI if( mype == 0 && thread == 0 ) { printf("\n"); border_print(); center_print("PAPI COUNTER RESULTS", 79); border_print(); printf("Count \tSmybol \tDescription\n"); } { #pragma omp barrier } counter_stop(&eventset, num_papi_events); #endif } #ifndef PAPI if( mype == 0) { printf("\n" ); printf("Simulation complete.\n" ); } #endif omp_end = omp_get_wtime(); // Print / Save Results and Exit print_results( in, mype, omp_end-omp_start, nprocs, vhash ); #ifdef BENCHMARK } #endif #ifdef MPI MPI_Finalize(); #endif return 0; }
int main(int argc, char *argv[]) { Parameters *parameters; // user defined parameters Geometry *geometry; // homogenous cube geometry Material *material; // problem material Bank *source_bank; // array for particle source sites Tally *tally; // scalar flux tally double *keff; // effective multiplication factor double t1, t2; // timers #ifdef _OPENMP unsigned long counter = 0; //counter to decide the start pos of master bank copy from sub banks Bank *g_fission_bank; //global fission bank #endif // Get inputs: set parameters to default values, parse parameter file, // override with any command line inputs, and print parameters parameters = init_parameters(); parse_parameters(parameters); read_CLI(argc, argv, parameters); print_parameters(parameters); // Set initial RNG seed set_initial_seed(parameters->seed); set_stream(STREAM_INIT); // Create files for writing results to init_output(parameters); // Set up geometry geometry = init_geometry(parameters); // Set up material material = init_material(parameters); // Set up tallies tally = init_tally(parameters); // Create source bank and initial source distribution source_bank = init_source_bank(parameters, geometry); // Create fission bank #ifdef _OPENMP omp_set_num_threads(parameters->n_threads); // Set number of openmp threads printf("threads num: %d\n", parameters->n_threads); // Allocate one master fission bank g_fission_bank = init_bank(2*parameters->n_particles); #endif // Set up array for k effective keff = calloc(parameters->n_active, sizeof(double)); center_print("SIMULATION", 79); border_print(); printf("%-15s %-15s %-15s\n", "BATCH", "KEFF", "MEAN KEFF"); #ifdef _OPENMP // Start time t1 = omp_get_wtime(); run_eigenvalue(counter, g_fission_bank, parameters, geometry, material, source_bank, fission_bank, tally, keff); // Stop time t2 = omp_get_wtime(); #endif printf("Simulation time: %f secs\n", t2-t1); // Free memory #ifdef _OPENMP free_bank(g_fission_bank); #endif free(keff); free_tally(tally); 
free_bank(source_bank); free_material(material); free(geometry); free(parameters); return 0; }
// Stops the papi counters and prints results void counter_stop( int * eventset, int num_papi_events, Input * I ) { int * events = malloc(num_papi_events * sizeof(int)); int n = num_papi_events; PAPI_list_events( *eventset, events, &n ); PAPI_event_info_t info; long_long * values = malloc( num_papi_events * sizeof(long_long)); PAPI_stop(*eventset, values); int thread = omp_get_thread_num(); int nthreads = omp_get_num_threads(); static long LLC_cache_miss = 0; static long total_cycles = 0; static long FLOPS = 0; static long stall_any = 0; static long stall_SB = 0; static long stall_RS = 0; static long stall_OO = 0; static long tlb_load = 0; static long tlb_load_m = 0; static long tlb_store = 0; static long tlb_store_m = 0; #pragma omp master { I->vals_accum = malloc( num_papi_events * sizeof(long long)); for(int i=0; i < num_papi_events ; i ++) I->vals_accum[i] = 0; } #pragma omp barrier #pragma omp critical (papi) { printf("Thread %d\n", thread); for( int i = 0; i < num_papi_events; i++ ) { I->vals_accum[i] += values[i]; PAPI_get_event_info(events[i], &info); printf("%-15lld\t%s\t%s\n", values[i],info.symbol,info.long_descr); if( strcmp(info.symbol, "PAPI_L3_TCM") == 0 ) LLC_cache_miss += values[i]; if( strcmp(info.symbol, "PAPI_TOT_CYC") == 0 ) total_cycles += values[i]; if( strcmp(info.symbol, "PAPI_SP_OPS") == 0 ) FLOPS += values[i]; if( strcmp(info.symbol, "RESOURCE_STALLS:ANY") == 0 ) stall_any += values[i]; if( strcmp(info.symbol, "RESOURCE_STALLS:SB") == 0 ) stall_SB += values[i]; if( strcmp(info.symbol, "RESOURCE_STALLS:RS") == 0 ) stall_RS += values[i]; if( strcmp(info.symbol, "RESOURCE_STALLS2:OOO_RSRC") == 0 ) stall_OO += values[i]; if( strcmp(info.symbol, "perf::DTLB-LOADS") == 0 ) tlb_load += values[i]; if( strcmp(info.symbol, "perf::DTLB-LOAD-MISSES") == 0 ) tlb_load_m += values[i]; if( strcmp(info.symbol, "perf::DTLB-STORES") == 0 ) tlb_store += values[i]; if( strcmp(info.symbol, "perf::DTLB-STORE-MISSES") == 0 ) tlb_store_m += values[i]; } 
free(values); } { #pragma omp barrier } #pragma omp master { if( omp_get_num_threads() > 1){ printf("Thread Totals:\n"); for( int i = 0; i < num_papi_events; i++ ) { PAPI_get_event_info(events[i], &info); printf("%-15lld\t%s\t%s\n", I->vals_accum[i],info.symbol,info.long_descr); } } free( I->vals_accum ); border_print(); center_print("PERFORMANCE SUMMARY", 79); border_print(); long cycles = (long) (total_cycles / (double) nthreads); double bw = LLC_cache_miss*64./cycles*2.8e9/1024./1024./1024.; if( I->papi_event_set == 0 ) printf("GFLOPs: %.3lf\n", FLOPS / (double) cycles * 2.8 ); if( I->papi_event_set == 1 ) printf("Bandwidth: %.3lf (GB/s)\n", bw); if( I->papi_event_set == 2 ) { printf("%-30s %.2lf%%\n", "Store Buffer Full:", stall_SB / (double) stall_any * 100.); printf("%-30s %.2lf%%\n", "Reservation Station Full:", stall_RS / (double) stall_any * 100.); printf("%-30s %.2lf%%\n", "OO Pipeline Full:", stall_OO / (double) stall_any * 100.); } if( I->papi_event_set == 3 ) printf("CPU Stalled Cycles: %.2lf%%\n", stall_any / (double) total_cycles * 100.); if( I->papi_event_set == 7 ) { printf("%-30s %.2lf%%\n", "Data TLB Load Miss Rate: ", tlb_load_m / (double) tlb_load * 100 ); printf("%-30s %.2lf%%\n", "Data TLB Store Miss Rate: ", tlb_store_m / (double) tlb_store * 100 ); } border_print(); } free(events); }
// Runs the history based simulation: particles are distributed over the
// OpenMP threads, and each particle performs input.lookups sequential
// macroscopic cross-section lookups.
//   input          - run inputs (particles, lookups, numL, nthreads)
//   data           - calculation data forwarded to calculate_macro_xs()
//   abrarov_result - out: counter summed over all threads
//   alls_result    - out: counter summed over all threads
//   vhash_result   - out: verification hash summed over all threads
//                    (stays 0 unless VERIFICATION is defined)
void run_history_based_simulation(Input input, CalcDataPtrs data, long * abrarov_result, long * alls_result, unsigned long * vhash_result )
{
	printf("Beginning history based simulation...\n");
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;

	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
	{
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		long abrarov = 0;
		long alls = 0;

		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., particle histories can be executed in any order
		#pragma omp for schedule(guided)
		for( int p = 0; p < input.particles; p++ )
		{
			// Particles are seeded by their particle ID
			unsigned long seed = ((unsigned long) p+ (unsigned long)1)* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
			double E = rn(&seed);
			int mat  = pick_mat(&seed);

			#ifdef STATUS
			if( thread == 0 && p % 35 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(p / ( (double)input.particles /
							   (double) input.nthreads )) /
						(double) input.nthreads * 100.0);
			#endif

			// This loop is dependent!
			// I.e., This loop must be executed sequentially,
			// as each lookup depends on results from the previous lookup.
			for( int i = 0; i < input.lookups; i++ )
			{
				double macro_xs[4] = {0};

				calculate_macro_xs( macro_xs, mat, E, input, data, sigTfactors, &abrarov, &alls );

				// Results are copied onto heap to avoid some compiler
				// flags (-flto) from optimizing out function call
				memcpy(xs, macro_xs, 4*sizeof(double));

				// Verification hash calculation
				// This method provides a consistent hash across
				// architectures and compilers.
				#ifdef VERIFICATION
				char line[256];
				sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
						E, mat,
						macro_xs[0],
						macro_xs[1],
						macro_xs[2],
						macro_xs[3]);
				unsigned long long vhash_local = hash(line, 10000);

				vhash += vhash_local;
				#endif

				// Randomly pick next energy and material for the particle
				// Also incorporates results from macro_xs lookup to
				// enforce loop dependency.
				// In a real MC app, this dependency is expressed in terms
				// of branching physics sampling, whereas here we are just
				// artificially enforcing this dependence based on altering
				// the seed
				for( int x = 0; x < 4; x++ )
				{
					if( macro_xs[x] > 0 )
						seed += 1337*p;
					else
						seed += 42;
				}

				E   = rn(&seed);
				mat = pick_mat(&seed);
			}
		}

		// Release the thread-local work buffers
		free(sigTfactors);
		free(xs);

		// Accumulate global counters: each thread stores its counts in
		// its private reduction copy, which OpenMP sums at region end
		g_abrarov = abrarov;
		g_alls = alls;

		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count \tSmybol \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif
	}
	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;
}
// Runs the event based simulation: input.lookups independent macroscopic
// cross-section lookups are distributed over the OpenMP threads.
//   input          - run inputs (lookups, numL, nthreads)
//   data           - calculation data forwarded to calculate_macro_xs()
//   abrarov_result - out: counter summed over all threads
//   alls_result    - out: counter summed over all threads
//   vhash_result   - out: verification hash summed over all threads
//                    (stays 0 unless VERIFICATION is defined)
void run_event_based_simulation(Input input, CalcDataPtrs data, long * abrarov_result, long * alls_result, unsigned long * vhash_result )
{
	printf("Beginning event based simulation...\n");
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;

	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
	{
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		long abrarov = 0;
		long alls = 0;

		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., macroscopic cross section lookups in the event based simulation
		// can be executed in any order.
		#pragma omp for schedule(guided)
		for( int i = 0; i < input.lookups; i++ )
		{
			// Each lookup is seeded by its own index
			unsigned long seed = ((unsigned long) i+ (unsigned long)1)* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
			double E = rn(&seed);
			int mat  = pick_mat(&seed);

			#ifdef STATUS
			if( thread == 0 && i % 2000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)input.lookups /
							   (double) input.nthreads )) /
						(double) input.nthreads * 100.0);
			#endif

			double macro_xs[4] = {0};

			calculate_macro_xs( macro_xs, mat, E, input, data, sigTfactors, &abrarov, &alls );

			// Results are copied onto heap to avoid some compiler
			// flags (-flto) from optimizing out function call
			memcpy(xs, macro_xs, 4*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			#ifdef VERIFICATION
			char line[256];
			sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
					E, mat,
					macro_xs[0],
					macro_xs[1],
					macro_xs[2],
					macro_xs[3]);
			unsigned long long vhash_local = hash(line, 10000);

			vhash += vhash_local;
			#endif
		}

		// Release the thread-local work buffers
		free(sigTfactors);
		free(xs);

		// Accumulate global counters: each thread stores its counts in
		// its private reduction copy, which OpenMP sums at region end
		g_abrarov = abrarov;
		g_alls = alls;

		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count \tSmybol \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif
	}
	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;
}
int main(int argc, char *argv[]) { Parameters *parameters; // user defined parameters Geometry *geometry; // homogenous cube geometry Material *material; // problem material Bank *source_bank; // array for particle source sites Bank *fission_bank; // array for particle fission sites Tally *tally; // scalar flux tally double *keff; // effective multiplication factor double t1, t2; // timers // Get inputs: set parameters to default values, parse parameter file, // override with any command line inputs, and print parameters parameters = init_parameters(); parse_parameters(parameters); read_CLI(argc, argv, parameters); print_parameters(parameters); // Set initial RNG seed set_initial_seed(parameters->seed); set_stream(STREAM_OTHER); // Create files for writing results to init_output(parameters); // Set up geometry geometry = init_geometry(parameters); // Set up material material = init_material(parameters); // Set up tallies tally = init_tally(parameters); // Create source bank and initial source distribution source_bank = init_source_bank(parameters, geometry); // Create fission bank fission_bank = init_fission_bank(parameters); // Set up array for k effective keff = calloc(parameters->n_active, sizeof(double)); center_print("SIMULATION", 79); border_print(); printf("%-15s %-15s %-15s %-15s\n", "BATCH", "ENTROPY", "KEFF", "MEAN KEFF"); // Start time t1 = timer(); run_eigenvalue(parameters, geometry, material, source_bank, fission_bank, tally, keff); // Stop time t2 = timer(); printf("Simulation time: %f secs\n", t2-t1); // Free memory free(keff); free_tally(tally); free_bank(fission_bank); free_bank(source_bank); free_material(material); free(geometry); free(parameters); return 0; }