static void EMU_CALL gate_transition(struct IOPTIMER_STATE *state) {
  uint32 c;
  for(c = 0; c < COUNTERS; c++) {
    // must be both enabled and gate-enabled
    if((state->counter[c].mode & 0x41) != 0x41) continue;
    switch(state->counter[c].mode & 0x6) {
    case 0x0: // TM_GATE_ON_Count
      if(state->gate) { counter_start(state, c); }
      else            { counter_stop (state, c); }
      break;
    case 0x2: // TM_GATE_ON_ClearStart
      if(state->gate) { counter_start(state, c); }
      break;
    case 0x4: // TM_GATE_ON_Clear_OFF_Start
      if(state->gate) { counter_stop (state, c); }
      else            { counter_start(state, c); }
      break;
    case 0x6: // TM_GATE_ON_Start
      if(state->gate) {
        // one-time start: disable gate bit
        state->counter[c].mode &= ~1;
        counter_start(state, c);
      }
      break;
    }
  }
}
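/*
 * The standalone benchmark snippets below read a pair of globals c1/c2
 * around bare counter_start()/counter_stop() calls whose definitions are
 * not part of this file. What follows is a minimal sketch of such a
 * harness, NOT the original implementation: it assumes an x86 TSC read
 * via __rdtsc() and truncates to the unsigned width the snippets print
 * with %u.
 */
#include <stdint.h>
#include <x86intrin.h>

static unsigned c1, c2;

static void counter_start(void) { c1 = (unsigned)__rdtsc(); }
static void counter_stop(void)  { c2 = (unsigned)__rdtsc(); }
/* Elapsed ticks are then read as c2 - c1 by the callers. */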
int main(int argc, char *argv[]) {
    int i, j, k;
    int *base;      // pointer returned by malloc, kept for free()
    int *data;      // 32-byte-aligned working pointer
    unsigned adj;

    int array_size = (1 << (WRITE_DATA_SIZE - 2)) * 10;

    // Allocate with 64 bytes of slack, then align the working pointer to a
    // 32-byte boundary. (The original computed adj from data before data was
    // initialized; the alignment must happen after malloc, in bytes.)
    base = (int *)malloc(sizeof(int) * array_size + 64);
    adj  = (32 - ((uintptr_t)base % 32)) % 32;
    data = (int *)((char *)base + adj);

    counter_start();
    memset(data, 0, sizeof(int) * array_size);
    for (j = 1; j <= 100; j = j * 10) {
        while (*data < j) {
            // Touch the array in eight interleaved strided passes.
            for (k = 0; k <= 7; k++) {
                for (i = k; i < array_size; i += 8) {
                    data[i] = data[i] + 1;
                }
            }
        }
        printf("%d\n", *data);
    }
    counter_stop();
    printf("Total time for prog2 is %u s\n", c2 - c1);
    free(base);
    return 0;
}
unsigned readData(int *data, long length) {
    int i; // iterator
    counter_start();
    for (i = 0; i < length / 8; i++) {
        (*(data + i * 8))++;
    }
    counter_stop();
    return c2 - c1;
}
int readData(int *data, int size, int slot) {
    int i;
    counter_start();
    for (i = 0; i < size; i++) {
        (*(data + i * slot))++;
    }
    counter_stop();
    printf("%d %u\n", slot, c2 - c1);
    return 0;
}
unsigned readCache(int *a, long start, long end) {
    int i;   // iterator
    int tmp; // temp data
    counter_start();
    for (i = start; i < end; i++) {
        tmp = a[i];
    }
    counter_stop();
    return c2 - c1;
}
int calculateTime(int *data, int log_of_associativity) {
    unsigned c = 0;
    long dataNumber = 1 << (CACHE_SIZE - log_of_associativity);
    int associativity = 1 << log_of_associativity;
    int i, counter;

    for (counter = 0; counter < TIMES; counter++) {
        for (i = 0; i < associativity; i++) {
            // Empty start/stop pair: its result is discarded, apparently to
            // absorb the fixed counter overhead before the real measurement.
            counter_start();
            counter_stop();
            counter_start();
            (*(data + 2 * i * dataNumber))++;
            counter_stop();
            c += c2 - c1;
        }
    }
    // Average over all TIMES * associativity samples. (The original computed
    // this divisor in `times` but then divided by associativity alone,
    // overstating the per-access cycle count by a factor of TIMES.)
    double times = (double)TIMES * associativity;
    printf("Cycles for %3d way: %u\n", associativity, (unsigned)(c / times));
    return 0;
}
int readData(int *data, int initial, int length) {
    int i, temp;
    unsigned c = 0;
    for (i = 0; i < length / 4; i++) {
        counter_start();
        temp = *(data + i * 4);
        counter_stop();
        c += c2 - c1;
    }
    printf("%d %d %u\n", initial, initial + length, c);
    return 0;
}
int readData(int *data, int initial, int length) {
    int i, temp;
    counter_start();
    for (i = 0; i < length / 4; i++) {
        temp = *(data + i * 4);
    }
    counter_stop();
    printf("%d %d %u\n", initial, initial + length, c2 - c1);
    return 0;
}
int com_stat(char *arg)
{
  int nrecs;
  int tsID;

  UNUSED(arg);

  fprintf(stdout, "name=%s\n", all_vars[gl_varID].name);

  for ( tsID = gl_tsID1; tsID <= gl_tsID2; ++tsID )
    {
      nrecs = streamInqTimestep(gl_streamID, tsID);
      if ( nrecs == 0 )
        {
          fprintf(stderr, "Timestep %d out of range!\n", tsID+1);
          break;
        }
      else
        {
          int i;
          int nmiss;
          int gridsize;
          double fmin = 1.e50, fmax = -1.e50, fmean = 0;
          counter_t counter;

          counter_start(&counter);
          streamReadVarSlice(gl_streamID, gl_varID, levelID, gl_data, &nmiss);

          gridsize = gridInqSize(vlistInqVarGrid(gl_vlistID, gl_varID));
          for ( i = 0; i < gridsize; ++i )
            {
              if ( gl_data[i] < fmin ) fmin = gl_data[i];
              if ( gl_data[i] > fmax ) fmax = gl_data[i];
              fmean += gl_data[i];
            }
          fmean /= gridsize;
          counter_stop(&counter);

          fprintf(stdout, "timestep=%d %g %g %g (%gs)\n",
                  tsID+1, fmin, fmean, fmax, counter_cputime(counter));
        }
    }

  return 0;
}
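/*
 * com_stat() above uses a struct-based counter_t API rather than the global
 * c1/c2 pair. A minimal sketch of that API (an assumption, not the original
 * utility code), timing with the C standard clock():
 */
#include <time.h>

typedef struct { clock_t begin, end; } counter_t;

static void counter_start(counter_t *c) { c->begin = clock(); }
static void counter_stop(counter_t *c)  { c->end = clock(); }
static double counter_cputime(counter_t c)
{
  return (double)(c.end - c.begin) / CLOCKS_PER_SEC; /* seconds */
}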
void LongProcessing(void)
{
    int i, j;
    counter_start(10000);
    for (i = 0; i < 10000; i++) {
        // Busy work standing in for a long computation.
        for (j = 0; j < 10000; j++) {
            double x = fabs(sin(j * 100.) + cos(j * 100.));
            x = sqrt(x * x);
        }
        if (!counter_inc())
            break;
    }
    counter_stop();
}
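/*
 * LongProcessing() uses yet another counter flavor: a progress counter.
 * Reading from the call sites, counter_start(total) announces the expected
 * iteration count, counter_inc() advances progress and returns 0 when the
 * loop should abort (e.g. user cancellation), and counter_stop() closes the
 * progress display. A minimal sketch under exactly those assumptions:
 */
static int progress_total, progress_count, progress_cancelled;

static void counter_start(int total)
{
    progress_total = total;
    progress_count = 0;
    progress_cancelled = 0;  /* a UI callback would set this on cancel */
}

static int counter_inc(void)
{
    ++progress_count;        /* update a progress display here */
    return !progress_cancelled;  /* 0 tells the caller to break out */
}

static void counter_stop(void) { /* e.g. hide the progress dialog */ }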
void run_kernel( Input * I, Source * S, Table * table)
{
	// Enter Parallel Region
	#pragma omp parallel default(none) shared(I, S, table)
	{
		#ifdef OPENMP
		int thread = omp_get_thread_num();
		#else
		int thread = 0;
		#endif

		// Create Thread Local Random Seed
		unsigned int seed = time(NULL) * (thread+1);

		// Allocate Thread Local SIMD Vectors (align if using intel compiler)
		#ifdef INTEL
		SIMD_Vectors simd_vecs = aligned_allocate_simd_vectors(I);
		float * state_flux = (float *) _mm_malloc(
				I->egroups * sizeof(float), 64);
		#else
		SIMD_Vectors simd_vecs = allocate_simd_vectors(I);
		float * state_flux = (float *) malloc(
				I->egroups * sizeof(float));
		#endif

		// Initialize Thread Local Flux Vector
		for( int i = 0; i < I->egroups; i++ )
			state_flux[i] = (float) rand_r(&seed) / RAND_MAX;

		// Initialize PAPI Counters (if enabled)
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events, I);
		}
		#endif

		// Enter OMP For Loop over Segments
		#pragma omp for schedule(dynamic,100)
		for( long i = 0; i < I->segments; i++ )
		{
			// Pick Random QSR
			int QSR_id = rand_r(&seed) % I->source_3D_regions;

			// Pick Random Fine Axial Interval
			int FAI_id = rand_r(&seed) % I->fine_axial_intervals;

			// Attenuate Segment
			attenuate_segment( I, S, QSR_id, FAI_id, state_flux,
					&simd_vecs, table);
		}

		// Stop PAPI Counters
		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count \tSymbol \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events, I);
		#endif
	}
}
/**@brief AMT Service Handler. */
static void amts_evt_handler(nrf_ble_amts_evt_t evt)
{
    ret_code_t err_code;

    switch (evt.evt_type)
    {
        case SERVICE_EVT_NOTIF_ENABLED:
        {
            NRF_LOG_INFO("Notifications enabled.\r\n");
            bsp_board_led_on(LED_READY);
            m_notif_enabled = true;

            if (m_board_role != BOARD_TESTER)
            {
                return;
            }
            if (m_gap_role == BLE_GAP_ROLE_PERIPH)
            {
                m_conn_interval_configured     = false;
                m_conn_param.min_conn_interval = m_test_params.conn_interval;
                m_conn_param.max_conn_interval = m_test_params.conn_interval + 1;
                err_code = ble_conn_params_change_conn_params(&m_conn_param);
                if (err_code != NRF_SUCCESS)
                {
                    NRF_LOG_ERROR("ble_conn_params_change_conn_params() failed: 0x%x.\r\n",
                                  err_code);
                }
            }
            if (m_gap_role == BLE_GAP_ROLE_CENTRAL)
            {
                m_conn_interval_configured     = true;
                m_conn_param.min_conn_interval = m_test_params.conn_interval;
                m_conn_param.max_conn_interval = m_test_params.conn_interval;
                err_code = sd_ble_gap_conn_param_update(m_conn_handle, &m_conn_param);
                if (err_code != NRF_SUCCESS)
                {
                    NRF_LOG_ERROR("sd_ble_gap_conn_param_update() failed: 0x%x.\r\n",
                                  err_code);
                }
            }
        } break;

        case SERVICE_EVT_NOTIF_DISABLED:
        {
            NRF_LOG_INFO("Notifications disabled.\r\n");
            bsp_board_led_off(LED_READY);
        } break;

        case SERVICE_EVT_TRANSFER_1KB:
        {
            NRF_LOG_INFO("Sent %u KBytes\r\n", (evt.bytes_transfered_cnt / 1024));
            bsp_board_led_invert(LED_PROGRESS);
        } break;

        case SERVICE_EVT_TRANSFER_FINISHED:
        {
            // Stop counter as soon as possible.
            counter_stop();

            bsp_board_led_off(LED_PROGRESS);
            bsp_board_led_on(LED_FINISHED);

            uint32_t time_ms    = counter_get();
            uint32_t bit_count  = (evt.bytes_transfered_cnt * 8);
            float    throughput = (((float)(bit_count * 100) / time_ms) / 1024);

            NRF_LOG_INFO("Done.\r\n\r\n");
            NRF_LOG_INFO("=============================\r\n");
            NRF_LOG_INFO("Time: %u.%.2u seconds elapsed.\r\n",
                         (counter_get() / 100), (counter_get() % 100));
            NRF_LOG_INFO("Throughput: " NRF_LOG_FLOAT_MARKER " Kbits/s.\r\n",
                         NRF_LOG_FLOAT(throughput));
            NRF_LOG_INFO("=============================\r\n");
            NRF_LOG_INFO("Sent %u bytes of ATT payload.\r\n", evt.bytes_transfered_cnt);
            NRF_LOG_INFO("Retrieving amount of bytes received from peer...\r\n");

            err_code = nrf_ble_amtc_rcb_read(&m_amtc);
            if (err_code != NRF_SUCCESS)
            {
                NRF_LOG_ERROR("nrf_ble_amtc_rcb_read() failed: 0x%x.\r\n", err_code);
                test_terminate();
            }
        } break;
    }
}
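/*
 * Unit check for the throughput math above, assuming counter_get() returns
 * ticks of a 100 Hz counter (the "Time: %u.%.2u seconds" print implies
 * centisecond resolution, despite the time_ms name):
 *
 *   bits * 100 / ticks  =  bits * 100 / (seconds * 100)  =  bits/s
 *
 * and dividing by 1024 yields Kbits/s. Worked example: 1,048,576 bytes sent
 * in 500 ticks (5.00 s) gives (8,388,608 * 100 / 500) / 1024 = 1638.4 Kbits/s.
 */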
int main( int argc, char* argv[] )
{
	// =====================================================================
	// Initialization & Command Line Read-In
	// =====================================================================

	int version = 13;
	int mype = 0;
	int max_procs = omp_get_num_procs();
	int i, thread, mat;
	unsigned long seed;
	double omp_start, omp_end, p_energy;
	unsigned long long vhash = 0;
	int nprocs;

	#ifdef MPI
	MPI_Status stat;
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &mype);
	#endif

	// rand() is only used in the serial initialization stages.
	// A custom RNG is used in parallel portions.
	#ifdef VERIFICATION
	srand(26);
	#else
	srand(time(NULL));
	#endif

	// Process CLI Fields -- store in "Inputs" structure
	Inputs in = read_CLI( argc, argv );

	// Set number of OpenMP Threads
	omp_set_num_threads(in.nthreads);

	// Print-out of Input Summary
	if( mype == 0 )
		print_inputs( in, nprocs, version );

	// =====================================================================
	// Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
	// =====================================================================

	// Allocate & fill energy grids
	#ifndef BINARY_READ
	if( mype == 0 ) printf("Generating Nuclide Energy Grids...\n");
	#endif

	NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes, in.n_gridpoints);

	#ifdef VERIFICATION
	generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#else
	generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Sort grids by energy
	#ifndef BINARY_READ
	if( mype == 0 ) printf("Sorting Nuclide Energy Grids...\n");
	sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Prepare Unionized Energy Grid Framework
	#ifndef BINARY_READ
	GridPoint * energy_grid = generate_energy_grid( in.n_isotopes,
			in.n_gridpoints, nuclide_grids );
	#else
	GridPoint * energy_grid = (GridPoint *) malloc( in.n_isotopes *
			in.n_gridpoints * sizeof( GridPoint ) );
	int * index_data = (int *) malloc( in.n_isotopes * in.n_gridpoints
			* in.n_isotopes * sizeof(int));
	for( i = 0; i < in.n_isotopes * in.n_gridpoints; i++ )
		energy_grid[i].xs_ptrs = &index_data[i * in.n_isotopes];
	#endif

	// Double Indexing. Filling in energy_grid with pointers to the
	// nuclide_energy_grids.
	#ifndef BINARY_READ
	set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	#ifdef BINARY_READ
	if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n");
	binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	#endif

	// Get material data
	if( mype == 0 ) printf("Loading Mats...\n");
	int *num_nucs  = load_num_nucs(in.n_isotopes);
	int **mats     = load_mats(num_nucs, in.n_isotopes);

	#ifdef VERIFICATION
	double **concs = load_concs_v(num_nucs);
	#else
	double **concs = load_concs(num_nucs);
	#endif

	#ifdef BINARY_DUMP
	if( mype == 0 ) printf("Dumping data to binary file...\n");
	binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! Exiting...\n");
	return 0;
	#endif

	// =====================================================================
	// Cross Section (XS) Parallel Lookup Simulation Begins
	// =====================================================================

	// Outer benchmark loop can loop through all possible # of threads
	#ifdef BENCHMARK
	for( int bench_n = 1; bench_n <= omp_get_num_procs(); bench_n++ )
	{
		in.nthreads = bench_n;
		omp_set_num_threads(in.nthreads);
	#endif

	if( mype == 0 )
	{
		printf("\n");
		border_print();
		center_print("SIMULATION", 79);
		border_print();
	}

	omp_start = omp_get_wtime();

	// Initialize PAPI with one thread (master) here
	#ifdef PAPI
	if( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT )
	{
		fprintf(stderr, "PAPI library init error!\n");
		exit(1);
	}
	#endif

	// OpenMP compiler directives - declaring variables as shared or private
	#pragma omp parallel default(none) \
	private(i, thread, p_energy, mat, seed) \
	shared( max_procs, in, energy_grid, nuclide_grids, \
	        mats, concs, num_nucs, mype, vhash)
	{
		// Initialize parallel PAPI counters
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		double macro_xs_vector[5];
		double * xs = (double *) calloc(5, sizeof(double));

		// Initialize RNG seeds for threads
		thread = omp_get_thread_num();
		seed   = (thread+1)*19+17;

		// XS Lookup Loop
		#pragma omp for schedule(dynamic)
		for( i = 0; i < in.lookups; i++ )
		{
			// Status text
			if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)in.lookups / (double) in.nthreads ))
						/ (double) in.nthreads * 100.0);

			// Randomly pick an energy and material for the particle
			#ifdef VERIFICATION
			#pragma omp critical
			{
				p_energy = rn_v();
				mat = pick_mat(&seed);
			}
			#else
			p_energy = rn(&seed);
			mat = pick_mat(&seed);
			#endif

			// debugging
			//printf("E = %lf mat = %d\n", p_energy, mat);

			// This returns the macro_xs_vector, but we're not going
			// to do anything with it in this program, so return value
			// is written over.
			calculate_macro_xs( p_energy, mat, in.n_isotopes,
			                    in.n_gridpoints, num_nucs, concs,
			                    energy_grid, nuclide_grids, mats,
			                    macro_xs_vector );

			// Copy results from above function call onto heap
			// so that compiler cannot optimize function out
			// (only occurs if -flto flag is used)
			memcpy(xs, macro_xs_vector, 5*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			#ifdef VERIFICATION
			char line[256];
			sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf",
			        p_energy, mat,
			        macro_xs_vector[0], macro_xs_vector[1],
			        macro_xs_vector[2], macro_xs_vector[3],
			        macro_xs_vector[4]);
			unsigned long long vhash_local = hash(line, 10000);
			#pragma omp atomic
			vhash += vhash_local;
			#endif
		}

		// Prints out thread local PAPI counters
		#ifdef PAPI
		if( mype == 0 && thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count \tSymbol \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif
	}

	#ifndef PAPI
	if( mype == 0 )
	{
		printf("\n");
		printf("Simulation complete.\n");
	}
	#endif

	omp_end = omp_get_wtime();

	// Print / Save Results and Exit
	print_results( in, mype, omp_end - omp_start, nprocs, vhash );

	#ifdef BENCHMARK
	}
	#endif

	#ifdef MPI
	MPI_Finalize();
	#endif

	return 0;
}
void run_history_based_simulation(Input input, CalcDataPtrs data,
		long * abrarov_result, long * alls_result, unsigned long * vhash_result )
{
	printf("Beginning history based simulation...\n");
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;

	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
	{
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		long abrarov = 0;
		long alls = 0;

		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., particle histories can be executed in any order
		#pragma omp for schedule(guided)
		for( int p = 0; p < input.particles; p++ )
		{
			// Particles are seeded by their particle ID
			unsigned long seed = ((unsigned long) p + (unsigned long) 1)
				* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
			double E = rn(&seed);
			int mat = pick_mat(&seed);

			#ifdef STATUS
			if( thread == 0 && p % 35 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(p / ( (double)input.particles /
						       (double) input.nthreads ))
						/ (double) input.nthreads * 100.0);
			#endif

			// This loop is dependent!
			// I.e., it must be executed sequentially,
			// as each lookup depends on results from the previous lookup.
			for( int i = 0; i < input.lookups; i++ )
			{
				double macro_xs[4] = {0};

				calculate_macro_xs( macro_xs, mat, E, input, data,
						sigTfactors, &abrarov, &alls );

				// Results are copied onto heap to avoid some compiler
				// flags (-flto) from optimizing out function call
				memcpy(xs, macro_xs, 4*sizeof(double));

				// Verification hash calculation
				// This method provides a consistent hash across
				// architectures and compilers.
				#ifdef VERIFICATION
				char line[256];
				sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
						E, mat,
						macro_xs[0], macro_xs[1],
						macro_xs[2], macro_xs[3]);
				unsigned long long vhash_local = hash(line, 10000);
				vhash += vhash_local;
				#endif

				// Randomly pick next energy and material for the particle.
				// Also incorporates results from the macro_xs lookup to
				// enforce the loop dependency. In a real MC app this
				// dependency is expressed through branching physics
				// sampling; here we artificially enforce it by altering
				// the seed.
				for( int x = 0; x < 4; x++ )
				{
					if( macro_xs[x] > 0 )
						seed += 1337*p;
					else
						seed += 42;
				}

				E = rn(&seed);
				mat = pick_mat(&seed);
			}
		}

		free(sigTfactors);

		// Accumulate global counters
		g_abrarov = abrarov;
		g_alls = alls;

		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count \tSymbol \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif
	}

	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;
}
void run_event_based_simulation(Input input, CalcDataPtrs data,
		long * abrarov_result, long * alls_result, unsigned long * vhash_result )
{
	printf("Beginning event based simulation...\n");
	long g_abrarov = 0;
	long g_alls = 0;
	unsigned long vhash = 0;

	#pragma omp parallel default(none) \
	shared(input, data) \
	reduction(+:g_abrarov, g_alls, vhash)
	{
		double * xs = (double *) calloc(4, sizeof(double));
		int thread = omp_get_thread_num();
		long abrarov = 0;
		long alls = 0;

		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		complex double * sigTfactors =
			(complex double *) malloc( input.numL * sizeof(complex double) );

		// This loop is independent!
		// I.e., macroscopic cross section lookups in the event based
		// simulation can be executed in any order.
		#pragma omp for schedule(guided)
		for( int i = 0; i < input.lookups; i++ )
		{
			// Lookups are seeded by their lookup ID
			unsigned long seed = ((unsigned long) i + (unsigned long) 1)
				* (unsigned long) 13371337;

			// Randomly pick an energy and material for the particle
			double E = rn(&seed);
			int mat = pick_mat(&seed);

			#ifdef STATUS
			if( thread == 0 && i % 2000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)input.lookups /
						       (double) input.nthreads ))
						/ (double) input.nthreads * 100.0);
			#endif

			double macro_xs[4] = {0};

			calculate_macro_xs( macro_xs, mat, E, input, data,
					sigTfactors, &abrarov, &alls );

			// Results are copied onto heap to avoid some compiler
			// flags (-flto) from optimizing out function call
			memcpy(xs, macro_xs, 4*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			#ifdef VERIFICATION
			char line[256];
			sprintf(line, "%.2le %d %.2le %.2le %.2le %.2le",
					E, mat,
					macro_xs[0], macro_xs[1],
					macro_xs[2], macro_xs[3]);
			unsigned long long vhash_local = hash(line, 10000);
			vhash += vhash_local;
			#endif
		}

		free(sigTfactors);

		// Accumulate global counters
		g_abrarov = abrarov;
		g_alls = alls;

		#ifdef PAPI
		if( thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count \tSymbol \tDescription\n");
		}
		{
			#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif
	}

	*abrarov_result = g_abrarov;
	*alls_result = g_alls;
	*vhash_result = vhash;
}
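/*
 * The OpenMP kernels above share a counter_init()/counter_stop() pair built
 * on PAPI (the run_kernel() variant also passes an Input*; that detail is
 * omitted here). What follows is a minimal sketch of helpers with the
 * two-argument signatures, NOT the benchmarks' own counters.c: the event
 * list and error handling are assumptions. Each thread builds its event set
 * under an omp critical section, and counter_stop() reads back and prints
 * the counts.
 */
#include <stdio.h>
#include <stdlib.h>
#include <papi.h>

static void counter_init( int * eventset, int * num_papi_events )
{
	int events[] = { PAPI_TOT_INS, PAPI_TOT_CYC }; /* example event choice */
	*num_papi_events = sizeof(events) / sizeof(events[0]);

	if( PAPI_create_eventset(eventset) != PAPI_OK )
	{
		fprintf(stderr, "PAPI_create_eventset failed!\n");
		exit(1);
	}
	for( int i = 0; i < *num_papi_events; i++ )
		PAPI_add_event(*eventset, events[i]);

	PAPI_start(*eventset);
}

static void counter_stop( int * eventset, int num_papi_events )
{
	long long values[8]; /* assumes at most 8 events per set */
	PAPI_stop(*eventset, values);

	// Serialize output so per-thread counts do not interleave
	#pragma omp critical
	for( int i = 0; i < num_papi_events; i++ )
		printf("%lld\n", values[i]);
}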