void *Thread(void *arg) { int retval, num_tests = 1; int EventSet1=PAPI_NULL; int mask1, papi_event; int num_events1; long long **values; long long elapsed_us, elapsed_cyc; char event_name[PAPI_MAX_STR_LEN]; /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depends on the availability of the event on the platform */ EventSet1 = add_two_nonderived_events(&num_events1, &papi_event, hw_info, &mask1); expected[EventSet1] = *(int *)arg / mythreshold; myid[EventSet1] = PAPI_thread_id(); values = allocate_test_space(num_tests, num_events1); elapsed_us = PAPI_get_real_usec(); elapsed_cyc = PAPI_get_real_cyc(); if ((retval = PAPI_overflow(EventSet1, papi_event, mythreshold, 0, handler)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_overflow", retval); /* start_timer(1); */ if ((retval = PAPI_start(EventSet1)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); do_stuff(); if ((retval = PAPI_stop(EventSet1, values[0])) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); elapsed_us = PAPI_get_real_usec() - elapsed_us; elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc; if ((retval = PAPI_overflow(EventSet1, papi_event, 0, 0, NULL)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_overflow", retval); remove_test_events(&EventSet1, mask1); if ((retval = PAPI_event_code_to_name(papi_event, event_name)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_event_code_to_name", retval); if (!TESTS_QUIET) { printf("Thread 0x%x %s : \t%lld\n", (int) pthread_self(), event_name, (values[0])[0]); printf("Thread 0x%x PAPI_TOT_CYC: \t%lld\n", (int) pthread_self(), (values[0])[1]); printf("Thread 0x%x Real usec : \t%lld\n", (int) pthread_self(), elapsed_us); printf("Thread 0x%x Real cycles : \t%lld\n", (int) pthread_self(), elapsed_cyc); } free_test_space(values, num_tests); pthread_exit(NULL); return (NULL); }
int main(int argc, char **argv) { int retval; long long elapsed_us, elapsed_cyc; const PAPI_hw_info_t *hw_info; tests_quiet(argc, argv); /* Set TESTS_QUIET variable */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) test_fail(__FILE__, __LINE__, "PAPI_library_init", retval); hw_info = PAPI_get_hardware_info(); if (hw_info == NULL) test_fail(__FILE__, __LINE__, "PAPI_get_hardware_info", 2); elapsed_us = PAPI_get_real_usec(); elapsed_cyc = PAPI_get_real_cyc(); printf("Testing real time clock. (CLOCK %d MHz, CPU %f MHz)\n",hw_info->clock_mhz,hw_info->mhz); printf("Sleeping for 10 seconds.\n"); sleep(10); elapsed_us = PAPI_get_real_usec() - elapsed_us; elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc; printf("%lld us. %lld cyc.\n",elapsed_us,elapsed_cyc); printf("%f Computed MHz.\n",(float)elapsed_cyc/(float)elapsed_us); /* Elapsed microseconds and elapsed cycles are not as unambiguous as they appear. On Pentium III and 4, for example, cycles is a measured value, while useconds is computed from cycles and mhz. MHz is read from /proc/cpuinfo (on linux). Thus, any error in MHz is propagated to useconds. Conversely, on ultrasparc useconds are extracted from a system call (gethrtime()) and cycles are computed from useconds. Also, MHz comes from a scan of system info, Thus any error in gethrtime() propagates to both cycles and useconds, and cycles can be further impacted by errors in reported MHz. Without knowing the error bars on these system values, we can't really specify error ranges for our reported values, but we *DO* know that errors for at least one instance of Pentium 4 (torc17@utk) are on the order of one part per thousand. Newer multicore Intel processors seem to have broken the relationship between the clock rate reported in /proc/cpuinfo and the actual computed clock. To accomodate this artifact, the test no longer fails, but merely reports results out of range. 
*/ if (elapsed_us < 9000000) printf("NOTE: Elapsed real time less than 9 seconds!\n"); if (elapsed_us > 11000000) printf("NOTE: Elapsed real time greater than 11 seconds!\n"); if ((float)elapsed_cyc < 9.0 * hw_info->mhz * 1000000.0) printf("NOTE: Elapsed real cycles less than 9*MHz*1000000.0!\n"); if ((float)elapsed_cyc > 11.0 * hw_info->mhz * 1000000.0) printf("NOTE: Elapsed real cycles greater than 11*MHz*1000000.0!\n"); test_pass(__FILE__, NULL, 0); exit(1); }
/*
 * Minimal PAPI wall-clock example: samples the real-time cycle and
 * microsecond counters around two array stores and prints the deltas.
 *
 * Exits with status 1 if the PAPI library cannot be initialised,
 * returns 0 otherwise.
 */
int main(void)
{
	long_long start_cycles, end_cycles, start_usec, end_usec;
	int tabla[100];

	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT)
		exit(1);

	/* Gets the starting time in clock cycles */
	start_cycles = PAPI_get_real_cyc();

	/* Gets the starting time in microseconds */
	start_usec = PAPI_get_real_usec();

	/*
	 * The timed work: touch the first and the last element.  The last
	 * valid index of tabla[100] is 99; the previous code stored to
	 * tabla[100], one element past the end (undefined behaviour).
	 */
	tabla[0] = 1;
	tabla[99] = 1;

	/* Gets the ending time in clock cycles */
	end_cycles = PAPI_get_real_cyc();

	/* Gets the ending time in microseconds */
	end_usec = PAPI_get_real_usec();

	printf("Wall clock cycles: %lld\n", end_cycles - start_cycles);
	printf("Wall clock time in microseconds: %lld\n", end_usec - start_usec);

	return 0;
}
/*
 * Driver for the compiler-directive based threading test.
 *
 * Initialises PAPI, registers a platform-specific thread-id function with
 * PAPI_thread_init(), then runs Thread(i, 10000000 * i) for i = 1 and 2
 * under a vendor parallel-loop pragma.  Elapsed real time for the whole
 * run is printed unless TESTS_QUIET is set.
 *
 * Only AIX, SGI/MIPS and Sun/SPARC are handled; any other platform stops
 * the build with #error.
 */
int main(int argc, char **argv)
{
	int i, retval;
	long long elapsed_us, elapsed_cyc;

	tests_quiet(argc, argv);	/* Set TESTS_QUIET variable */

	retval = PAPI_library_init(PAPI_VER_CURRENT);
	if (retval != PAPI_VER_CURRENT)
		test_fail(__FILE__, __LINE__, "PAPI_library_init", retval);

	/* hw_info is not declared in this function; it is defined elsewhere. */
	hw_info = PAPI_get_hardware_info();
	if (hw_info == NULL)
		test_fail(__FILE__, __LINE__, "PAPI_get_hardware_info", 2);

	/* Start-of-run timestamps; turned into deltas after the loop. */
	elapsed_us = PAPI_get_real_usec();
	elapsed_cyc = PAPI_get_real_cyc();

	/*
	 * Each branch registers the platform's thread-id routine and ends with
	 * the pragma(s) that must directly precede the for loop below.
	 */
#if defined(_AIX)
	retval = PAPI_thread_init((unsigned long (*)(void)) (pthread_self));
	if (retval != PAPI_OK) {
		/* PAPI_ESBSTR is reported as a skip rather than a failure. */
		if (retval == PAPI_ESBSTR)
			test_skip(__FILE__, __LINE__, "PAPI_thread_init", retval);
		else
			test_fail(__FILE__, __LINE__, "PAPI_thread_init", retval);
	}
#pragma ibm parallel_loop
#elif defined(sgi) && defined(mips)
	retval = PAPI_thread_init((unsigned long (*)(void)) (mp_my_threadnum));
	if (retval != PAPI_OK) {
		test_fail(__FILE__, __LINE__, "PAPI_thread_init", retval);
	}
#pragma parallel
#pragma local(i)
#pragma pfor
#elif defined(sun) && defined(sparc)
	retval = PAPI_thread_init((unsigned long (*)(void)) (thr_self));
	if (retval != PAPI_OK) {
		test_fail(__FILE__, __LINE__, "PAPI_thread_init", retval);
	}
#pragma MP taskloop private(i)
#else
#error "Architecture not included in this test file yet."
#endif
	/* Two iterations; the second does twice the work of the first. */
	for (i = 1; i < 3; i++)
		Thread(i, 10000000 * i);

	elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc;
	elapsed_us = PAPI_get_real_usec() - elapsed_us;

	if (!TESTS_QUIET) {
		printf("Master real usec : \t%lld\n", elapsed_us);
		printf("Master real cycles : \t%lld\n", elapsed_cyc);
	}

	test_pass(__FILE__, NULL, 0);
	/* NOTE(review): presumably test_pass() does not return - confirm. */
	exit(1);
}
void Thread( int t, int n ) { int retval, num_tests = 1; int EventSet1 = PAPI_NULL; int PAPI_event, mask1; int num_events1; long long **values; long long elapsed_us, elapsed_cyc; char event_name[PAPI_MAX_STR_LEN]; /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depending on the availability of the event on the platform */ EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); values = allocate_test_space( num_tests, num_events1 ); retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); do_flops( n ); elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; retval = PAPI_stop( EventSet1, values[0] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); remove_test_events( &EventSet1, mask1 ); if ( !TESTS_QUIET ) { printf( "Thread %#x %-12s : \t%lld\n", t, event_name, values[0][1] ); printf( "Thread %#x PAPI_TOT_CYC : \t%lld\n", t, values[0][0] ); } free_test_space( values, num_tests ); if ( !TESTS_QUIET ) { printf( "Thread %#x Real usec : \t%lld\n", t, elapsed_us ); printf( "Thread %#x Real cycles : \t%lld\n", t, elapsed_cyc ); } PAPI_unregister_thread( ); }
void Thread( int n ){ int retval, num_tests = 1; int EventSet1 = PAPI_NULL; int PAPI_event, mask1; int num_events1; long long **values; long long elapsed_us, elapsed_cyc, L1_DCM; char event_name[PAPI_MAX_STR_LEN]; printf( "Thread %#x started\n", omp_get_thread_num( ) ); num_events1 = 2; EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); values = allocate_test_space( num_tests, num_events1 ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_start( EventSet1 ); do_flops( n ); retval = PAPI_stop( EventSet1, values[0] ); elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; remove_test_events( &EventSet1, mask1 ); if ( !TESTS_QUIET ) { printf( "Thread %#x %-12s : \t%lld\n", omp_get_thread_num( ), event_name, values[0][1] ); printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", omp_get_thread_num( ), values[0][0] ); printf( "Thread %#x Real usec : \t%lld\n", omp_get_thread_num( ), elapsed_us ); printf( "Thread %#x Real cycles : \t%lld\n", omp_get_thread_num( ), elapsed_cyc ); } free_test_space( values, num_tests ); PAPI_unregister_thread( ); printf( "Thread %#x finished\n", omp_get_thread_num( ) ); }
void Thread( int n ) { int retval, num_tests = 1, tmp; int EventSet1 = PAPI_NULL; int mask1 = 0x5; int num_events1; long long **values; long long elapsed_us, elapsed_cyc; EventSet1 = add_test_events( &num_events1, &mask1 ); /* num_events1 is greater than num_events2 so don't worry. */ values = allocate_test_space( num_tests, num_events1 ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_start( EventSet1 ); if ( retval >= PAPI_OK ) exit( 1 ); do_flops( n ); retval = PAPI_stop( EventSet1, values[0] ); if ( retval >= PAPI_OK ) exit( 1 ); elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; remove_test_events( &EventSet1, mask1 ); printf( "Thread %#x PAPI_FP_INS : \t%lld\n", pthread_self( ), ( values[0] )[0] ); printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", pthread_self( ), ( values[0] )[1] ); printf( "Thread %#x Real usec : \t%lld\n", pthread_self( ), elapsed_us ); printf( "Thread %#x Real cycles : \t%lld\n", pthread_self( ), elapsed_cyc ); free_test_space( values, num_tests ); }
/*
 * SHMEM driver: every processing element runs Thread() with a workload
 * of 1000000 * (PE number + 1) and then prints the real time that elapsed
 * across start_pes() and the Thread() call.
 */
int main( )
{
	long long begin_us, begin_cyc;
	long long total_us, total_cyc;

	begin_us = PAPI_get_real_usec( );
	begin_cyc = PAPI_get_real_cyc( );

	start_pes( 2 );

	Thread( 1000000 * ( _my_pe( ) + 1 ) );

	total_cyc = PAPI_get_real_cyc( ) - begin_cyc;
	total_us = PAPI_get_real_usec( ) - begin_us;

	printf( "Master real usec : \t%lld\n", total_us );
	printf( "Master real cycles : \t%lld\n", total_cyc );

	exit( 0 );
}
/*
 * Record the "end" sample of a measurement interval.
 *
 * tid  index into thr_vars[] selecting the per-thread state
 * aid  index into each per-thread _tmp_* array selecting the slot whose
 *      .end field is written
 *
 * Which quantities are sampled is decided at compile time by the
 * MEASURE_* macros; each enabled section stores the current reading in
 * the matching _tmp_*[aid].end field.
 */
void PAPI_HW_COUNTER_off(int tid, int aid)
{
	int retval;	/* only used when MEASURE_HW_COUNTER or MEASURE_ENERGY is defined */
#ifdef MEASURE_TIME
	/* Timestamp is taken first, before any counter is read. */
	thr_vars[tid]._tmp_time[aid].end=PAPI_get_real_cyc();
#endif
#ifdef MEASURE_HW_COUNTER
	/* Refresh thr_vars[tid].values from the thread's event set. */
	retval=PAPI_read(thr_vars[tid].EventSet, thr_vars[tid].values);
	if (retval != PAPI_OK) {
		papi_fail(__FILE__, __LINE__, "PAPI_read()", retval);
	}
#endif
	/*
	 * NOTE(review): the sections below copy from thr_vars[tid].values,
	 * which is only refreshed inside the MEASURE_HW_COUNTER section above;
	 * presumably the build always defines MEASURE_HW_COUNTER together with
	 * them - confirm.
	 */
#ifdef MEASURE_CPI
	thr_vars[tid]._tmp_inst[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_inst];
	thr_vars[tid]._tmp_cyc[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_cyc];
#endif
#ifdef MEASURE_MEMACC
	thr_vars[tid]._tmp_load[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_load];
	thr_vars[tid]._tmp_store[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_store];
#endif
#ifdef MEASURE_LLCMISS
	thr_vars[tid]._tmp_llcmiss[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_llcmiss];
#endif
#ifdef MEASURE_ICACHEMISS
	thr_vars[tid]._tmp_icachemiss[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_icachemiss];
#endif
#ifdef MEASURE_DCACHEMISS
	thr_vars[tid]._tmp_l1dcm[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_l1dcm];
	thr_vars[tid]._tmp_l1dca[aid].end=thr_vars[tid].values[thr_vars[tid].papi_idx_l1dca];
#endif
#ifdef MEASURE_ENERGY
	/* Energy readings come from a separate event set and value buffer. */
	retval=PAPI_read(thr_vars[tid].EnergyEventSet, thr_vars[tid].energy_values);
	if (retval != PAPI_OK) {
		papi_fail(__FILE__, __LINE__, "PAPI_read()", retval);
	}
	int i;
	/* Note the index order: energy event first, slot (aid) second. */
	for(i=0;i<thr_vars[tid].num_energy_events;i++){
		thr_vars[tid]._tmp_energies[i][aid].end=thr_vars[tid].energy_values[i];
	}
#endif
}
/*
 * One-time capture of the timing baseline.
 *
 * The first call records the current time with every clock source that
 * is compiled in (PAPI, clock_gettime, gettimeofday); later calls return
 * immediately.
 */
static void init_timer()
{
	static int initialized = 0;

	if (initialized)
		return;

#ifdef USING_PAPI
	/* The result of PAPI_library_init() is not checked here. */
	PAPI_library_init(PAPI_VER_CURRENT);
	init_usec = PAPI_get_real_usec();
	init_cycles = PAPI_get_real_cyc();
#endif

#ifdef USING_CLOCK_GETTIME
	clock_getres(clockid, &ts_res);
	clock_gettime(clockid, &ts_init);
#endif

#ifdef USING_GETTIMEOFDAY
	gettimeofday(&tv_init, NULL);
#endif

	initialized = 1;
}
// Driver for the Wang-Landau / LSMS Monte Carlo run.
//
// Parses the command line, optionally starts PAPI counters, splits the
// processes into one master (world rank 0) and num_lsms worker groups, and
// runs a command loop in which the master hands moment directions (evecs) to
// the groups and collects energies.  With num_lsms == 1 a single LSMS
// calculation is run and printed instead.
//
// NOTE(review): the commented-out MPI calls next to the shmem_* calls suggest
// this is an MPI -> SHMEM port that is not finished: MPI_Init/MPI_Send/
// MPI_Bcast/MPI_Comm_free are still called, `status` is still read, and
// several shadowed variables look unintended.  The notes below mark the
// places that need confirmation.
int main(int argc, char *argv[])
{
  int size, rank, world_rank, my_group;
  int num_lsms; // number of parallel LSMS instances
  int size_lsms; // number of atoms in a lsms instance
  int num_steps; // number of energy calculations
  int initial_steps; // number of steps before sampling starts
  int stepCount=0; // count the Monte Carlo steps executed
  double max_time; // maximum walltime for this run in seconds
  bool restrict_time = false; // was the maximum time specified?
  bool restrict_steps = false; // or the max. number of steps?
  int align; // alignment of lsms_instances
  double magnetization;
  double energy_accumulator; // accumulates the energy to calculate the mean
  int energies_accumulated;
  int new_peid,new_root;
  static int op,flag;
  double *evec,*r_values;

  // NOTE(review): size_lsms has not been assigned yet at this point (it is
  // set to -1 and then parsed from argv further down), so both allocation
  // sizes are computed from an indeterminate value.
  evec=(double *)shmalloc(sizeof(double)*3*size_lsms);
  r_values=(double *)shmalloc(sizeof(double)*(R_VALUE_OFFSET+3*(size_lsms+1)));

  energy_accumulator=0.0;
  energies_accumulated=0;

  double walltime_0,walltime;

  double restartWriteFrequency=30.0*60.0;
  double nextWriteTime=restartWriteFrequency;

  MPI_Comm local_comm;
  int *lsms_rank0;
  MPI_Status status;

  char prefix[40];
  char i_lsms_name[64];
  char gWL_in_name[64], gWL_out_name[64];
  char mode_name[64];
  char energy_calculation_name[64];
  char stupid[37];

  char step_out_name[64];
  char wl_step_out_name[128];
  char *wl_stepf=NULL;
  bool step_out_flag=false;
  std::ofstream step_out_file;
  typedef enum {Constant, Random, WangLandau_1d, ExhaustiveIsing, WangLandau_2d} EvecGenerationMode;
  typedef enum {MagneticMoment, MagneticMomentZ, MagneticMomentX, MagneticMomentY} SecondDimension;

  EvecGenerationMode evec_generation_mode = Constant;
  SecondDimension second_dimension = MagneticMoment;
  double ev0[3];

  bool return_moments_flag=true; // true-> return all magnetic moments from lsms run at each step.
  bool generator_needs_moment=false;

  typedef enum {OneStepEnergy, MultiStepEnergy, ScfEnergy} EnergyCalculationMode;
  EnergyCalculationMode energyCalculationMode = OneStepEnergy;
  int energyIndex=1; // index for the return value to use for the MC step (0: total energy, 1: band energy)

  ev0[0]=ev0[1]=0.0; ev0[2]=1.0;
  // size has to be align + size_lsms*num_lsms
  align=1;
  num_lsms=1;
  size_lsms=-1;
  my_group=-1;
  num_steps=1;
  initial_steps=0;

  sprintf(i_lsms_name,"i_lsms");
  gWL_in_name[0]=gWL_out_name[0]=0;
  mode_name[0]=0;
  energy_calculation_name[0]=0;

  // check command line arguments
  // NOTE(review): every option reads argv[++i] without checking that a
  // following argument exists, and strncpy with n equal to the buffer size
  // leaves the buffer unterminated when the argument is 64 chars or longer.
  // "-walltime" is multiplied by 60 before being stored in max_time.
  for(int i=0; i<argc; i++)
  {
    if(!strcmp("-num_lsms",argv[i])) num_lsms=atoi(argv[++i]);
    if(!strcmp("-size_lsms",argv[i])) size_lsms=atoi(argv[++i]);
    if(!strcmp("-align",argv[i])) align=atoi(argv[++i]);
    if(!strcmp("-num_steps",argv[i])) {num_steps=atoi(argv[++i]); restrict_steps=true;}
    if(!strcmp("-initial_steps",argv[i])) initial_steps=atoi(argv[++i]);
    if(!strcmp("-walltime",argv[i])) {max_time=60.0*atof(argv[++i]); restrict_time=true;}
    if(!strcmp("-i",argv[i])) strncpy(i_lsms_name,argv[++i],64);
    if(!strcmp("-random_dir",argv[i])) {evec_generation_mode = Random;}
    if(!strcmp("-step_out",argv[i]))
    {strncpy(step_out_name,argv[++i],64); step_out_flag=true;
      return_moments_flag=true;}
    if(!strcmp("-wl_out", argv[i])) strncpy(gWL_out_name,argv[++i],64);
    if(!strcmp("-wl_in", argv[i])) strncpy(gWL_in_name,argv[++i],64);
    if(!strcmp("-mode", argv[i])) strncpy(mode_name,argv[++i],64);
    if(!strcmp("-energy_calculation",argv[i])) strncpy(energy_calculation_name,argv[++i],64);
  }

  // With neither limit given, fall back to the step limit (num_steps is 1).
  if(!(restrict_steps || restrict_time)) restrict_steps=true;

  // Map the "-mode" string onto the generation mode / second dimension.
  if(mode_name[0]!=0)
  {
    if(!strcmp("constant",mode_name)) evec_generation_mode = Constant;
    if(!strcmp("random",mode_name)) evec_generation_mode = Random;
    if(!strcmp("1d",mode_name)) evec_generation_mode = WangLandau_1d;
    if(!strcmp("ising",mode_name)) evec_generation_mode = ExhaustiveIsing;
    if(!strcmp("2d",mode_name)) evec_generation_mode = WangLandau_2d;
    if(!strcmp("2d-m",mode_name))
    {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMoment;}
    if(!strcmp("2d-x",mode_name))
    {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentX;}
    if(!strcmp("2d-y",mode_name))
    {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentY;}
    if(!strcmp("2d-z",mode_name))
    {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentZ;}
  }

  // Only the first character of "-energy_calculation" is inspected.
  if(energy_calculation_name[0]!=0)
  {
    if(energy_calculation_name[0]=='o')
    { energyCalculationMode = OneStepEnergy; energyIndex=1; }
    if(energy_calculation_name[0]=='m')
    { energyCalculationMode = MultiStepEnergy; energyIndex=1; }
    if(energy_calculation_name[0]=='s')
    { energyCalculationMode = ScfEnergy; energyIndex=0; }
  }

#ifdef USE_PAPI
#define NUM_PAPI_EVENTS 4
  int hw_counters = PAPI_num_counters();
  if(hw_counters>NUM_PAPI_EVENTS) hw_counters=NUM_PAPI_EVENTS;
  int papi_events[NUM_PAPI_EVENTS]; // = {PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_FP_OPS,PAPI_VEC_INS};
  char *papi_event_name[] = {"PAPI_TOT_INS","PAPI_FP_OPS",
                             "RETIRED_SSE_OPERATIONS:DOUBLE_ADD_SUB_OPS:DOUBLE_MUL_OPS:DOUBLE_DIV_OPS:OP_TYPE",
                             "RETIRED_SSE_OPERATIONS:SINGLE_ADD_SUB_OPS:SINGLE_MUL_OPS:SINGLE_DIV_OPS:OP_TYPE"};
  // "RETIRED_INSTRUCTIONS",
  // "RETIRED_MMX_AND_FP_INSTRUCTIONS:PACKED_SSE_AND_SSE2",
  // "RETIRED_SSE_OPERATIONS:DOUBLE_ADD_SUB_OPS:DOUBLE_MUL_OPS:DOUBLE_DIV_OPS:1",
  // "RETIRED_SSE_OPERATIONS:SINGLE_ADD_SUB_OPS:SINGLE_MUL_OPS:SINGLE_DIV_OPS:1"
  // get events from names:
  // The first name that cannot be resolved caps hw_counters at its index,
  // so that event and all later ones are skipped.
  for(int i=0; i<NUM_PAPI_EVENTS; i++)
  {
    if(PAPI_event_name_to_code(papi_event_name[i],&papi_events[i]) != PAPI_OK)
    {
      // printline("Error in obtaining PAPI event code for: "+ttos(papi_event_name[i]),
      //           std::cerr,parameters.myrankWorld);
      // printline("Skipping all following events",
      //           std::cerr,parameters.myrankWorld);
      if(hw_counters>i) hw_counters=i;
    }
  }
  long long papi_values[NUM_PAPI_EVENTS+4];
  // printline("PAPI: "+ttos(hw_counters)+" counters available",std::cout,parameters.myrankWorld);
  if(hw_counters>NUM_PAPI_EVENTS) hw_counters=NUM_PAPI_EVENTS;
  // Baselines for the real/virtual cycle and usec deltas computed at exit.
  long long papi_real_cyc_0 = PAPI_get_real_cyc();
  long long papi_real_usec_0 = PAPI_get_real_usec();
  long long papi_virt_cyc_0 = PAPI_get_virt_cyc();
  long long papi_virt_usec_0 = PAPI_get_virt_usec();
  PAPI_start_counters(papi_events,hw_counters);
#endif

  lsms_rank0=(int *)malloc(sizeof(int)*(num_lsms+1));

  // initialize MPI:
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  world_rank=rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  walltime_0 = get_rtc();

#ifndef SVN_REV
#define SVN_REV "unknown"
#endif

  // make sure 'return_moments_flag' is set correctly
  switch(evec_generation_mode)
  {
    case Constant : break;
    case Random : break;
    case WangLandau_1d :
      return_moments_flag = true;
      generator_needs_moment = true;
      break;
    case ExhaustiveIsing : break;
    case WangLandau_2d :
      return_moments_flag = true;
      generator_needs_moment = true;
      break;
    default: std::cout<<" ERROR: UNKNOWN EVEC GENERATION MODE\n"; exit(1);
  }

  // Rank 0 prints the run configuration and opens the step output file.
  if(rank==0)
  {
    std::cout<<"LSMS_3"<<std::endl;
    std::cout<<" SVN revision "<<SVN_REV<<std::endl<<std::endl;
#ifdef USE_PAPI
    std::cout<<" Using Papi counters"<<std::endl<<std::endl;
#endif
    std::cout<<" Size of LSMS instances = "<<size_lsms<<" atoms\n";
    std::cout<<" Number of LSMS instances = "<<num_lsms<<std::endl;
    std::cout<<" LSMS Energy calculated using ";
    switch(energyCalculationMode)
    {
      case OneStepEnergy: std::cout<<"oneStepEnergy [frozen potential band energy]"<<std::endl; break;
      case MultiStepEnergy: std::cout<<"multiStepEnergy [frozen potential band energy with converged Fermi energy]"<<std::endl; break;
      case ScfEnergy: std::cout<<"scfEnergy [self-consistent total energy]"<<std::endl; break;
      default: std::cout<<"UNKNOWN ENERGY CALCULATION METHOD"<<std::endl; exit(1);
    }
    if(restrict_steps) std::cout<<" Number of gWL steps = "<<num_steps<<std::endl;
    if(restrict_time) std::cout<<" Maximum walltime = "<<max_time<<"s\n";
    std::cout<<" Processor alignment (process allocation quantization) = "<<align<<std::endl;
    switch(evec_generation_mode)
    {
      case Constant : std::cout<<" Constant moments direction along "
                               <<ev0[0]<<" "<<ev0[1]<<" "<<ev0[2]<<std::endl;
        break;
      case Random : std::cout<<" Random distribution of moments (no Wang-Landau)"<<std::endl;
        break;
      case WangLandau_1d : std::cout<<" Wang-Landau for one continuous variable (energy)"<<std::endl;
        // return_moments_flag = true;
        // generator_needs_moment = true;
        break;
      case ExhaustiveIsing : std::cout<<" Exhaustive Ising sampling"<<std::endl; break;
      case WangLandau_2d : std::cout<<" Wang-Landau for two continuous variable (energy, ";
        switch(second_dimension)
        {
          case MagneticMoment : std::cout<<"magnitude of magnetization)"; break;
          case MagneticMomentX : std::cout<<"x component of magnetization)"; break;
          case MagneticMomentY : std::cout<<"y component of magnetization)"; break;
          case MagneticMomentZ : std::cout<<"z component of magnetization)"; break;
        }
        std::cout<<std::endl;
        // return_moments_flag = true;
        // generator_needs_moment = true;
        break;
      default: std::cout<<" ERROR: UNKNOWN EVEC GENERATION MODE\n"; exit(1);
    }
    if(step_out_flag) std::cout<<" Step output written to: "<<step_out_name<<std::endl;
    std::cout<<std::endl;

    if(step_out_flag && (evec_generation_mode==WangLandau_1d))
    {
      // step_out_flag=false;
      snprintf(wl_step_out_name,127,"wl1d_%s",step_out_name);
      wl_stepf=wl_step_out_name;
    }

    // The step file starts with a '#' line echoing the command line,
    // followed by size_lsms.
    if(step_out_flag)
    {
      step_out_file.open(step_out_name);
      step_out_file<<"#";
      for(int i=0; i<argc; i++) step_out_file<<" "<<argv[i];
      step_out_file<<std::endl<<size_lsms<<std::endl;
    }
  }

  if(generator_needs_moment) return_moments_flag=true;

  if(num_lsms==1)
  {
    // Single instance: one active set over all PEs, one energy calculation.
    // This local_comm shadows the MPI_Comm of the same name declared above.
    SHMEM_activeset local_comm;
    local_comm.rank=shmem_my_pe();
    local_comm.size=shmem_n_pes();
    local_comm.start_pe=0;
    local_comm.logPE_stride=0;
    LSMS lsms_calc(local_comm,i_lsms_name,"1_");

    if(rank==0)
    {
      std::cout<<"executing LSMS(C++) for "<<lsms_calc.numSpins()<<" atoms\n";
      std::cout<<" LSMS version = "<<lsms_calc.version()<<std::endl;
    }

    if(energyCalculationMode==OneStepEnergy)
      std::cout<<"one step Energy = "<<lsms_calc.oneStepEnergy()<<std::endl;
    else if(energyCalculationMode==MultiStepEnergy)
      std::cout<<"multi-step Energy = "<<lsms_calc.multiStepEnergy()<<std::endl;
    else if(energyCalculationMode==ScfEnergy)
      std::cout<<"self-consistent Energy = "<<lsms_calc.scfEnergy()<<std::endl;
    else
    {
      printf("ERROR: Unknown energy calculation mode for lsms_calc in wl-lsms main!\n");
      // MPI_Abort(MPI_COMM_WORLD,5);
      exit(5);
    }
  }
  else
  {
    // build the communicators
    //int color=MPI_UNDEFINED;
    //Assuming user passes a power of two while using "-align"
    int s = align;
    int comm_size=(size-align)/num_lsms;
    // NOTE(review): this declaration shadows the outer world_rank and is
    // never assigned, so every world_rank test inside this else-branch reads
    // an indeterminate value - presumably the outer variable was intended.
    int world_rank;
    // Group i covers world ranks [s, s+comm_size); lsms_rank0[i] records the
    // first rank of each group.
    for(int i=0; i<num_lsms; i++)
    {
      if((world_rank>=s) && (world_rank<s+comm_size))
      {
        my_group=i;
        //color=i;
        new_peid=world_rank-s;
        new_root=s;
      }
      lsms_rank0[i]=s;
      s+=comm_size;
    }
    if(world_rank==0){
      //color=num_lsms;
      new_peid=0;
      comm_size=1;
      new_root=0;
    }

    //MPI_Comm_split(MPI_COMM_WORLD, color, 0, &local_comm);
    // Shadows the outer MPI_Comm local_comm for the rest of this branch.
    SHMEM_activeset local_comm;
    local_comm.rank=new_peid;
    local_comm.size=comm_size;
    local_comm.start_pe=new_root;
    local_comm.logPE_stride=0;

    std::cout<<"world_rank="<<world_rank<<" -> group="<<my_group<<std::endl;

    snprintf(prefix,38,"Group %4d: ",my_group);

    // now we get ready to do some calculations...

    if(my_group>=0)
    {
      // ---- worker: member of an LSMS group ----
      double energy;
      double band_energy;
      int static i_values[10];
      // NOTE(review): r_values and op below shadow the outer r_values / op.
      // The master writes the command into the outer op, while this loop
      // reads the inner one; getMag() below writes at offset R_VALUE_OFFSET
      // into this 10-element array.  Confirm both are intended.
      double static r_values[10];
      static int op;

      //MPI_Comm_rank(local_comm, &rank);
      rank = local_comm.rank;
      snprintf(prefix,38,"%d_",my_group);
      // to use the ramdisk on jaguarpf:
      // snprintf(prefix,38,"/tmp/ompi/%d_",my_group);
      LSMS lsms_calc(local_comm,i_lsms_name,prefix);
      snprintf(prefix,38,"Group %4d: ",my_group);

      if(rank==0 && my_group==0)
      {
        std::cout<<prefix<<"executing LSMS(C++) for "<<lsms_calc.numSpins()<<" atoms\n";
        std::cout<<prefix<<" LSMS version = "<<lsms_calc.version()<<std::endl;
      }

      // wait for commands from master
      bool finished=false;
      while(!finished)
      {
        if(rank==0)
        {
          //MPI_Recv(evec,3*size_lsms,MPI_DOUBLE,0,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          //op =status.MPI_TAG;
          // Only the first group's root takes part in this barrier.
          if (lsms_rank0[0]==world_rank)
            shmem_barrier(0, lsms_rank0[0], 2, pSync1);
        }
        //MPI_Bcast(&op,1,MPI_INT,0,local_comm);
        // Distribute the opcode from the group root to the rest of the group.
        shmem_broadcast32(&op, &op, 1, local_comm.start_pe, local_comm.start_pe, local_comm.logPE_stride, local_comm.size, pSync2);

        /* recognized opcodes:
           5: calculate energy

           recognized energy calculation modes:
           OneStepEnergy : calculate frozen potential band energy in one step
                           (don't converge Ef).  Use only if the Fermi energy
                           will not change due to MC steps!
                           The only method available in LSMS_1.9
           MultiStepEnergy : calculate frozen potential band energy after
                           converging Fermi energy.
                           This should be the new default method.  If the
                           Fermi energy doesn't change, multiStepEnergy only
                           performs one step and should be equivalent to
                           oneStepEnergy.
                           The tolerance for Ef convergence can be set with
                           LSMS::setEfTol(Real).
                           The default tolerance is set in the LSMS::LSMS
                           constructor (currently 1.0e-6).
                           The maximum number of steps is read from the LSMS
                           input file 'nscf' parameter.
           ScfEnergy : this will calculate the selfconsistent total energy.
                           The maximum number of steps is read from the LSMS
                           input file 'nscf' parameter.
                           NOT IMPLEMENTED YET!!!

           10: get number of sites

           any other value ends the command loop
        */

        if(op==5)
        {
          lsms_calc.setEvec(evec);
          if(energyCalculationMode==OneStepEnergy)
            energy=lsms_calc.oneStepEnergy(&band_energy);
          else if(energyCalculationMode==MultiStepEnergy)
            band_energy=energy=lsms_calc.multiStepEnergy();
          else if(energyCalculationMode==ScfEnergy)
            energy=lsms_calc.scfEnergy(&band_energy);
          else
          {
            printf("ERROR: Unknown energy calculation mode for lsms_calc in wl-lsms main!\n");
            //MPI_Abort(MPI_COMM_WORLD,5);
            exit(5);
          }
          // Result layout: [0] total energy, [1] band energy, then the
          // moments starting at R_VALUE_OFFSET.
          r_values[0]=energy;
          r_values[1]=band_energy;
          if(return_moments_flag)
          {
            lsms_calc.getMag(&r_values[R_VALUE_OFFSET]);
          }
          if(rank==0)
          {
            // Push the results to PE 0, then publish this rank in `flag`
            // on PE 0.
            if(return_moments_flag)
            {
              //MPI_Send(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,0,1005,MPI_COMM_WORLD);
              shmem_double_put(r_values, r_values, R_VALUE_OFFSET+3*size_lsms, 0);
            } else {
              //MPI_Send(r_values,R_VALUE_OFFSET,MPI_DOUBLE,0,1005,MPI_COMM_WORLD);
              shmem_double_put(r_values, r_values, R_VALUE_OFFSET, 0);
            }
            shmem_fence();
            shmem_int_swap(&flag, world_rank, 0);
          }
        } else if(op==10) {
          i_values[0]=lsms_calc.numSpins();
          //MPI_Send(i_values,10,MPI_INT,0,1010,MPI_COMM_WORLD);
          shmem_int_put(i_values, i_values, 10, 0);
        } else {
          // printf("world rank %d: received exit\n",world_rank);
          finished=true;
        }
      }

      shfree(evec);
      //shfree(r_values);
    }
    else if(world_rank==0)
    {
      // ---- master: generates evecs and schedules the groups ----
      int running;
      double **evecs;
      //double *r_values;
      //int i_values[10];
      int *init_steps;
      int total_init_steps;
      bool accepted;

      char *wl_inf=NULL;
      char *wl_outf=NULL;
      // NOTE(review): gWL_in_name / gWL_out_name are arrays, so these two
      // conditions are always true; presumably the first character was meant
      // to be tested - confirm.
      if(gWL_in_name) wl_inf=gWL_in_name;
      if(gWL_out_name) wl_outf=gWL_out_name;

      EvecGenerator *generator;

      /*
      // get number of spins from first LSMS instance
      // temp r_values:
      r_values=(double *)malloc(sizeof(double)*10);
      MPI_Send(r_values,1,MPI_DOUBLE, lsms_rank0[0], 10, MPI_COMM_WORLD);
      free(r_values);
      MPI_Recv(i_values,10,MPI_INT,lsms_rank0[0],1010,MPI_COMM_WORLD,&status);
      if(i_values[0]!=size_lsms)
      {
        printf("Size specified for Wang-Landau and in LSMS input file don't match!\n");
        size_lsms=i_values[0];
      }
      */

      evecs=(double **)shmalloc(sizeof(double *)*num_lsms);
      init_steps=(int *)shmalloc(sizeof(int)*num_lsms);
      for(int i=0; i<num_lsms; i++)
      {
        evecs[i]=(double *)shmalloc(sizeof(double)*3*size_lsms);
        init_steps[i]=initial_steps;
      }
      total_init_steps=num_lsms*initial_steps;

      // Initialize the correct evec generator
      switch(evec_generation_mode)
      {
        case Random : generator = new RandomEvecGenerator(size_lsms);
          break;
        case Constant: generator = new ConstantEvecGenerator(size_lsms, ev0, num_lsms);
          break;
        //case WangLandau_1d : generator = new WL1dEvecGenerator<std::mt19937>(size_lsms, num_lsms,
        //                                                      evecs, wl_inf, wl_outf, wl_stepf);
        case WangLandau_1d : generator = new WL1dEvecGenerator<boost::mt19937>(size_lsms, num_lsms,
                                                              evecs, wl_inf, wl_outf, wl_stepf);
          break;
        case ExhaustiveIsing : generator = new ExhaustiveIsing1dEvecGenerator(size_lsms, num_lsms,
                                                              evecs, wl_inf, wl_outf);
          break;
        //case WangLandau_2d : generator = new WL2dEvecGenerator<std::mt19937>(size_lsms, num_lsms,
        //                                                      evecs, wl_inf, wl_outf, wl_stepf);
        case WangLandau_2d : generator = new WL2dEvecGenerator<boost::mt19937>(size_lsms, num_lsms,
                                                              evecs, wl_inf, wl_outf, wl_stepf);
          break;
        default: std::cerr<<"The code should never arrive here: UNKNOWN EVEC GENERATION MODE\n";
          exit(1);
      }

      for(int i=0; i<num_lsms; i++)
      {
        generator->initializeEvec(i,evecs[i]);
      }

      std::cout<<"This is the master node\n";
      // issue initial commands to all LSMS instances
      running=0;
      bool more_work=true;
      if(total_init_steps>0)
      {
        // Send each group its first evec and the "calculate energy" opcode.
        for(int i=0; i<num_lsms; i++)
        {
          std::cout<<"starting initial calculation in group "<<i<<std::endl;
          //MPI_Send(evecs[i], 3*size_lsms, MPI_DOUBLE, lsms_rank0[i], 5, MPI_COMM_WORLD);
          shmem_double_put(evec, evecs[i], 3*size_lsms, lsms_rank0[i]);
          shmem_int_p(&op, 5, lsms_rank0[i]);
          shmem_fence();

          num_steps--; running++; stepCount++;
          if(restrict_steps) std::cout<<" "<<num_steps<<" steps remaining\n";
        }
        shmem_barrier(0, lsms_rank0[0], 2, pSync1);
        // first deal with the initial steps:
        while(running>0)
        {
          //if(return_moments_flag)
          //  MPI_Recv(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          //else
          //  MPI_Recv(r_values,R_VALUE_OFFSET,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);

          // NOTE(review): no statement in this function sets flag to -1, so
          // whether this wait ever blocks depends on code outside this
          // function - confirm.
          shmem_int_wait(&flag,-1);

          running--;
          // std::cout<<"received energy E_tot ="<<r_values[0]<<std::endl;
          // std::cout<<" band energy E_band="<<r_values[1]<<std::endl;
          if(total_init_steps>0)
          {
            //int r_group=(status.MPI_SOURCE-align)/comm_size;
            // flag holds the world rank the reporting worker swapped in.
            int r_group=(flag-align)/comm_size;
            std::cout<<"starting additional calculation in group "<<r_group<<std::endl;

            if(init_steps[r_group]>0)
            {
              more_work = !(generator->generateUnsampledEvec(r_group,evecs[r_group],r_values[energyIndex]));
              init_steps[r_group]--; total_init_steps--;
            }

            //MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 5, MPI_COMM_WORLD);
            // NOTE(review): the target here is r_values, whereas the initial
            // send above targets evec (which the worker passes to setEvec).
            shmem_double_put(r_values, evecs[r_group], 3*size_lsms, lsms_rank0[r_group]); //TODO check this
            shmem_fence();

            num_steps--; running++; stepCount++;
            if(restrict_steps && num_steps<=0) more_work=false;
            if(restrict_steps) std::cout<<" "<<num_steps<<" steps remaining\n";
            walltime = get_rtc() - walltime_0;
            if(restrict_time && walltime>=max_time) more_work=false;
            if(restrict_time) std::cout<<" "<<max_time-walltime<<" seconds remaining\n";
          }
        }
      }
      more_work=true;
      running=0;
      // Main phase: same hand-out as above, then sampling is switched on.
      for(int i=0; i<num_lsms; i++)
      {
        std::cout<<"starting main calculation in group "<<i<<std::endl;
        //MPI_Send(evecs[i], 3*size_lsms, MPI_DOUBLE, lsms_rank0[i], 5, MPI_COMM_WORLD);
        shmem_double_put(evec, evecs[i], 3*size_lsms, lsms_rank0[i]);
        shmem_int_p(&op, 5, lsms_rank0[i]);
        shmem_fence();
        num_steps--; running++; stepCount++;
        if(restrict_steps) std::cout<<" "<<num_steps<<" steps remaining\n";
      }
      shmem_barrier(0, lsms_rank0[0], 2, pSync1);

      generator->startSampling();
      // wait for results and issue new commands or wind down
      while(running>0)
      {
        //MPI_Recv(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
        shmem_int_wait(&flag,-1);

        running--;
        std::cout<<"received energy E_tot ="<<r_values[0]<<std::endl;
        std::cout<<" band energy E_band="<<r_values[1]<<std::endl;
        // printf("from status.MPI_SOURCE=%d\n",status.MPI_SOURCE);
        energy_accumulator+=r_values[0]; energies_accumulated++;

        if(more_work)
        {
          // NOTE(review): status is never written in this function (the
          // MPI_Recv calls are commented out), so status.MPI_SOURCE is
          // indeterminate here; the initial-step loop uses flag instead.
          int r_group=(status.MPI_SOURCE-align)/comm_size;
          std::cout<<"starting additional calculation in group "<<r_group<<std::endl;

          if(generator_needs_moment)
          {
            // Sum the x/y/z moment components over all sites.
            double m0,m1,m2;
            m0=0.0; m1=0.0; m2=0.0;
            for(int i=0; i<3*size_lsms; i+=3)
            {
              m0+=r_values[R_VALUE_OFFSET+i];
              m1+=r_values[R_VALUE_OFFSET+i+1];
              m2+=r_values[R_VALUE_OFFSET+i+2];
            }
            switch(second_dimension)
            {
              case MagneticMoment : magnetization=std::sqrt(m0*m0+m1*m1+m2*m2); break;
              case MagneticMomentX : magnetization=m0; break;
              case MagneticMomentY : magnetization=m1; break;
              case MagneticMomentZ : magnetization=m2; break;
            }
            // A true return from generateEvec ends the hand-out of new work.
            if(generator->generateEvec(r_group,evecs[r_group],r_values[energyIndex],magnetization, &accepted))
              more_work=false;
          } else {
            if(generator->generateEvec(r_group,evecs[r_group],r_values[energyIndex], &accepted))
              more_work=false;
          }

          //MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 5, MPI_COMM_WORLD);
          shmem_double_put(r_values, evecs[r_group], 3*size_lsms, lsms_rank0[r_group]); //TODO check this
          shmem_fence();

          num_steps--; running++; stepCount++;
          if(restrict_steps && num_steps<=0) more_work=false;
          if(restrict_steps) std::cout<<" "<<num_steps<<" steps remaining\n";
          walltime = get_rtc() - walltime_0;
          if(restrict_time && walltime>=max_time) more_work=false;
          if(restrict_time) std::cout<<" "<<max_time-walltime<<" seconds remaining\n";
        }
        else
        {
          // send an exit message to this instance of LSMS
          // NOTE(review): still uses MPI (status.MPI_SOURCE and MPI_Send),
          // while the worker loop no longer contains a matching receive.
          int r_group=(status.MPI_SOURCE-align)/comm_size;

          MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 2, MPI_COMM_WORLD);
        }

        // Append the accepted configuration: iteration header, the two
        // energies, then one line of three components per site.
        if(step_out_flag && accepted)
        {
          step_out_file<<"# iteration "<<energies_accumulated<<std::endl;
          step_out_file.precision(15);
          step_out_file<<energies_accumulated<<std::endl;
          step_out_file<<r_values[0]<<" "<<r_values[1]<<std::endl;
          for(int j=0; j<3*size_lsms; j+=3)
          {
            step_out_file<<r_values[j+R_VALUE_OFFSET]<<" "<<r_values[j+R_VALUE_OFFSET+1]
                         <<" "<<r_values[j+R_VALUE_OFFSET+2]<<std::endl;
          }
        }
        // write restart file every restartWriteFrequency seconds
        if(walltime>nextWriteTime)
        {
          generator->writeState("WLrestart.jsn");
          nextWriteTime+=restartWriteFrequency;
        }

      }
      generator->writeState("WLrestart.jsn");
      /*
      if(evec_generation_mode==WangLandau_1d)
        (static_cast<WL1dEvecGenerator<std::mt19937> *>(generator))->writeState("WLrestart.state");
      if(evec_generation_mode==ExhaustiveIsing)
        (static_cast<ExhaustiveIsing1dEvecGenerator *>(generator))->writeState("WLrestart.state");
      */
      // NOTE(review): evecs[i] came from shmalloc() above but is released
      // with free(); init_steps and generator are not released here.
      for(int i=0; i<num_lsms; i++) free(evecs[i]);
      shfree(evecs);
      //shfree(r_values);
    }
  }

  // From here on world_rank is the outer variable again (set from rank).
  if(world_rank==0)
  {
    // NOTE(review): energies_accumulated is still 0 on the num_lsms==1
    // path, so this mean is 0.0/0.0 there.
    if(step_out_flag)
    {
      step_out_file<<"# end\n-1\n"
                   <<energy_accumulator/double(energies_accumulated)<<std::endl;
      step_out_file.close();
    }
    std::cout<<"Finished all scheduled calculations. Freeing resources.\n";
    std::cout<<"Energy mean = "<<energy_accumulator/double(energies_accumulated)<<"Ry\n";
  }

  if(num_lsms>1)
  {
    // make sure everyone arrives here:
    MPI_Bcast(stupid,37,MPI_CHAR,0,MPI_COMM_WORLD);

    // NOTE(review): at this scope local_comm is the outer MPI_Comm, which is
    // never assigned in this function (MPI_Comm_split is commented out).
    if(world_rank==0)
    {
      MPI_Comm_free(&local_comm);
    }
    else if(my_group>=0)
    {
      MPI_Comm_free(&local_comm);
    }
  }

  if(world_rank==0)
  {
    double walltime = get_rtc() - walltime_0;
    std::cout<<" WL-LSMS finished in "<<walltime<<" seconds.\n";
    std::cout<<" Monte-Carlo steps / walltime = "
             <<double(stepCount)/walltime<<"/sec\n";
  }

#ifdef USE_PAPI
  // Counter values occupy slots [0, hw_counters); the four timer deltas
  // follow directly after them.
  PAPI_stop_counters(papi_values,hw_counters);
  papi_values[hw_counters ] = PAPI_get_real_cyc()-papi_real_cyc_0;
  papi_values[hw_counters+1] = PAPI_get_real_usec()-papi_real_usec_0;
  papi_values[hw_counters+2] = PAPI_get_virt_cyc()-papi_virt_cyc_0;
  papi_values[hw_counters+3] = PAPI_get_virt_usec()-papi_virt_usec_0;
  long long accumulated_counters[NUM_PAPI_EVENTS+4];
  /*
  for(int i=0; i<hw_counters; i++)
  {
    printline(ttos(papi_event_name[i])+" = "+ttos(papi_values[i]),
              std::cout,parameters.myrankWorld);
  }
  printline("PAPI real cycles : "+ttos(papi_values[hw_counters]),
            std::cout,parameters.myrankWorld);
  printline("PAPI real usecs : "+ttos(papi_values[hw_counters+1]),
            std::cout,parameters.myrankWorld);
  printline("PAPI user cycles : "+ttos(papi_values[hw_counters+2]),
            std::cout,parameters.myrankWorld);
  printline("PAPI user usecs : "+ttos(papi_values[hw_counters+3]),
            std::cout,parameters.myrankWorld);
  */

  //MPI_Reduce(papi_values,accumulated_counters,hw_counters+4,
  //           MPI_LONG,MPI_SUM,0,MPI_COMM_WORLD);
  // NOTE(review): `comm` is not declared in this function, and the active
  // sets above name their field start_pe rather than pestart; papi_values
  // and accumulated_counters are automatic arrays of long long handed to a
  // shmem `long` reduction - confirm all three.
  shmem_long_sum_to_all(accumulated_counters, papi_values, hw_counters+4,
                        comm.pestart, comm.logPE_stride, comm.size, pWrk_i, pSync2);

  if(world_rank==0)
  {
    for(int i=0; i<hw_counters; i++)
    {
      std::cout<<"Accumulated: "<<(papi_event_name[i])<<" = "<<(accumulated_counters[i])<<"\n";
    }
    std::cout<<"PAPI accumulated real cycles : "<<(accumulated_counters[hw_counters])<<"\n";
    std::cout<<"PAPI accumulated user cycles : "<<(accumulated_counters[hw_counters+2])<<"\n";
    // Rates are accumulated counts divided by this process's elapsed real
    // microseconds times 1000.  Indices 0..3 are fixed, so they are only
    // meaningful when all four events were counted (hw_counters == 4).
    double gflops_papi = ((double)accumulated_counters[1])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gflops_hw_double = ((double)accumulated_counters[2])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gflops_hw_single = ((double)accumulated_counters[3])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gips = ((double)accumulated_counters[0])/(1000.0*(double)papi_values[hw_counters+1]);
    std::cout<<"PAPI_FP_OPS real GFLOP/s : "<<(gflops_papi)<<"\n";
    std::cout<<"PAPI hw double real GFLOP/s : "<<(gflops_hw_double)<<"\n";
    std::cout<<"PAPI hw single real GFLOP/s : "<<(gflops_hw_single)<<"\n";
    std::cout<<"PAPI real GINST/s : "<<(gips)<<"\n";
  }
#endif

  //MPI_Finalize();
  return 0;
}
int main( int argc, char **argv ) { pthread_t id[NUM_THREADS]; int flops[NUM_THREADS]; int i, rc, retval; pthread_attr_t attr; long long elapsed_us, elapsed_cyc; const PAPI_exe_info_t *prginfo = NULL; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ if ( ( retval = PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); if ( ( retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) ) ) != PAPI_OK ) { if ( retval == PAPI_ECMP ) test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); else test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); } if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { retval = 1; test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", retval ); } my_start = prginfo->address_info.text_start; my_end = prginfo->address_info.text_end; length = ( unsigned int ) ( my_end - my_start ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); pthread_attr_init( &attr ); #ifdef PTHREAD_CREATE_UNDETACHED pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); #endif #ifdef PTHREAD_SCOPE_SYSTEM retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); if ( retval != 0 ) test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); #endif for ( i = 0; i < NUM_THREADS; i++ ) { flops[i] = FLOPS * ( i + 1 ); rc = pthread_create( &id[i], &attr, Thread, ( void * ) &flops[i] ); if ( rc ) return ( FAILURE ); } for ( i = 0; i < NUM_THREADS; i++ ) pthread_join( id[i], NULL ); pthread_attr_destroy( &attr ); elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; if ( !TESTS_QUIET ) { printf( "Master real usec : \t%lld\n", elapsed_us ); printf( "Master real cycles : \t%lld\n", elapsed_cyc ); } test_pass( __FILE__, NULL, 0 ); pthread_exit( NULL ); exit( 1 ); }
int main( int argc, char **argv ) { int status, retval, num_tests = 2, tmp; int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL; int PAPI_event, PAPI_event2, mask1, mask2; int num_events1, num_events2; long long **values; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; const PAPI_component_info_t *cmpinfo; pid_t pid, pid2; double ratio1,ratio2; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* Initialize the library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail_exit( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* get the component info and check if we support attach */ if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) { test_fail_exit( __FILE__, __LINE__, "PAPI_get_component_info", 0 ); } if ( cmpinfo->attach == 0 ) { test_skip( __FILE__, __LINE__, "Platform does not support attaching", 0 ); } /* fork off first child */ pid = fork( ); if ( pid < 0 ) { test_fail_exit( __FILE__, __LINE__, "fork()", PAPI_ESYS ); } if ( pid == 0 ) { exit( wait_for_attach_and_loop( 1 ) ); } /* fork off second child, does twice as much */ pid2 = fork( ); if ( pid2 < 0 ) { test_fail_exit( __FILE__, __LINE__, "fork()", PAPI_ESYS ); } if ( pid2 == 0 ) { exit( wait_for_attach_and_loop( 2 ) ); } /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depending on the availability of the event on the platform */ EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); EventSet2 = add_two_events( &num_events2, &PAPI_event2, &mask2 ); if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_ATTACH, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_ATTACH)" ); return 1 ; } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didnt return true to WIFSTOPPED", 0 ); } if ( ptrace( 
PTRACE_ATTACH, pid2, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_ATTACH)" ); return 1; } if ( waitpid( pid2, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didnt return true to WIFSTOPPED", 0 ); } } retval = PAPI_attach( EventSet1, ( unsigned long ) pid ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); } retval = PAPI_attach( EventSet2, ( unsigned long ) pid2 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); } retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); /* num_events1 is greater than num_events2 so don't worry. */ values = allocate_test_space( num_tests, num_events1 ); /* Gather before values */ elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); elapsed_virt_us = PAPI_get_virt_usec( ); elapsed_virt_cyc = PAPI_get_virt_cyc( ); /* Wait for the SIGSTOP. 
*/ if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFSTOPPED", 0 ); } if ( WSTOPSIG( status ) != SIGSTOP ) { test_fail( __FILE__, __LINE__, "Child process didn't stop on SIGSTOP", 0 ); } if ( ptrace( PTRACE_CONT, pid2, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } if ( waitpid( pid2, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFSTOPPED", 0 ); } if ( WSTOPSIG( status ) != SIGSTOP ) { test_fail( __FILE__, __LINE__, "Child process didn't stop on SIGSTOP", 0 ); } } /* start first child */ retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* start second child */ retval = PAPI_start( EventSet2 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* Wait for the SIGSTOP. 
*/ if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_ATTACH)" ); return 1; } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFSTOPPED", 0 ); } if ( WSTOPSIG( status ) != SIGSTOP ) { test_fail( __FILE__, __LINE__, "Child process didn't stop on SIGSTOP", 0 ); } if ( ptrace( PTRACE_CONT, pid2, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_ATTACH)" ); return 1; } if ( waitpid( pid2, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFSTOPPED", 0 ); } if ( WSTOPSIG( status ) != SIGSTOP ) { test_fail( __FILE__, __LINE__, "Child process didn't stop on SIGSTOP", 0 ); } } elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; /* stop first child */ retval = PAPI_stop( EventSet1, values[0] ); if ( retval != PAPI_OK ) { printf( "Warning: PAPI_stop returned error %d, probably ok.\n", retval ); } /* stop second child */ retval = PAPI_stop( EventSet2, values[1] ); if ( retval != PAPI_OK ) { printf( "Warning: PAPI_stop returned error %d, probably ok.\n", retval ); } remove_test_events( &EventSet1, mask1 ); remove_test_events( &EventSet2, mask2 ); if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } if ( ptrace( PTRACE_CONT, pid2, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFEXITED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFEXITED", 0 ); } if ( waitpid( pid2, &status, 0 ) == -1 
) { perror( "waitpid()" ); exit( 1 ); } if ( WIFEXITED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFEXITED", 0 ); } /* This code isn't necessary as we know the child has exited, */ /* it *may* return an error if the component so chooses. You */ /* should use read() instead. */ printf( "Test case: multiple 3rd party attach start, stop.\n" ); printf( "-----------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf( "-------------------------------------------------------------------------\n" ); sprintf( add_event_str, "(PID %jd) %-12s : \t", ( intmax_t ) pid, event_name ); printf( TAB1, add_event_str, values[0][1] ); sprintf( add_event_str, "(PID %jd) PAPI_TOT_CYC : \t", ( intmax_t ) pid ); printf( TAB1, add_event_str, values[0][0] ); sprintf( add_event_str, "(PID %jd) %-12s : \t", ( intmax_t ) pid2, event_name ); printf( TAB1, add_event_str,values[1][1] ); sprintf( add_event_str, "(PID %jd) PAPI_TOT_CYC : \t", ( intmax_t ) pid2 ); printf( TAB1, add_event_str, values[1][0] ); printf( TAB1, "Real usec : \t", elapsed_us ); printf( TAB1, "Real cycles : \t", elapsed_cyc ); printf( TAB1, "Virt usec : \t", elapsed_virt_us ); printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); printf ( "-------------------------------------------------------------------------\n" ); printf("Verification: pid %d results should be twice pid %d\n",pid2,pid ); ratio1=(double)values[1][0]/(double)values[0][0]; ratio2=(double)values[1][1]/(double)values[0][1]; printf("\t%lld/%lld = %lf\n",values[1][0],values[0][0],ratio1); if ((ratio1 >2.15 ) || (ratio1 < 1.85)) { printf("Ratio out of range, should be ~2.0 not %lf\n",ratio1); test_fail( __FILE__, __LINE__, 
"Error: Counter ratio not two", 0 ); } printf("\t%lld/%lld = %lf\n",values[1][1],values[0][1],ratio2); if ((ratio2 >2.75 ) || (ratio2 < 1.25)) { printf("Ratio out of range, should be ~2.0, not %lf\n",ratio2); test_fail( __FILE__, __LINE__, "Known issue: Counter ratio not two", 0 ); } test_pass( __FILE__, values, num_tests ); return 0; }
int main( int argc, char **argv ) { int status, retval, num_tests = 1, tmp; int EventSet1 = PAPI_NULL; long long **values; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; char event_name[PAPI_MAX_STR_LEN];; const PAPI_hw_info_t *hw_info; const PAPI_component_info_t *cmpinfo; pid_t pid; /* Fork before doing anything with the PMU */ setbuf(stdout,NULL); pid = fork( ); if ( pid < 0 ) test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS ); if ( pid == 0 ) exit( wait_for_attach_and_loop( ) ); tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ /* Master only process below here */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail_exit( __FILE__, __LINE__, "PAPI_library_init", retval ); if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) test_fail_exit( __FILE__, __LINE__, "PAPI_get_component_info", 0 ); if ( cmpinfo->attach == 0 ) test_skip( __FILE__, __LINE__, "Platform does not support attaching", 0 ); hw_info = PAPI_get_hardware_info( ); if ( hw_info == NULL ) test_fail_exit( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depending on the availability of the event on the platform */ retval = PAPI_create_eventset(&EventSet1); if ( retval != PAPI_OK ) test_fail_exit( __FILE__, __LINE__, "PAPI_attach", retval ); /* Force addition of component */ retval = PAPI_assign_eventset_component( EventSet1, 0 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", retval ); /* The following call causes this test to fail for perf_events */ retval = PAPI_attach( EventSet1, ( unsigned long ) pid ); if ( retval != PAPI_OK ) test_fail_exit( __FILE__, __LINE__, "PAPI_attach", retval ); sprintf(event_name,"PAPI_TOT_CYC"); retval = PAPI_add_event(EventSet1, PAPI_TOT_CYC); if ( retval != PAPI_OK ) test_fail_exit( __FILE__, __LINE__, "PAPI_add_event", retval ); retval = 
PAPI_add_event(EventSet1, PAPI_FP_INS); if ( retval == PAPI_ENOEVNT ) { test_warn( __FILE__, __LINE__, "PAPI_FP_INS", retval); } else if ( retval != PAPI_OK ) { test_fail_exit( __FILE__, __LINE__, "PAPI_add_event", retval ); } values = allocate_test_space( 1, 2); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); elapsed_virt_us = PAPI_get_virt_usec( ); elapsed_virt_cyc = PAPI_get_virt_cyc( ); printf("must_ptrace is %d\n",cmpinfo->attach_must_ptrace); pid_t child = wait( &status ); printf( "Debugger exited wait() with %d\n",child ); if (WIFSTOPPED( status )) { printf( "Child has stopped due to signal %d (%s)\n", WSTOPSIG( status ), strsignal(WSTOPSIG( status )) ); } if (WIFSIGNALED( status )) { printf( "Child %ld received signal %d (%s)\n", (long)child, WTERMSIG(status) , strsignal(WTERMSIG( status )) ); } printf("After %d\n",retval); retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) test_fail_exit( __FILE__, __LINE__, "PAPI_start", retval ); printf("Continuing\n"); if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } do { child = wait( &status ); printf( "Debugger exited wait() with %d\n", child); if (WIFSTOPPED( status )) { printf( "Child has stopped due to signal %d (%s)\n", WSTOPSIG( status ), strsignal(WSTOPSIG( status )) ); } if (WIFSIGNALED( status )) { printf( "Child %ld received signal %d (%s)\n", (long)child, WTERMSIG(status) , strsignal(WTERMSIG( status )) ); } } while (!WIFEXITED( status )); printf("Child exited with value %d\n",WEXITSTATUS(status)); if (WEXITSTATUS(status) != 0) test_fail_exit( __FILE__, __LINE__, "Exit status of child to attach to", PAPI_EMISC); retval = PAPI_stop( EventSet1, values[0] ); if ( retval != PAPI_OK ) test_fail_exit( __FILE__, __LINE__, "PAPI_stop", retval ); elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = 
PAPI_get_real_cyc( ) - elapsed_cyc; retval = PAPI_cleanup_eventset(EventSet1); if (retval != PAPI_OK) test_fail_exit( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); retval = PAPI_destroy_eventset(&EventSet1); if (retval != PAPI_OK) test_fail_exit( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); printf( "Test case: 3rd party attach start, stop.\n" ); printf( "-----------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf ( "-------------------------------------------------------------------------\n" ); printf( "Test type : \t 1\n" ); printf( TAB1, "PAPI_TOT_CYC : \t", ( values[0] )[0] ); printf( TAB1, "PAPI_FP_INS : \t", ( values[0] )[1] ); printf( TAB1, "Real usec : \t", elapsed_us ); printf( TAB1, "Real cycles : \t", elapsed_cyc ); printf( TAB1, "Virt usec : \t", elapsed_virt_us ); printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); printf ( "-------------------------------------------------------------------------\n" ); printf( "Verification: none\n" ); test_pass( __FILE__, values, num_tests ); exit( 1 ); }
void PAPI_HW_COUNTER_open(int tid){ // set events to measure int *Events; int EventCode; int event_ctr = 0; int retval; #ifdef MEASURE_TIME #endif #ifdef MEASURE_CPI thr_vars[tid].papi_idx_inst = thr_vars[tid].num_events++; thr_vars[tid].papi_idx_cyc = thr_vars[tid].num_events++; #endif #ifdef MEASURE_MEMACC thr_vars[tid].papi_idx_load = thr_vars[tid].num_events++; thr_vars[tid].papi_idx_store = thr_vars[tid].num_events++; #endif #ifdef MEASURE_LLCMISS thr_vars[tid].papi_idx_llcmiss = thr_vars[tid].num_events++; #endif #ifdef MEASURE_ICACHEMISS thr_vars[tid].papi_idx_icachemiss = thr_vars[tid].num_events++; #endif #ifdef MEASURE_DCACHEMISS thr_vars[tid].papi_idx_l1dcm = thr_vars[tid].num_events++; thr_vars[tid].papi_idx_l1dca = thr_vars[tid].num_events++; #endif #ifdef MEASURE_ENERGY #endif event_ctr = 0; // reset event counter if((Events=(int*)malloc(sizeof(int)*thr_vars[tid].num_events)) == NULL){ printf("ERROR: Failed to allocate memory for Events."); } if((thr_vars[tid].values=(long long int*)malloc(sizeof(long long)*thr_vars[tid].num_events)) == NULL){ printf("ERROR: Failed to allocate memory for Events."); } #ifdef __ARM_ARCH_7A__ // pin processor only on arm arch. 
pid_t pid = getpid(); int core = 0; printf("Pinning thread %d to cores %d..%d\n", pid, 0, 0); printf("Observe in terminal via \"ps -p <PID> -L -o pid,tid,psr\"\n"); pin_cpu(pid, core); printf("Pinned to core %d\n", core); #endif // Open file to output char filename_id[2*sizeof(int)]; snprintf(filename_id, sizeof(filename_id),"%d",tid); char* filename_w_id; filename_w_id=(char*)malloc(strlen(OUTFILEID)+strlen(OUTFILEEXT)+strlen(filename_id)+1); strcpy(filename_w_id, OUTFILEID); strcat(filename_w_id, filename_id); strcat(filename_w_id, OUTFILEEXT); thr_vars[tid].f=fopen(filename_w_id, "w"); if (thr_vars[tid].f == NULL){ printf("failed to open file %s.\n", filename_w_id); exit(1); } // Measure clock frequency long long elapsed_cyc; elapsed_cyc = PAPI_get_real_cyc(); sleep(1); elapsed_cyc = PAPI_get_real_cyc()-elapsed_cyc; thr_vars[tid].PAPI_CLOCK_RATE = elapsed_cyc; printf("Measured clock frequency: %.0lld Hz\n",thr_vars[tid].PAPI_CLOCK_RATE); // Set EventSet thr_vars[tid].EventSet = PAPI_NULL;/*EventSet*/ retval=PAPI_create_eventset(&(thr_vars[tid].EventSet)); if (retval != PAPI_OK){ papi_fail(__FILE__, __LINE__, "PAPI_create_eventset()", retval); } #ifdef MEASURE_TIME #endif #ifdef MEASURE_CPI retval = PAPI_event_name_to_code( PAPI_INST , &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, inst", retval); } Events[event_ctr++] = EventCode; retval = PAPI_event_name_to_code( PAPI_CYC , &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, cyc", retval); } Events[event_ctr++] = EventCode; #endif #ifdef MEASURE_MEMACC retval = PAPI_event_name_to_code( PAPI_MEM_LOAD , &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, loads", retval); } Events[event_ctr++] = EventCode; retval=PAPI_event_name_to_code( PAPI_MEM_STORE , &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, stores", retval); } 
Events[event_ctr++] = EventCode; #endif #ifdef MEASURE_LLCMISS retval = PAPI_event_name_to_code( PAPI_LLC_MISS , &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, llc miss", retval); } Events[event_ctr++] = EventCode; #endif #ifdef MEASURE_ICACHEMISS retval = PAPI_event_name_to_code( PAPI_IC_MISS , &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, llc miss", retval); } Events[event_ctr++] = EventCode; #endif #ifdef MEASURE_DCACHEMISS retval = PAPI_event_name_to_code( PAPI_L1_DC_MISS , &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, Level 1 data cache misses", retval); } Events[event_ctr++] = EventCode; retval = PAPI_event_name_to_code( PAPI_L1_DC_ACCESS, &EventCode ); if (retval != PAPI_OK ) { papi_fail(__FILE__, __LINE__, "PAPI_event_name_to_code, Level 1 data cache accesses", retval); } Events[event_ctr++] = EventCode; #endif #ifdef MEASURE_ENERGY printf("Probing all RAPL events\n"); thr_vars[tid].numcmp = PAPI_num_components(); for(thr_vars[tid].cid=0; thr_vars[tid].cid<thr_vars[tid].numcmp; thr_vars[tid].cid++) { if ( (thr_vars[tid].cmpinfo = PAPI_get_component_info(thr_vars[tid].cid)) == NULL) { papi_fail(__FILE__, __LINE__,"PAPI_get_component_info failed\n", 0); } if (strstr(thr_vars[tid].cmpinfo->name,"rapl")) { thr_vars[tid].rapl_cid=thr_vars[tid].cid; printf("Found rapl component at cid %d.\n",thr_vars[tid].rapl_cid); if (thr_vars[tid].cmpinfo->disabled) { printf("RAPL component disabled: %s\n", thr_vars[tid].cmpinfo->disabled_reason); exit(EXIT_FAILURE); } break; } } if (thr_vars[tid].cid==thr_vars[tid].numcmp) { // Component not found: papi_fail(__FILE__,__LINE__,"No rapl component found\n",0); } retval = PAPI_create_eventset( &(thr_vars[tid].EnergyEventSet) ); if (retval != PAPI_OK){ papi_fail(__FILE__,__LINE__, "PAPI_create_eventset()", retval); } // Add all events: int r; thr_vars[tid].code = PAPI_NATIVE_MASK; r = 
PAPI_enum_cmp_event( &(thr_vars[tid].code), PAPI_ENUM_FIRST, thr_vars[tid].rapl_cid ); while ( r == PAPI_OK ) { retval = PAPI_event_code_to_name( thr_vars[tid].code, thr_vars[tid].event_names[thr_vars[tid].num_energy_events] ); if ( retval != PAPI_OK ) { printf("Error translating %#x\n",thr_vars[tid].code); papi_fail(__FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } printf("Found event: %s\n", thr_vars[tid].event_names[thr_vars[tid].num_energy_events]); retval = PAPI_get_event_info(thr_vars[tid].code,&(thr_vars[tid].evinfo)); if (retval != PAPI_OK) { papi_fail(__FILE__, __LINE__, "Error getting event info\n",retval); } strncpy(thr_vars[tid].units[thr_vars[tid].num_energy_events],thr_vars[tid].evinfo.units,PAPI_MIN_STR_LEN); thr_vars[tid].data_type[thr_vars[tid].num_energy_events] = thr_vars[tid].evinfo.data_type; retval = PAPI_add_event(thr_vars[tid].EnergyEventSet, thr_vars[tid].code); if (retval != PAPI_OK ) { papi_fail( __FILE__, __LINE__, "PAPI_add_event()", retval); } r = PAPI_enum_cmp_event( &(thr_vars[tid].code), PAPI_ENUM_EVENTS, thr_vars[tid].rapl_cid ); thr_vars[tid].num_energy_events++; } if((thr_vars[tid].energy_values=(long long int*)malloc(sizeof(long long)*thr_vars[tid].num_energy_events)) == NULL){ printf("ERROR: Failed to allocate memory for Events."); } #endif #ifdef MEASURE_HW_COUNTER int k; for(k = 0; k < thr_vars[tid].num_events; k++){ retval = PAPI_add_event(thr_vars[tid].EventSet, Events[k]); if (retval != PAPI_OK ) { printf("At event %d:\n",k); papi_fail( __FILE__, __LINE__, "PAPI_add_event()", retval); } } retval=PAPI_start(thr_vars[tid].EventSet); if (retval != PAPI_OK){ papi_fail(__FILE__, __LINE__, "PAPI_start()", retval); } #endif #ifdef MEASURE_ENERGY retval=PAPI_start(thr_vars[tid].EnergyEventSet); if (retval != PAPI_OK){ papi_fail(__FILE__, __LINE__, "PAPI_start() on energy", retval); } #endif }
/*
 * Program entry point.  As written, this function performs three jobs in sequence:
 *
 *   1. PAPI overflow check: a calibration run of do_flops(NUM_FLOPS), then a second run with
 *      an overflow handler installed on PAPI_event.  The overflow count `total` is compared
 *      against [min, max] computed from values2[0][1], mythreshold and OVR_TOLERANCE, and
 *      test_fail() is called when it lies outside that range.
 *   2. Brute-force top-K search: requires argc == 8 (DB file, element count, query file,
 *      query count, thread count, K, object dimension), otherwise prints a usage message and
 *      returns 0.  Rows of argv[1] are loaded into DB and rows of argv[3] into Consultas.
 *      Inside an OpenMP parallel region each thread takes queries trid, trid + procs, ... and
 *      scans every DB row, keeping the TOPK smallest distances in a per-thread heap and
 *      copying them into `answer`.
 *   3. Prints hardware information obtained through PAPI_get_hardware_info() and two entries
 *      of `values`, then calls PAPI_stop_counters().
 *
 * Returns 0 on the usage-error path and at the end of the function.
 *
 * NOTE(review): `values`, `Events`, `total`, `TOPK`, `DIM`, `Salida_Multihilo` and `handler`
 *   are not declared in this function - presumably file-scope; confirm.
 * NOTE(review): `tid` is printed and passed to Thread() but is never assigned here.
 * NOTE(review): `fip` is initialised to 0 and never modified here, so the whole block guarded
 *   by `fip > 0` (including writing Salida_Multihilo.txt) does not run as written.
 * NOTE(review): when fopen(argv[3]) fails only a message is printed; `fquery` is still used.
 *   The result of fopen(argv[1]) is not checked at all.
 * NOTE(review): `values[1]` is cast to float but printed with %lld, and `values` is printed
 *   before PAPI_stop_counters() fills it.
 * NOTE(review): mhinfo, mhcacheinfo and mhlevel are all assigned the result of
 *   PAPI_get_hardware_info() - confirm the intended accessors.
 */
int main(int argc, char *argv[]) { float rtime1, rtime2, ptime1, ptime2, mflops; long long flpops; unsigned long int tid; int num_hwcntrs = 0; int fip = 0, retval; float real_time, proc_time; long long flpins; int i; unsigned int EventSet = PAPI_NULL; int count = 0, err_count = 0; PAPI_event_info_t info; long long ( values2[2] )[2]; long long min, max; int PAPI_event, mythreshold = THRESHOLD; char event_name1[PAPI_MAX_STR_LEN]; const PAPI_hw_info_t *hw_info = NULL; int num_events, mask; int num_flops = NUM_FLOPS; long long elapsed_us, elapsed_cyc; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); /* Get hardware info */ hw_info = PAPI_get_hardware_info( ); if ( hw_info == NULL ) test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); EventSet = add_two_nonderived_events( &num_events, &PAPI_event, &mask ); printf("Using %#x for the overflow event\n",PAPI_event); if ( PAPI_event == PAPI_FP_INS ) { mythreshold = THRESHOLD; } else { #if defined(linux) mythreshold = ( int ) hw_info->cpu_max_mhz * 20000; #else mythreshold = THRESHOLD * 2; #endif } retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); /* stop the calibration run */ retval = PAPI_stop( EventSet, values2[0] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); /* set up overflow handler */ retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); } /* Start overflow run */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( num_flops ); /* 
stop overflow run */ retval = PAPI_stop( EventSet, values2[1] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); if ( !TESTS_QUIET ) { if ( ( retval = PAPI_event_code_to_name( PAPI_event, event_name1 ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); printf( "Test case: Overflow dispatch of 2nd event in set with 2 events.\n" ); printf( "---------------------------------------------------------------\n" ); printf( "Threshold for overflow is: %d\n", mythreshold ); printf( "Using %d iterations\n", num_flops ); printf( "-----------------------------------------------\n" ); printf( "Test type : %16d%16d\n", 1, 2 ); printf( OUT_FMT, event_name1, ( values2[0] )[1], ( values2[1] )[1] ); printf( OUT_FMT, "PAPI_TOT_CYC", ( values2[0] )[0], ( values2[1] )[0] ); printf( "Overflows : %16s%16d\n", "", total ); printf( "-----------------------------------------------\n" ); } retval = PAPI_cleanup_eventset( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); retval = PAPI_destroy_eventset( &EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); if ( !TESTS_QUIET ) { printf( "Verification:\n" ); #if defined(linux) || defined(__ia64__) || defined(_POWER4) num_flops *= 2; #endif if ( PAPI_event == PAPI_FP_INS || PAPI_event == PAPI_FP_OPS ) { printf( "Row 1 approximately equals %d %d\n", num_flops, num_flops ); } printf( "Column 1 approximately equals column 2\n" ); printf( "Row 3 approximately equals %u +- %u %%\n",( unsigned ) ( ( values2[0] )[1] / ( long long ) mythreshold ),( unsigned ) ( OVR_TOLERANCE * 100.0 ) ); } min = ( long long ) ( ( ( double ) values2[0][1] * ( 1.0 - OVR_TOLERANCE ) ) / ( double ) mythreshold ); max = ( long long ) ( ( ( double ) values2[0][1] * ( 1.0 
+ OVR_TOLERANCE ) ) / ( double ) mythreshold ); printf( "Overflows: total(%d) > max(%lld) || total(%d) < min(%lld) \n", total, max, total, min ); if ( total > max || total < min ) test_fail( __FILE__, __LINE__, "Overflows", 1 ); printf("Initial thread id is: %lu\n",tid); /* Initialize the PAPI library and get the number of counters available */ if ((num_hwcntrs = PAPI_num_counters()) <= 0) handle_error(1); /* The installation supports PAPI, but has no counters */ if ((num_hwcntrs = PAPI_num_counters()) == 0 ) fprintf(stderr,"Info:: This machine does not provide hardware counters."); printf("This system has %d available counters.\n", num_hwcntrs); if (num_hwcntrs > 2) num_hwcntrs = 2; /* Start counting events */ if (PAPI_start_counters(Events, num_hwcntrs) != PAPI_OK) handle_error(1); if (argc != 8) { printf("\nError :: Ejecutar como : a.out archivo_BD Num_elem archivo_queries Num_queries N_THREADS numero_K Dimension_objetos\n"); return 0; } TOPK = atoi(argv[6]); DIM = atoi(argv[7]); double **DB; double **Consultas; //Query queue int N_QUERIES, N_DB; char str_f[256]; double dato[DIM]; int j; FILE *f_dist, *fquery; Elem *heap, e_temp,*answer; int *acum, N_THREADS; //N_THREADS is the number of threads the parallel region is launched with N_THREADS = atoi(argv[5]); //N_QUERIES is the number of queries N_QUERIES = atoi(argv[4]); N_DB = atoi(argv[2]); printf("\nN_QUERIES = %d\nN_THREADS = %d\n", N_QUERIES, N_THREADS); fflush(stdout); acum = (int *) malloc(sizeof (int)*N_THREADS); for (i = 0; i < N_THREADS; i++) acum[i] = 0; sprintf(str_f, "%s", argv[1]); printf("\nAbriendo %s... 
", argv[1]); fflush(stdout); f_dist = fopen(str_f, "r"); printf("OK\n"); fflush(stdout); Consultas = (double **) malloc(sizeof (double *)*N_QUERIES); for (i = 0; i < N_QUERIES; i++) Consultas[i] = (double *) malloc(sizeof (double)*DIM); DB = (double **) malloc(sizeof (double *)*N_DB); for (i = 0; i < N_DB; i++) DB[i] = (double *) malloc(sizeof (double)*DIM); answer = (Elem *)malloc(sizeof(Elem)*N_QUERIES*TOPK); printf("\nCargando DB... "); fflush(stdout); for (i = 0; i < N_DB; i++) { //Use leedato_cophir() when working with the Cophir DB to avoid problems with the "," //if (leedato_cophir(dato, f_dist) == ERROR || feof(f_dist)) if (leedato(dato, f_dist) == ERROR || feof(f_dist)) { printf("\n\nERROR :: N_DB mal establecido\n\n"); fflush(stdout); fclose(f_dist); break; } copiavalor(DB[i], dato); } fclose(f_dist); printf("OK\n"); fflush(stdout); if ((fquery = fopen(argv[3], "r")) == NULL) printf("Error al abrir para lectura el archivo de qeuries: %s\n", argv[3]); else printf("Abriendo para lectura %s\n", argv[3]); printf("\nCargando Consultas... 
"); fflush(stdout); for (i = 0; i < N_QUERIES; i++) { //Use leedato_cophir() when working with the Cophir DB to avoid problems with the "," //if (leedato_cophir(dato, fquery) == ERROR || feof(fquery)) if (leedato(dato, fquery) == ERROR || feof(fquery)) { printf("\n\nERROR :: N_QUERIES mal establecido, Menos queries que las indicadas\n\n"); fflush(stdout); fclose(fquery); break; } copiavalor(Consultas[i], dato); } fclose(fquery); printf("OK\n"); fflush(stdout); PAPI_start_counters((int*) Events, NUM_EVENTS); omp_set_num_threads(N_THREADS); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( omp_get_thread_num ) ); if ( retval != PAPI_OK ) { if ( retval == PAPI_ECMP ) test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); else test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); } #pragma omp parallel shared(Consultas, DB, N_QUERIES, N_DB, N_THREADS, acum, DIM) { float real_time; struct timeval t1, t2; int i, j; Elem *heap, e_temp; double d; int n_elem = 0; int trid = omp_get_thread_num(); //Thread ID int procs = omp_get_num_threads(); //Total number of threads double suma = 0; suma = 0; heap = (Elem *) malloc(sizeof (Elem) * TOPK); #pragma omp barrier #pragma omp master { gettimeofday(&t1, 0); } //Each thread handles a subset of the queries. Each thread walks the query array in a round-robin fashion. for (i = trid; i < N_QUERIES; i += procs) { n_elem = 0; for (j = 0; j < N_DB; j++) { d = distancia(Consultas[i], DB[j]); //If the distance from the object to the query is smaller than the heap root, it is inserted into the heap. 
La raíz siempre mantiene la mayor de las distancias if(n_elem<TOPK){ e_temp.dist = d; e_temp.ind = j; inserta2(heap, &e_temp, &n_elem); } if (n_elem==TOPK){ if (d < topH(heap, &n_elem)) { e_temp.dist = d; e_temp.ind = j; //If the heap is not full, the element is inserted if (n_elem < TOPK) inserta2(heap, &e_temp, &n_elem); //If the heap is full, the new element is inserted and the one that previously had the largest distance is removed. popush2() performs both the removal of the largest element and the insertion of the new one. else popush2(heap, &n_elem, &e_temp); }} } //At this point in the code 'heap' holds the K elements closest to the query. They can be extracted with extrae2() for (j = 0; j < TOPK ; j++) { extrae2(heap, &n_elem, &e_temp); answer[i*TOPK+j].ind = e_temp.ind; answer[i*TOPK+j].dist = e_temp.dist; } //We perform an operation on the results so that the compiler does not drop instructions it considers unused by the user. Each thread simply sums the distances of the elements closest to the query } Thread( 1000000 * ( tid + 1 ) ); fflush(stdout); #pragma omp barrier #pragma omp master { if ( fip > 0 ) { /* Setup PAPI library and begin collecting data from the counters */ if ( fip == 1 ) { if ( ( retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ) ) < PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); } else { if ( ( retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ) ) < PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); } gettimeofday(&t2, 0); real_time = (t2.tv_sec - t1.tv_sec) + (float) (t2.tv_usec - t1.tv_usec) / 1000000; Salida_Multihilo = fopen("Salida_Multihilo.txt", "w"); for (i = 0; i < N_QUERIES; ++i){ fprintf(Salida_Multihilo, "Consulta id:: %d\n",i); for (j = 0; j < TOPK; ++j){ fprintf(Salida_Multihilo,"ind = %d :: dist = %f\n",answer[(i*TOPK)+j].ind,answer[(i*TOPK)+j].dist); } fprintf(Salida_Multihilo, "---------------------------------\n"); } fclose(Salida_Multihilo); 
printf("\n\nK = %d", TOPK); printf("\nReal Time = %f segundos.\n", real_time); fflush(stdout); if ( fip == 1 ) { if ( ( retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ) ) < PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); } else { if ( ( retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ) ) < PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); } if ( !TESTS_QUIET ) { if ( fip == 1 ) { printf( "Real_time: %f Proc_time: %f Total flpins: ", real_time, proc_time ); } else { printf( "Real_time: %f Proc_time: %f Total flpops: ", real_time, proc_time ); } printf( LLDFMT, flpins ); printf( " MFLOPS: %f\n", mflops ); } } } free(heap); }//end pragma omp parallel elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; if ( !TESTS_QUIET ) { printf( "Master real usec : \t%lld\n", elapsed_us ); printf( "Master real cycles : \t%lld\n", elapsed_cyc ); } const PAPI_hw_info_t *hwinfo = NULL; const PAPI_mh_tlb_info_t *mhinfo = NULL; const PAPI_mh_cache_info_t *mhcacheinfo = NULL; const PAPI_mh_level_t *mhlevel = NULL; if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) exit(1); if ((hwinfo = PAPI_get_hardware_info()) == NULL) exit(1); if ((mhinfo = PAPI_get_hardware_info()) == NULL) exit(1); if ((mhcacheinfo = PAPI_get_hardware_info()) == NULL) exit(1); if ((mhlevel = PAPI_get_hardware_info()) == NULL) exit(1); printf("\n\nA continuación información actual del equipo\n\n"); printf("MH Type %d - Num entries %d - Associativity %d \n",mhinfo->type, mhinfo->num_entries, mhinfo->associativity); printf("Cache MH type %d size %d line size %d num_lines %d Associativity %d\n\n",mhcacheinfo->type, mhcacheinfo->size,mhcacheinfo->line_size, mhcacheinfo->num_lines, mhcacheinfo->associativity); retval=papi_print_header("Available PAPI preset and user defined events plus hardware information.\n",&hwinfo ); printf("Total hardware flops = %lld\n",(float)values[1]); printf("L2 data cache misses 
is %lld\n", values[0]); retval = PAPI_stop_counters(values, NUM_EVENTS); return 0; }
void clock_res_check( int flag ) { if ( CLOCK_ERROR ) return; long long *elapsed_cyc, total_cyc = 0, uniq_cyc = 0, diff_cyc = 0; int i; double min, max, average, std, tmp; elapsed_cyc = ( long long * ) calloc( NUM_ITERS, sizeof ( long long ) ); /* Real */ switch ( flag ) { case 0: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_real_cyc( ); break; case 1: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_real_usec( ); break; case 2: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_virt_cyc( ); break; case 3: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_virt_usec( ); break; default: test_fail( __FILE__, __LINE__, "clock_res_check", -1 ); } min = max = ( double ) ( elapsed_cyc[1] - elapsed_cyc[0] ); for ( i = 1; i < NUM_ITERS; i++ ) { if ( elapsed_cyc[i] - elapsed_cyc[i - 1] < 0 ) { CLOCK_ERROR = 1; test_fail( __FILE__, __LINE__, "Negative elapsed time", -1 ); free( elapsed_cyc ); return; } diff_cyc = elapsed_cyc[i] - elapsed_cyc[i - 1]; if ( min > diff_cyc ) min = ( double ) diff_cyc; if ( max < diff_cyc ) max = ( double ) diff_cyc; if ( diff_cyc != 0 ) uniq_cyc++; total_cyc += diff_cyc; } average = ( double ) total_cyc / ( NUM_ITERS - 1 ); std = 0; for ( i = 1; i < NUM_ITERS; i++ ) { tmp = ( double ) ( elapsed_cyc[i] - elapsed_cyc[i - 1] ); tmp = tmp - average; std += tmp * tmp; } std = sqrt( std / ( NUM_ITERS - 2 ) ); printf( "%s: min %.3lf max %.3lf \n", func_name[flag], min, max ); printf( " average %.3lf std %.3lf\n", average, std ); if ( !TESTS_QUIET ) { if ( uniq_cyc == NUM_ITERS - 1 ) { printf( "%s : %7.3f <%7.3f\n", func_name[flag], ( double ) total_cyc / ( double ) ( NUM_ITERS ), ( double ) total_cyc / ( double ) uniq_cyc ); } else if ( uniq_cyc ) { printf( "%s : %7.3f %7.3f\n", func_name[flag], ( double ) total_cyc / ( double ) ( NUM_ITERS ), ( double ) total_cyc / ( double ) uniq_cyc ); } else { printf( "%s : %7.3f >%7.3f\n", func_name[flag], ( double 
) total_cyc / ( double ) ( NUM_ITERS ), ( double ) total_cyc ); } } free( elapsed_cyc ); }
int main(int argc, char **argv) { int retval, num_tests = 2, eventcnt, events[2], i, tmp; int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL; int PAPI_event; long_long values1[2], values2[2]; long_long elapsed_cyc; char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; retval = PAPI_library_init(PAPI_VER_CURRENT); retval = PAPI_set_debug(PAPI_VERB_ECONT); /* query and set up the right instruction to monitor */ if (PAPI_query_event(PAPI_FP_OPS) == PAPI_OK) PAPI_event = PAPI_FP_OPS; else PAPI_event = PAPI_TOT_INS; retval = PAPI_event_code_to_name(PAPI_event, event_name); sprintf(add_event_str, "PAPI_add_event[%s]", event_name); retval = PAPI_create_eventset(&EventSet1); /* Add the events */ retval = PAPI_add_event(EventSet1, PAPI_event); retval = PAPI_add_event(EventSet1, PAPI_TOT_CYC); /* Add them reversed to EventSet2 */ retval = PAPI_create_eventset(&EventSet2); eventcnt = 2; retval = PAPI_list_events(EventSet1, events, &eventcnt); for (i = eventcnt - 1; i >= 0; i--) { retval = PAPI_event_code_to_name(events[i], event_name); retval = PAPI_add_event(EventSet2, events[i]); } elapsed_cyc = PAPI_get_real_cyc(); retval = PAPI_start(EventSet1); do_flops(NUM_FLOPS); retval = PAPI_stop(EventSet1, values1); retval = PAPI_start(EventSet2); do_flops(NUM_FLOPS); retval = PAPI_stop(EventSet2, values2); elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc; retval = PAPI_cleanup_eventset(EventSet1); /* JT */ retval = PAPI_destroy_eventset(&EventSet1); retval = PAPI_cleanup_eventset(EventSet2); /* JT */ retval = PAPI_destroy_eventset(&EventSet2); printf("Test case 0: start, stop.\n"); printf("-----------------------------------------------\n"); tmp = PAPI_get_opt(PAPI_DEFDOM, NULL); tmp = PAPI_get_opt(PAPI_DEFGRN, NULL); printf("Using %d iterations of c += a*b\n", NUM_FLOPS); printf ("-------------------------------------------------------------------------\n"); printf("Test type : \t 1\t 2\n"); printf("%ld %ld\n", values1[0], values2[1]); printf("%d %d\n", "PAPI_TOT_INS : 
\t", values1[1], values2[0]); printf("%ld\n", "Real cycles : \t", elapsed_cyc); printf ("-------------------------------------------------------------------------\n"); printf("Verification: none\n"); exit(1); }
void * Thread( void *arg ) { int retval, num_tests = 1, i; int EventSet1 = PAPI_NULL, mask1, PAPI_event; int num_events1; long long **values; long long elapsed_us, elapsed_cyc; unsigned short *profbuf; char event_name[PAPI_MAX_STR_LEN]; retval = PAPI_register_thread( ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); profbuf = ( unsigned short * ) malloc( length * sizeof ( unsigned short ) ); if ( profbuf == NULL ) exit( 1 ); memset( profbuf, 0x00, length * sizeof ( unsigned short ) ); /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depends on the availability of the event on the platform */ EventSet1 = add_two_nonderived_events( &num_events1, &PAPI_event, &mask1 ); values = allocate_test_space( num_tests, num_events1 ); if ( ( retval = PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_profil( profbuf, length, my_start, 65536, EventSet1, PAPI_event, THR, PAPI_PROFIL_POSIX ); if ( retval ) test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); if ( ( retval = PAPI_start( EventSet1 ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( *( int * ) arg ); if ( ( retval = PAPI_stop( EventSet1, values[0] ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; /* to remove the profile flag */ retval = PAPI_profil( profbuf, length, my_start, 65536, EventSet1, PAPI_event, 0, PAPI_PROFIL_POSIX ); if ( retval ) test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); remove_test_events( &EventSet1, mask1 ); if ( !TESTS_QUIET ) { if ( mask1 == 0x3 ) { printf( "Thread 0x%x PAPI_TOT_INS : \t%lld\n", ( int ) pthread_self( ), ( values[0] )[0] ); } else { printf( "Thread 0x%x PAPI_FP_INS : 
\t%lld\n", ( int ) pthread_self( ), ( values[0] )[0] ); } printf( "Thread 0x%x PAPI_TOT_CYC: \t%lld\n", ( int ) pthread_self( ), ( values[0] )[1] ); printf( "Thread 0x%x Real usec : \t%lld\n", ( int ) pthread_self( ), elapsed_us ); printf( "Thread 0x%x Real cycles : \t%lld\n", ( int ) pthread_self( ), elapsed_cyc ); printf( "Test case: PAPI_profil() for pthreads\n" ); printf( "----Profile buffer for Thread 0x%x---\n", ( int ) pthread_self( ) ); for ( i = 0; i < ( int ) length; i++ ) { if ( profbuf[i] ) printf( "0x%lx\t%d\n", ( unsigned long ) ( my_start + 2 * i ), profbuf[i] ); } } for ( i = 0; i < ( int ) length; i++ ) if ( profbuf[i] ) break; if ( i >= ( int ) length ) test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); free_test_space( values, num_tests ); retval = PAPI_unregister_thread( ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); return ( NULL ); }
int main(){ /************************************/ long_long checksum = 0; int i,j,k; for (i = 0; i < N; ++i) for (j = 0; j < N; ++j){ mul1[i][j]= (i+j) % 8 + 1; mul2[i][j]= (N-i+j) % 8 + 1; res[i][j] = 0; } /************************************/ int retval, EventSet=PAPI_NULL; long_long values[3]; long_long start_cycles, end_cycles, start_usec, end_usec; /* Initialize the PAPI library */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) { fprintf(stderr, "PAPI library init error!\n"); exit(1); } /* Create the Event Set */ if (PAPI_create_eventset(&EventSet) != PAPI_OK) handle_error(1, "create_eventset"); /* Add L1 data cache misses to the Event Set */ if (PAPI_add_event(EventSet,PAPI_L1_DCM) != PAPI_OK) handle_error(1,"add_event - L1_DCM"); /* Add load instructions completed to the Event Set */ if (PAPI_add_event(EventSet,PAPI_LD_INS) != PAPI_OK) handle_error(1,"add_event - LD_INS"); /* Add store instructions completed to the Event Set */ if (PAPI_add_event(EventSet,PAPI_SR_INS) != PAPI_OK) handle_error(1,"add_event - SR_INS"); /* Reset the counting events in the Event Set */ if (PAPI_reset(EventSet) != PAPI_OK) handle_error(1,"reset"); /* Read the counting of events in the Event Set */ if (PAPI_read(EventSet, values) != PAPI_OK) handle_error(1,"read"); printf("After resetting counter 'PAPI_L1_DCM' [x10^6]: %f\n", \ (double)(values[0])/1000000); printf("After resetting counter 'PAPI_LD_INS' [x10^6]: %f\n", \ (double)(values[1])/1000000); printf("After resetting counter 'PAPI_SR_INS' [x10^6]: %f\n", \ (double)(values[2])/1000000); /* Start counting events in the Event Set */ if (PAPI_start(EventSet) != PAPI_OK) handle_error(1,"start"); /* Gets the starting time in clock cycles */ start_cycles = PAPI_get_real_cyc(); /* Gets the starting time in microseconds */ start_usec = PAPI_get_real_usec(); /************************************/ /* MATRIX MULTIPLICATION */ /************************************/ for (i = 0; i < N; ++i) for (j = 0; j < 
N; ++j) for (k = 0; k < N; ++k) res[i][j] += mul1[i][k] * mul2[k][j]; /************************************/ /* Gets the ending time in clock cycles */ end_cycles = PAPI_get_real_cyc(); /* Gets the ending time in microseconds */ end_usec = PAPI_get_real_usec(); /* Stop the counting of events in the Event Set */ if (PAPI_stop(EventSet, values) != PAPI_OK) handle_error(1,"stop"); printf("After stopping counter 'PAPI_L1_DCM' [x10^6]: %f\n", \ (double)(values[0])/1000000); printf("After stopping counter 'PAPI_LD_INS' [x10^6]: %f\n", \ (double)(values[1])/1000000); printf("After stopping counter 'PAPI_SR_INS' [x10^6]: %f\n", \ (double)(values[2])/1000000); printf("Wall clock cycles [x10^6]: %f\n", \ (double)(end_cycles - start_cycles)/1000000); printf("Wall clock time [seconds]: %f\n", \ (double)(end_usec - start_usec)/1000000); for (i = 0; i < N; ++i) for (j = 0; j < N; ++j) checksum+=res[i][j]; printf("Matrix checksum: %lld\n", checksum); return(0); }
int main(int argc, char **argv) { int i, retval, EventSet = PAPI_NULL; int bins = 100; int show_dist = 0, show_std_dev = 0; long long totcyc, values[2]; long long *array; tests_quiet(argc, argv); /* Set TESTS_QUIET variable */ for (i = 0; i < argc; i++) { if (argv[i]) { if (strstr(argv[i], "-b")) { bins = atoi(argv[i+1]); if (bins) i++; else { printf ("-b requires a bin count!\n"); exit(1); } } if (strstr(argv[i], "-d")) show_dist = 1; if (strstr(argv[i], "-h")) { print_help(); exit(1); } if (strstr(argv[i], "-s")) show_std_dev = 1; if (strstr(argv[i], "-t")) { num_iters = atol(argv[i+1]); if (num_iters) i++; else { printf ("-t requires a threshold value!\n"); exit(1); } } } } printf("Cost of execution for PAPI start/stop, read and accum.\n"); printf("This test takes a while. Please be patient...\n"); if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) test_fail(__FILE__, __LINE__, "PAPI_library_init", retval); if ((retval = PAPI_set_debug(PAPI_VERB_ECONT)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_set_debug", retval); if ((retval = PAPI_query_event(PAPI_TOT_CYC)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_query_event", retval); if ((retval = PAPI_query_event(PAPI_TOT_INS)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_query_event", retval); if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval); if ((retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_add_event", retval); if ((retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) if ((retval = PAPI_add_event(EventSet, PAPI_TOT_IIS)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_add_event", retval); /* Make sure no errors and warm up */ totcyc = PAPI_get_real_cyc(); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); if ((retval = PAPI_stop(EventSet, NULL)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", 
retval); array = (long long *)malloc(num_iters*sizeof(long long)); if (array == NULL ) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); /* Determine clock latency */ printf("\nPerforming loop latency test...\n"); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } do_output(0, array, bins, show_std_dev, show_dist); /* Start the start/stop eval */ printf("\nPerforming start/stop test...\n"); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_start(EventSet); PAPI_stop(EventSet, values); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } do_output(1, array, bins, show_std_dev, show_dist); /* Start the read eval */ printf("\nPerforming read test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); PAPI_read(EventSet, values); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_read(EventSet, values); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); do_output(2, array, bins, show_std_dev, show_dist); /* Start the read with timestamp eval */ printf("\nPerforming read with timestamp test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); PAPI_read_ts(EventSet, values, &totcyc); for (i = 0; i < num_iters; i++) { PAPI_read_ts(EventSet, values, &array[i]); } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); /* post-process the timing array */ for (i = num_iters - 1; i > 0 ; i--) { array[i] -= array[i-1]; } array[0] -= totcyc; do_output(3, array, bins, show_std_dev, show_dist); /* Start the accum eval */ printf("\nPerforming accum test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); PAPI_accum(EventSet, values); 
for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_accum(EventSet, values); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); do_output(4, array, bins, show_std_dev, show_dist); /* Start the reset eval */ printf("\nPerforming reset test...\n"); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); for (i = 0; i < num_iters; i++) { totcyc = PAPI_get_real_cyc(); PAPI_reset(EventSet); totcyc = PAPI_get_real_cyc() - totcyc; array[i] = totcyc; } if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); do_output(5, array, bins, show_std_dev, show_dist); free(array); test_pass(__FILE__, NULL, 0); exit(1); }
/* Returns the value of PAPI_get_real_cyc() converted to uint64_t. */
uint64_t vt_metric_real_cyc(void)
{
  const uint64_t cycles = (uint64_t)PAPI_get_real_cyc();

  return cycles;
}
int main(int argc, char *argv[]) { if (argc < 4) { printf("Usage: %s data_type(text or bin) input_file output_file\n", argv[0]); return EXIT_FAILURE; } char *file_type = argv[1]; char *file_in = argv[2]; char *file_out = argv[3]; char *str1 = "SU.vtk"; char *str2 = "VAR.vtk"; char *str3 = "CGUP.vtk"; char *file_perf = "pstats.dat"; int status = 0; /** internal cells start and end index*/ int nintci, nintcf; /** external cells start and end index. The external cells are only ghost cells. They are accessed only through internal cells*/ int nextci, nextcf; /** link cell-to-cell array. Stores topology information*/ int **lcc; /** red-black colouring of the cells*/ int *nboard; /** boundary coefficients for each volume cell */ double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su; /**parameter used for volmesh and reading binary input file */ int* nodeCnt; int*** points; int*** elems; /**Measured Performance and Papi parameters*/ long long *values_i = (long long *) calloc(sizeof(long long), 4); long long *values_c = (long long *) calloc(sizeof(long long), 4); long long *values_o = (long long *) calloc(sizeof(long long), 4); double *mflops = (double *) calloc(sizeof(double), 3); double *L1mira = (double *) calloc(sizeof(double), 3); double *Lmirate = (double *) calloc(sizeof(double), 3); double *util = (double *) calloc(sizeof(double), 3); long long *et = (long long *) calloc(sizeof(long long), 3); long long start_cycles, start_usec,end_cycles_1, end_usec_1, end_cycles_2, end_cycles_3, end_usec_2, end_usec_3; /**In cluster mpp_inter L1 and L2 events can not computed at the same time, so set into two groups*/ int Events[NUM_EVENTS]={PAPI_L2_TCM,PAPI_L2_TCA,PAPI_FP_INS,PAPI_TOT_CYC}; // int Events[NUM_EVENTS]={PAPI_L1_TCM,PAPI_L1_TCA,PAPI_FP_INS,PAPI_TOT_CYC}; /**start HW counters and execution time recorder*/ if ( PAPI_start_counters( Events, NUM_EVENTS ) != PAPI_OK ) printf("Fail to start PAPI counter\n"); start_cycles = PAPI_get_real_cyc(); // Gets the starting time in clock 
cycles start_usec = PAPI_get_real_usec(); // Gets the starting time in microseconds /* initialization */ // read-in the input file int f_status; if (strcmp(file_type,"text") == 0) { f_status = read_formatted(file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard); } else if (strcmp(file_type,"bin") == 0) { f_status = read_formatted_bin(file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su,&nboard); } else { printf ("Input file format is nor correct\n"); return EXIT_FAILURE; } if (f_status != 0){ printf("failed to initialize data!\n"); return EXIT_FAILURE; } // allocate arrays used in gccg int nomax = 3; /** the reference residual*/ double resref = 0.0; /** the ratio between the reference and the current residual*/ double ratio; /** array storing residuals */ double* resvec = (double *) calloc(sizeof(double), (nintcf + 1)); /** the variation vector -> keeps the result in the end */ double* var = (double *) calloc(sizeof(double), (nextcf + 1)); /** the computation vectors */ double* direc1 = (double *) calloc(sizeof(double), (nextcf + 1)); double* direc2 = (double *) calloc(sizeof(double), (nextcf + 1)); /** additional vectors */ double* cgup = (double *) calloc(sizeof(double), (nextcf + 1)); double* oc = (double *) calloc(sizeof(double), (nintcf + 1)); double* cnorm = (double *) calloc(sizeof(double), (nintcf + 1)); double* adxor1 = (double *) calloc(sizeof(double), (nintcf + 1)); double* adxor2 = (double *) calloc(sizeof(double), (nintcf + 1)); double* dxor1 = (double *) calloc(sizeof(double), (nintcf + 1)); double* dxor2 = (double *) calloc(sizeof(double), (nintcf + 1)); /**store volume information*/ int nc=0; // initialize the reference residual for ( nc = nintci; nc <= nintcf; nc++) { resvec[nc] = su[nc]; resref = resref + resvec[nc] * resvec[nc]; } resref = sqrt(resref); if (resref < 1.0e-15){ printf("i/o - error: residue sum less than 1.e-15 - %lf\n", resref); return EXIT_FAILURE; 
} // initialize the arrays for (nc = 0; nc <= 10; nc++){ oc[nc] = 0.0; cnorm[nc] = 1.0; } for (nc = nintci; nc <= nintcf; nc++){ cgup[nc] = 0.0; var[nc] = 0.0; } for (nc = nextci; nc <= nextcf; nc++){ var[nc] = 0.0; cgup[nc] = 0.0; direc1[nc] = 0.0; bs[nc] = 0.0; be[nc] = 0.0; bn[nc] = 0.0; bw[nc] = 0.0; bl[nc] = 0.0; bh[nc] = 0.0; } for (nc = nintci; nc <= nintcf; nc++){ cgup[nc] = 1.0 / bp[nc]; } int if1 = 0; int if2 = 0; int iter = 1; int nor = 1; int nor1 = nor - 1; /* finished initalization */ /*read PAPI HW counters and caculate performance of input phase*/ if ( PAPI_read_counters( values_i, NUM_EVENTS ) != PAPI_OK ){ printf("fail to stop papi counter"); } Lmirate[0] = (double) values_i[0] / values_i[1]; end_usec_1 = PAPI_get_real_usec(); mflops[0] = (double) values_i[2] / (end_usec_1-start_usec); util[0] = mflops[0] / PEAKPER; /* start computation loop */ while (iter < 10000){ /* start phase 1 */ // update the old values of direc for (nc = nintci; nc <= nintcf; nc++){ direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc]; } // compute new guess (approximation) for direc for (nc = nintci; nc <= nintcf; nc++){ direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]] - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]] - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]] - bh[nc] * direc1[lcc[5][nc]]; } /* end phase 1 */ /* start phase 2 */ // execute normalization steps double oc1, oc2, occ; if (nor1 == 1){ oc1 = 0; occ = 0; for (nc = nintci; nc <= nintcf; nc++){ occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; for (nc = nintci; nc <= nintcf; nc++){ direc2[nc] = direc2[nc] - oc1 * adxor1[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc]; } if1++; }else if (nor1 == 2){ oc1 = 0; occ = 0; for (nc = nintci; nc <= nintcf; nc++){ occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; oc2 = 0; occ = 0; for (nc = nintci; nc <= nintcf; nc++){ occ = occ + adxor2[nc] * direc2[nc]; } oc2 = occ / cnorm[2]; for (nc = nintci; nc <= nintcf; nc++){ 
direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc]; } if2++; } cnorm[nor] = 0; double omega = 0; // compute the new residual for (nc = nintci; nc <= nintcf; nc++){ cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc]; omega = omega + resvec[nc] * direc2[nc]; } omega = omega / cnorm[nor]; double resnew = 0.0; for (nc = nintci; nc <= nintcf; nc++){ var[nc] = var[nc] + omega * direc1[nc]; resvec[nc] = resvec[nc] - omega * direc2[nc]; resnew = resnew + resvec[nc] * resvec[nc]; } resnew = sqrt(resnew); ratio = resnew / resref; // exit on no improvements of residual if (ratio <= 1.0e-10){ break; } iter++; // prepare additional arrays for the next iteration step if (nor == nomax){ nor = 1; }else{ if (nor == 1){ for (nc = nintci; nc <= nintcf; nc++){ dxor1[nc] = direc1[nc]; adxor1[nc] = direc2[nc]; } } else if (nor == 2){ for (nc = nintci; nc <= nintcf; nc++){ dxor2[nc] = direc1[nc]; adxor2[nc] = direc2[nc]; } } nor++; } nor1 = nor - 1; }/* end phase 2 */ /* finished computation loop */ /*read PAPI HW counters and caculate performance of computation phase*/ end_cycles_2 = PAPI_get_real_cyc(); // Gets the ending time in clock cycles end_usec_2 = PAPI_get_real_usec(); // Gets the ending time in microseconds if ( PAPI_read_counters( values_c, NUM_EVENTS ) != PAPI_OK ){ printf("fail to read papi counter"); } Lmirate[1] = (double) values_c[0]/values_c[1]; mflops[1] = (double) values_c[2] / ( end_usec_2-end_usec_1 ); util[1] = mflops[1] / PEAKPER; /* write output file */ if ( write_result(file_in, file_out, nintci, nintcf, var, iter, ratio) != 0 ) printf("error when trying to write to file %s\n", file_out); //transfer volume to mesh if (vol2mesh(nintci, nintcf, lcc, &nodeCnt, &points, &elems) != 0 ){ printf("error when trying to converge topology to volume"); } //write output to vtk file if (write_result_vtk(str1, nintci, nintcf, nodeCnt, points, elems, su) != 0){ printf("error when write SU to vtk file"); } if 
(write_result_vtk(str2, nintci, nintcf, nodeCnt, points, elems, var) != 0){ printf("error when write VAR to vtk file"); } if (write_result_vtk(str3, nintci, nintcf, nodeCnt, points, elems, cgup) != 0){ printf("error when write CGUP to vtk file"); } /*read PAPI HW counters and caculate performance of output phase*/ if ( PAPI_stop_counters( values_o, NUM_EVENTS ) != PAPI_OK ){ printf("fail to stop papi counter"); } Lmirate[2] = (double) values_o[0]/values_o[1]; end_cycles_3 = PAPI_get_real_cyc(); // Gets the ending time in clock cycles end_usec_3 = PAPI_get_real_usec(); // Gets the ending time in microseconds mflops[2] = (double) (values_o[2])/(end_usec_3-end_usec_2); util[2] = mflops[2] / PEAKPER; /** Write all measured performance to pstats.dat*/ et[0] = end_usec_1-start_usec; et[1] = end_usec_2-end_usec_1; et[2] = end_usec_3-end_usec_2; if (write_result_dat(file_perf, values_i,values_c, values_o,Lmirate, et, mflops, util) != 0 ){ printf("error when write measured performance to data file"); } /* Free all the dynamically allocated memory */ free(direc2); free(direc1); free(dxor2); free(dxor1); free(adxor2); free(adxor1); free(cnorm); free(oc); free(var); free(cgup); free(resvec); free(su); free(bp); free(bh); free(bl); free(bw); free(bn); free(be); free(bs); printf("Simulation completed successfully!\n"); return EXIT_SUCCESS; }