void print_i7z_single () { struct cpu_heirarchy_info chi; struct cpu_socket_info socket_0={.max_cpu=0, .socket_num=0, .processor_num={-1,-1,-1,-1,-1,-1,-1,-1}}; struct cpu_socket_info socket_1={.max_cpu=0, .socket_num=1, .processor_num={-1,-1,-1,-1,-1,-1,-1,-1}}; construct_CPU_Heirarchy_info(&chi); construct_sibling_list(&chi); // print_CPU_Heirarchy(chi); construct_socket_information(&chi, &socket_0, &socket_1, socket_0_num, socket_1_num); // print_socket_information(&socket_0); // print_socket_information(&socket_1); int printw_offset = (0) * 14; //Make an array size max 8 (to accomdate Nehalem-EXEX -lol) to store the core-num that are candidates for a given socket //removing it from here as it is already allocated in the function //int *core_list, core_list_size_phy, core_list_size_log; //iterator int i; //turbo_mode enabled/disabled flag char TURBO_MODE; double cpu_freq_cpuinfo; cpu_freq_cpuinfo = cpufreq_info (); //estimate the freq using the estimate_MHz() code that is almost mhz accurate cpu_freq_cpuinfo = estimate_MHz (); //Print a slew of information on the ncurses window //I already print that in the loop so.. mvprintw (0, 0, "WAIT .... "); //estimate the freq using the estimate_MHz() code that is almost mhz accurate cpu_freq_cpuinfo = estimate_MHz (); mvprintw (3, 0, "True Frequency (without accounting Turbo) %0.0f MHz\n", cpu_freq_cpuinfo); //MSR number and hi:low bit of that MSR //This msr contains a lot of stuff, per socket wise //one can pass any core number and then get in multiplier etc int PLATFORM_INFO_MSR = 206; //CE 15:8 int PLATFORM_INFO_MSR_low = 8; int PLATFORM_INFO_MSR_high = 15; unsigned long long int old_val_CORE[2][numCPUs_max], new_val_CORE[2][numCPUs_max]; unsigned long long int old_val_REF[2][numCPUs_max], new_val_REF[2][numCPUs_max]; unsigned long long int old_val_C3[2][numCPUs_max], new_val_C3[2][numCPUs_max]; unsigned long long int old_val_C6[2][numCPUs_max], new_val_C6[2][numCPUs_max]; unsigned long long int old_val_C7[2][numCPUs_max], new_val_C7[2][numCPUs_max]; unsigned long long int old_TSC[2][numCPUs_max], new_TSC[2][numCPUs_max]; long double C0_time[2][numCPUs_max], C1_time[2][numCPUs_max], C3_time[2][numCPUs_max], C6_time[2][numCPUs_max], C7_time[2][numCPUs_max]; double _FREQ[2][numCPUs_max], _MULT[2][numCPUs_max]; struct timeval tvstart[2][numCPUs_max], tvstop[2][numCPUs_max]; struct timespec one_second_sleep; one_second_sleep.tv_sec = 0; one_second_sleep.tv_nsec = 499999999; // 500msec //Get turbo mode status by reading msr within turbo_status TURBO_MODE = turbo_status (); //Flags and other things about HT. int HT_ON; char HT_ON_str[30]; int kk_1 = 11; //below variables is used to monitor if any cores went offline etc. int online_cpus[MAX_PROCESSORS]; //Max 2 x Nehalem-EX with total 32 threads double estimated_mhz=0; int socket_num; //below variables stores how many cpus were observed till date for the socket int max_cpus_observed=0; for (;;) { construct_CPU_Heirarchy_info(&chi); construct_sibling_list(&chi); construct_socket_information(&chi, &socket_0, &socket_1, socket_0_num, socket_1_num); //HT enabled if num logical > num physical cores if (chi.HT==1) { strncpy (HT_ON_str, "Hyper Threading ON\0", 30); HT_ON = 1; } else { strncpy (HT_ON_str, "Hyper Threading OFF\0", 30); HT_ON = 0; } refresh (); SET_ONLINE_ARRAY_PLUS1(online_cpus) //In the function calls below socket_num is set to the socket to print for //printw_offset is the offset gap between the printing of the two sockets //kk_1 and kk_2 are the variables that have to be set, i have to use them internally //so in future if there are more sockets to be printed, add more kk_* socket_num=0; printw_offset=0; //printf("socket0 max cpu %d\n",socket_0.max_cpu); //printf("socket1 max cpu %d\n",socket_0.max_cpu); //below code in (else case) is to handle when for 2 sockets system, cpu1 is populated and cpu0 is empty. //single socket code but in an intelligent manner and not assuming that cpu0 is always populated before cpu1 if(socket_0.max_cpu>1){ socket_num=0; print_i7z_socket_single(socket_0, printw_offset, PLATFORM_INFO_MSR, PLATFORM_INFO_MSR_high, PLATFORM_INFO_MSR_low, online_cpus, cpu_freq_cpuinfo, one_second_sleep, TURBO_MODE, HT_ON_str, &kk_1, old_val_CORE[socket_num], old_val_REF[socket_num], old_val_C3[socket_num], old_val_C6[socket_num],old_val_C7[socket_num], old_TSC[socket_num], estimated_mhz, new_val_CORE[socket_num], new_val_REF[socket_num], new_val_C3[socket_num], new_val_C6[socket_num],new_val_C7[socket_num], new_TSC[socket_num], _FREQ[socket_num], _MULT[socket_num], C0_time[socket_num], C1_time[socket_num], C3_time[socket_num], C6_time[socket_num],C7_time[socket_num], tvstart[socket_num], tvstop[socket_num], &max_cpus_observed); }else{ socket_num=1; print_i7z_socket_single(socket_1, printw_offset, PLATFORM_INFO_MSR, PLATFORM_INFO_MSR_high, PLATFORM_INFO_MSR_low, online_cpus, cpu_freq_cpuinfo, one_second_sleep, TURBO_MODE, HT_ON_str, &kk_1, old_val_CORE[socket_num], old_val_REF[socket_num], old_val_C3[socket_num], old_val_C6[socket_num],old_val_C7[socket_num], old_TSC[socket_num], estimated_mhz, new_val_CORE[socket_num], new_val_REF[socket_num], new_val_C3[socket_num], new_val_C6[socket_num],new_val_C7[socket_num], new_TSC[socket_num], _FREQ[socket_num], _MULT[socket_num], C0_time[socket_num], C1_time[socket_num], C3_time[socket_num], C6_time[socket_num],C7_time[socket_num], tvstart[socket_num], tvstop[socket_num], &max_cpus_observed); } } }
void MyThread::run () { print_CPU_Hierarchy(chi); int i, ii; //MSR number and hi:low bit of that MSR //This msr contains a lot of stuff, per socket wise //one can pass any core number and then get in multiplier etc int PLATFORM_INFO_MSR = 206; //CE 15:8 int PLATFORM_INFO_MSR_low = 8; int PLATFORM_INFO_MSR_high = 15; ////To find out if Turbo is enabled use the below msr and bit 38 ////bit for TURBO is 38 ////msr reading is now moved into tubo_status //int IA32_MISC_ENABLE = 416; //int TURBO_FLAG_low = 38; //int TURBO_FLAG_high = 38; //int MSR_TURBO_RATIO_LIMIT = 429; int CPU_NUM; int CPU_Multiplier; float BLCK; char TURBO_MODE; printf("i7z DEBUG: GUI VERSION DOESN'T SUPPORT CORE OFFLINING\n"); sleep (1); // 3B defines till Max 4 Core and the rest bit values from 32:63 were reserved. // int MAX_TURBO_1C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 7, 0); // int MAX_TURBO_2C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 15, 8); // int MAX_TURBO_3C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 23, 16); // int MAX_TURBO_4C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 31, 24); //CPUINFO is wrong for i7 but correct for the number of physical and logical cores present //If Hyperthreading is enabled then, multiple logical processors will share a common CORE ID //http://www.redhat.com/magazine/022aug06/departments/tips_tricks/ system ("cat /proc/cpuinfo |grep MHz|sed 's/cpu\\sMHz\\s*:\\s//'|tail -n 1 > /tmp/cpufreq.txt"); system ("grep \"core id\" /proc/cpuinfo |sort -|uniq -|wc -l > /tmp/numPhysical.txt"); system ("grep \"processor\" /proc/cpuinfo |sort -|uniq -|wc -l > /tmp/numLogical.txt"); //Open the parsed cpufreq file and obtain the cpufreq from /proc/cpuinfo FILE *tmp_file; tmp_file = fopen ("/tmp/cpufreq.txt", "r"); char tmp_str[30]; fgets (tmp_str, 30, tmp_file); double cpu_freq_cpuinfo = atof (tmp_str); fclose (tmp_file); unsigned int numPhysicalCores, numLogicalCores; numPhysicalCores = socket_0.num_physical_cores + socket_1.num_physical_cores; numLogicalCores = socket_0.num_logical_cores + socket_1.num_logical_cores; // printf("My thread: Num Processors %d\n",numCPUs); int error_indx; //estimate the freq using the estimate_MHz() code that is almost mhz accurate cpu_freq_cpuinfo = estimate_MHz (); //We just need one CPU (we use Core-0) to figure out the multiplier and the bus clock freq. CPU_NUM = 0; CPU_Multiplier = get_msr_value (CPU_NUM, PLATFORM_INFO_MSR, PLATFORM_INFO_MSR_high, PLATFORM_INFO_MSR_low, &error_indx); BLCK = cpu_freq_cpuinfo / CPU_Multiplier; TURBO_MODE = turbo_status (); //get_msr_value(CPU_NUM,IA32_MISC_ENABLE, TURBO_FLAG_high,TURBO_FLAG_low); //to find how many cpus are enabled, we could have used sysconf but that will just give the logical numbers //if HT is enabled then the threads of the same core have the same C-state residency number so... //Its imperative to figure out the number of physical and number of logical cores. //sysconf(_SC_NPROCESSORS_ONLN); bool HT_ON; char HT_ON_str[30]; if (numLogicalCores > numPhysicalCores) { strcpy (HT_ON_str, "Hyper Threading ON"); HT_ON = true; } else { strcpy (HT_ON_str, "Hyper Threading OFF"); HT_ON = false; } float TRUE_CPU_FREQ; if (TURBO_MODE == 1) { TRUE_CPU_FREQ = BLCK * ((double)CPU_Multiplier + 1); } else { TRUE_CPU_FREQ = BLCK * ((double)CPU_Multiplier); } int IA32_PERF_GLOBAL_CTRL = 911; //3BF int IA32_PERF_GLOBAL_CTRL_Value = get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0,&error_indx); int IA32_FIXED_CTR_CTL = 909; //38D int IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0,&error_indx); //printf("IA32_PERF_GLOBAL_CTRL %d\n",IA32_PERF_GLOBAL_CTRL_Value); //printf("IA32_FIXED_CTR_CTL %d\n",IA32_FIXED_CTR_CTL_Value); unsigned long long int CPU_CLK_UNHALTED_CORE, CPU_CLK_UNHALTED_REF, CPU_CLK_C3, CPU_CLK_C6, CPU_CLK_C1; CPU_CLK_UNHALTED_CORE = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx); CPU_CLK_UNHALTED_REF = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx); unsigned long long int old_val_CORE[numCPUs], new_val_CORE[numCPUs]; unsigned long long int old_val_REF[numCPUs], new_val_REF[numCPUs]; unsigned long long int old_val_C3[numCPUs], new_val_C3[numCPUs]; unsigned long long int old_val_C6[numCPUs], new_val_C6[numCPUs]; // unsigned long int old_val_C1[numCPUs], new_val_C1[numCPUs]; unsigned long long int old_TSC[numCPUs], new_TSC[numCPUs]; struct timeval tvstart[numCPUs], tvstop[numCPUs]; struct timespec one_second_sleep; one_second_sleep.tv_sec = 0; one_second_sleep.tv_nsec = 999999999; // 1000msec unsigned long int IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0,&error_indx); unsigned long int IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0,&error_indx); // mvprintw(12,0,"Wait...\n"); refresh(); nanosleep (&one_second_sleep, NULL); IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0, &error_indx) - IA32_MPERF; IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0, &error_indx) - IA32_APERF; //printf("Diff. i n APERF = %u, MPERF = %d\n", IA32_MPERF, IA32_APERF); long double C0_time[numCPUs], C1_time[numCPUs], C3_time[numCPUs], C6_time[numCPUs]; double _FREQ[numCPUs], _MULT[numCPUs]; // mvprintw(12,0,"Current Freqs\n"); int kk=11; double estimated_mhz; for (;;) { Construct_Socket_Information_in_GUI(&numCPUs); if (kk>10) { kk=0; for (ii = 0; ii < (int)numCPUs; ii++) { CPU_NUM = core_list[ii]; IA32_PERF_GLOBAL_CTRL_Value = get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx); set_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 0x700000003LLU); IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx); set_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 819); IA32_PERF_GLOBAL_CTRL_Value = get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx); IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx); old_val_CORE[ii] = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx); old_val_REF[ii] = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx); old_val_C3[ii] = get_msr_value (CPU_NUM, 1020, 63, 0,&error_indx); old_val_C6[ii] = get_msr_value (CPU_NUM, 1021, 63, 0,&error_indx); old_TSC[ii] = rdtsc (); } } kk++; nanosleep (&one_second_sleep, NULL); estimated_mhz = estimate_MHz(); for (i = 0; i < (int)numCPUs; i++) { CPU_NUM = core_list[i]; new_val_CORE[i] = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx); new_val_REF[i] = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx); new_val_C3[i] = get_msr_value (CPU_NUM, 1020, 63, 0,&error_indx); new_val_C6[i] = get_msr_value (CPU_NUM, 1021, 63, 0,&error_indx); new_TSC[i] = rdtsc (); if (old_val_CORE[i] > new_val_CORE[i]) { CPU_CLK_UNHALTED_CORE = (3.40282366921e38 - old_val_CORE[i]) + new_val_CORE[i]; } else { CPU_CLK_UNHALTED_CORE = new_val_CORE[i] - old_val_CORE[i]; } //number of TSC cycles while its in halted state if ((new_TSC[i] - old_TSC[i]) < CPU_CLK_UNHALTED_CORE) CPU_CLK_C1 = 0; else CPU_CLK_C1 = ((new_TSC[i] - old_TSC[i]) - CPU_CLK_UNHALTED_CORE); if (old_val_REF[i] > new_val_REF[i]) { CPU_CLK_UNHALTED_REF = (3.40282366921e38 - old_val_REF[i]) + new_val_REF[i]; } else { CPU_CLK_UNHALTED_REF = new_val_REF[i] - old_val_REF[i]; } if (old_val_C3[i] > new_val_C3[i]) { CPU_CLK_C3 = (3.40282366921e38 - old_val_C3[i]) + new_val_C3[i]; } else { CPU_CLK_C3 = new_val_C3[i] - old_val_C3[i]; } if (old_val_C6[i] > new_val_C6[i]) { CPU_CLK_C6 = (3.40282366921e38 - old_val_C6[i]) + new_val_C6[i]; } else { CPU_CLK_C6 = new_val_C6[i] - old_val_C6[i]; } _FREQ[i] = estimated_mhz * ((long double) CPU_CLK_UNHALTED_CORE / (long double) CPU_CLK_UNHALTED_REF); _MULT[i] = _FREQ[i] / BLCK; C0_time[i] = ((long double) CPU_CLK_UNHALTED_REF / (long double) (new_TSC[i] - old_TSC[i])); long double c1_time = ((long double) CPU_CLK_C1 / (long double) (new_TSC[i] - old_TSC[i])); C3_time[i] = ((long double) CPU_CLK_C3 / (long double) (new_TSC[i] - old_TSC[i])); C6_time[i] = ((long double) CPU_CLK_C6 / (long double) (new_TSC[i] - old_TSC[i])); //C1_time[i] -= C3_time[i] + C6_time[i]; C1_time[i] = c1_time - (C3_time[i] + C6_time[i]) ; if (!isnan(c1_time) && !isinf(c1_time)) { if (C1_time[i] <= 0) { C1_time[i]=0; } } if (C0_time[i] < 1e-2) { if (C0_time[i] > 1e-4) C0_time[i] = 0.01; else C0_time[i] = 0; } if (C1_time[i] < 1e-2) { if (C1_time[i] > 1e-4) C1_time[i] = 0.01; else C1_time[i] = 0; } if (C3_time[i] < 1e-2) { if (C3_time[i] > 1e-4) C3_time[i] = 0.01; else C3_time[i] = 0; } if (C6_time[i] < 1e-2) { if (C6_time[i] > 1e-4) C6_time[i] = 0.01; else C6_time[i] = 0; } } // printf("Hello"); // for(i=0;i<numCPUs;i++){ // printf("%g %Lg %Lg %Lg %Lg %lld %llu\n",_FREQ[i],C0_time[i]*100,C1_time[i]*100,C3_time[i]*100,C6_time[i]*100,CPU_CLK_UNHALTED_REF,(new_TSC[i] - old_TSC[i])); // printf("%g %llu %llu %llu %llu %llu\n",_FREQ[i],CPU_CLK_UNHALTED_REF,CPU_CLK_C1,CPU_CLK_C3,CPU_CLK_C6,(new_TSC[i] - old_TSC[i])); // printf("%llu %llu %lld\n",new_TSC[i], old_TSC[i],new_TSC[i]- old_TSC[i]); // } TRUE_CPU_FREQ = 0; for (i = 0; i < (int)numCPUs; i++) if (_FREQ[i] > TRUE_CPU_FREQ) TRUE_CPU_FREQ = _FREQ[i]; memcpy (old_val_CORE, new_val_CORE, sizeof (unsigned long int) * numCPUs); memcpy (old_val_REF, new_val_REF, sizeof (unsigned long int) * numCPUs); memcpy (old_val_C3, new_val_C3, sizeof (unsigned long int) * numCPUs); memcpy (old_val_C6, new_val_C6, sizeof (unsigned long int) * numCPUs); memcpy (tvstart, tvstop, sizeof (struct timeval) * numCPUs); memcpy (old_TSC, new_TSC, sizeof (unsigned long long int) * numCPUs); memcpy (FREQ, _FREQ, sizeof (double) * numCPUs); memcpy (MULT, _MULT, sizeof (double) * numCPUs); memcpy (C0_TIME, C0_time, sizeof (long double) * numCPUs); memcpy (C1_TIME, C1_time, sizeof (long double) * numCPUs); memcpy (C3_TIME, C3_time, sizeof (long double) * numCPUs); memcpy (C6_TIME, C6_time, sizeof (long double) * numCPUs); global_in_i7z_main_thread = true; } }