Пример #1
0
void print_i7z_socket_single(struct cpu_socket_info socket_0, int printw_offset, int PLATFORM_INFO_MSR,  int PLATFORM_INFO_MSR_high,  int PLATFORM_INFO_MSR_low,
                             int* online_cpus, double cpu_freq_cpuinfo,  struct timespec one_second_sleep, char TURBO_MODE,
                             char* HT_ON_str, int* kk_1, U_L_L_I * old_val_CORE, U_L_L_I * old_val_REF, U_L_L_I * old_val_C3, U_L_L_I * old_val_C6, U_L_L_I * old_val_C7,
                             U_L_L_I * old_TSC, int estimated_mhz,  U_L_L_I * new_val_CORE,  U_L_L_I * new_val_REF,  U_L_L_I * new_val_C3,
                             U_L_L_I * new_val_C6,  U_L_L_I * new_val_C7,  U_L_L_I * new_TSC,  double* _FREQ, double* _MULT, long double * C0_time, long double * C1_time,
                             long double * C3_time, long double * C6_time, long double * C7_time, struct timeval* tvstart, struct timeval* tvstop, int *max_observed_cpu)
{
    int numPhysicalCores, numLogicalCores;
    double TRUE_CPU_FREQ;

    //Print a slew of information on the ncurses window
    mvprintw (0, 0, "Cpu speed from cpuinfo %0.2fMhz\n", cpu_freq_cpuinfo);
    mvprintw (1, 0,
              "cpuinfo might be wrong if cpufreq is enabled. To guess correctly try estimating via tsc\n");
    mvprintw (2, 0, "Linux's inbuilt cpu_khz code emulated now\n\n");


    //estimate the freq using the estimate_MHz() code that is almost mhz accurate
    cpu_freq_cpuinfo = estimate_MHz ();
    mvprintw (3, 0, "True Frequency (without accounting Turbo) %0.0f MHz\n",
              cpu_freq_cpuinfo);

    int i, ii;
    //int k;
    int CPU_NUM;
    int* core_list;
    unsigned long int IA32_MPERF, IA32_APERF;
    int CPU_Multiplier, error_indx;
    unsigned long long int CPU_CLK_UNHALTED_CORE, CPU_CLK_UNHALTED_REF, CPU_CLK_C3, CPU_CLK_C6, CPU_CLK_C1, CPU_CLK_C7;
    //current blck value
    float BLCK;

    char print_core[32];
    long double c1_time;

    //use this variable to monitor the max number of cores ever online
    *max_observed_cpu = (socket_0.max_cpu > *max_observed_cpu)? socket_0.max_cpu: *max_observed_cpu;

    int core_list_size_phy, core_list_size_log;
    if (socket_0.max_cpu > 0) {
        //set the variable print_core to 0, use it to check if a core is online and doesnt
        //have any garbage values
        memset(print_core, 0, 6*sizeof(char));

        //We just need one CPU (we use Core-1) to figure out the multiplier and the bus clock freq.
        //multiplier doesnt automatically include turbo
        //note turbo is not guaranteed, only promised
        //So this msr will only reflect the actual multiplier, rest has to be figured out

        //Now get all the information about the socket from the structure
        CPU_NUM = socket_0.processor_num[0];
        core_list = socket_0.processor_num;
        core_list_size_phy = socket_0.num_physical_cores;
        core_list_size_log = socket_0.num_logical_cores;

        /*if (CPU_NUM == -1)
        {
              sleep (1);        //sleep for a bit hoping that the offline socket becomes online
              continue;
        }*/
        //number of CPUs is as told via cpuinfo
        int numCPUs = socket_0.num_physical_cores;

        CPU_Multiplier = get_msr_value (CPU_NUM, PLATFORM_INFO_MSR, PLATFORM_INFO_MSR_high, PLATFORM_INFO_MSR_low, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[0],-1);

        //Blck is basically the true speed divided by the multiplier
        BLCK = cpu_freq_cpuinfo / CPU_Multiplier;

        //Use Core-1 as the one to check for the turbo limit
        //Core number shouldnt matter

        //bits from 0-63 in this store the various maximum turbo limits
        int MSR_TURBO_RATIO_LIMIT = 429;
        // 3B defines till Max 4 Core and the rest bit values from 32:63 were reserved.
        int MAX_TURBO_1C=0, MAX_TURBO_2C=0, MAX_TURBO_3C=0,
            MAX_TURBO_4C=0, MAX_TURBO_5C=0, MAX_TURBO_6C=0;

        if ( E7_mp_present){
            //e7 mp dont have 429 register so dont read the register.
        } else {
            //Bits:0-7  - core1
            MAX_TURBO_1C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 7, 0, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[0],-1);
            //Bits:15-8 - core2
            MAX_TURBO_2C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 15, 8, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[0],-1);
            //Bits:23-16 - core3
            MAX_TURBO_3C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 23, 16, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[0],-1);
            //Bits:31-24 - core4
            MAX_TURBO_4C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 31, 24, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[0],-1);

            //gulftown/Hexacore support
            //technically these should be the bits to get for core 5,6
            //Bits:39-32 - core4
            MAX_TURBO_5C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 39, 32, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[0],-1);
            //Bits:47-40 - core4
            MAX_TURBO_6C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 47, 40, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[0],-1);
        }
        //fflush (stdout);
        //sleep (1);

        char string_ptr1[200], string_ptr2[200];

        int IA32_PERF_GLOBAL_CTRL = 911;    //38F
        int IA32_PERF_GLOBAL_CTRL_Value;
        IA32_PERF_GLOBAL_CTRL_Value =   get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);

        int IA32_FIXED_CTR_CTL = 909;   //38D
        int IA32_FIXED_CTR_CTL_Value;
        IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);


        IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);

        IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);

        CPU_CLK_UNHALTED_CORE = get_msr_value (CPU_NUM, 778, 63, 0, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);

        CPU_CLK_UNHALTED_REF = get_msr_value (CPU_NUM, 779, 63, 0, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);

        //SLEEP FOR 1 SECOND (500ms is also alright)
        nanosleep (&one_second_sleep, NULL);
        IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0, &error_indx) - IA32_MPERF;
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);

        IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0, &error_indx) - IA32_APERF;
        SET_IF_TRUE(error_indx,online_cpus[0],-1);
        RETURN_IF_TRUE(online_cpus[0]==-1);

        mvprintw (4 + printw_offset, 0,"  CPU Multiplier %dx || Bus clock frequency (BCLK) %0.2f MHz \n",   CPU_Multiplier, BLCK);

        if (numCPUs <= 0) {
            sprintf (string_ptr1, "  Max TURBO Multiplier (if Enabled) with 0 cores is");
            sprintf (string_ptr2, " %dx/%dx ", MAX_TURBO_1C, MAX_TURBO_2C);
        }
        if (numCPUs >= 1 && numCPUs < 4) {
            sprintf (string_ptr1, "  Max TURBO Multiplier (if Enabled) with 1/2 Cores is");
            sprintf (string_ptr2, "  ");
        }
        if (numCPUs >= 2 && numCPUs < 6) {
            sprintf (string_ptr1, "  Max TURBO Multiplier (if Enabled) with 1/2/3/4 Cores is");
            sprintf (string_ptr2, " %dx/%dx/%dx/%dx ", MAX_TURBO_1C, MAX_TURBO_2C, MAX_TURBO_3C, MAX_TURBO_4C);
        }
        if (numCPUs >= 2 && numCPUs >= 6) { // Gulftown 6-cores, Nehalem-EX
            sprintf (string_ptr1, "  Max TURBO Multiplier (if Enabled) with 1/2/3/4/5/6 Cores is");
            sprintf (string_ptr2, " %dx/%dx/%dx/%dx/%dx/%dx ", MAX_TURBO_1C, MAX_TURBO_2C, MAX_TURBO_3C, MAX_TURBO_4C,
                     MAX_TURBO_5C, MAX_TURBO_6C);
        }

        numCPUs = core_list_size_phy;
        numPhysicalCores = core_list_size_phy;
        numLogicalCores = core_list_size_log;

        //if (socket_0.socket_num == 0) {
            mvprintw (19, 0, "C0 = Processor running without halting ");
            mvprintw (20, 0, "C1 = Processor running with halts (States >C0 are power saver modes with cores idling)");
            mvprintw (21, 0, "C3 = Cores running with PLL turned off and core cache turned off");
            mvprintw (22, 0, "C6, C7 = Everything in C3 + core state saved to last level cache, C7 is deeper than C6");
            mvprintw (23, 0, "  Above values in table are in percentage over the last 1 sec");
//            mvprintw (24, 0, "Total Logical Cores: [%d], Total Physical Cores: [%d] \n",  numLogicalCores, numPhysicalCores);
            mvprintw (24, 0, "[core-id] refers to core-id number in /proc/cpuinfo");
            mvprintw (25, 0, "'Garbage Values' message printed when garbage values are read");
            mvprintw (26, 0, "  Ctrl+C to exit");
        //}

        mvprintw (6 + printw_offset, 0, "Socket [%d] - [physical cores=%d, logical cores=%d, max online cores ever=%d] \n", socket_0.socket_num, numPhysicalCores, numLogicalCores,*max_observed_cpu);

        mvprintw (9 + printw_offset, 0, "%s %s\n", string_ptr1, string_ptr2);

        if (TURBO_MODE == 1) {
            mvprintw (7 + printw_offset, 0, "  TURBO ENABLED on %d Cores, %s\n", numPhysicalCores, HT_ON_str);
            TRUE_CPU_FREQ = BLCK * ((double) CPU_Multiplier + 1);
            mvprintw (8 + printw_offset, 0, "  Max Frequency without considering Turbo %0.2f MHz (%0.2f x [%d]) \n", TRUE_CPU_FREQ, BLCK, CPU_Multiplier + 1);
        } else {
            mvprintw (7 + printw_offset, 0, "  TURBO DISABLED on %d Cores, %s\n", numPhysicalCores, HT_ON_str);
            TRUE_CPU_FREQ = BLCK * ((double) CPU_Multiplier);
            mvprintw (8 + printw_offset, 0,"  Max Frequency without considering Turbo %0.2f MHz (%0.2f x [%d]) \n", TRUE_CPU_FREQ, BLCK, CPU_Multiplier);
        }

        //Primarily for 32-bit users, found that after sometimes the counters loopback, so inorder
        //to prevent loopback, reset the counters back to 0 after 10 iterations roughly 10 secs
        if (*kk_1 > 10) {
            *kk_1 = 0;
            for (i = 0; i <  numCPUs; i++) {
                //Set up the performance counters and then start reading from them
                assert(i < MAX_SK_PROCESSORS);
                CPU_NUM = core_list[i];
                ii = core_list[i];
                assert(i < MAX_PROCESSORS); //online_cpus[i]
                assert(ii < numCPUs_max);

                IA32_PERF_GLOBAL_CTRL_Value =   get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

                set_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 0x700000003LLU);

                IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

                set_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 819);

                IA32_PERF_GLOBAL_CTRL_Value =   get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

                IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

                old_val_CORE[ii] = get_msr_value (CPU_NUM, 778, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

                old_val_REF[ii] = get_msr_value (CPU_NUM, 779, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

                old_val_C3[ii] = get_msr_value (CPU_NUM, 1020, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

                old_val_C6[ii] = get_msr_value (CPU_NUM, 1021, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);

        if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
            //table b-20 in 325384 and only for sandy bridge
                old_val_C7[ii] = get_msr_value (CPU_NUM, 1022, 63, 0, &error_indx);
                SET_IF_TRUE(error_indx,online_cpus[i],-1);
                CONTINUE_IF_TRUE(online_cpus[i]==-1);
        }

                old_TSC[ii] = rdtsc ();
            }
        }
        (*kk_1)++;
        nanosleep (&one_second_sleep, NULL);
    if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
            mvprintw (11 + printw_offset, 0, "\tCore [core-id]  :Actual Freq (Mult.)\t  C0%%   Halt(C1)%%  C3 %%   C6 %%   C7 %%  Temp      VCore\n");
    }else{
            mvprintw (11 + printw_offset, 0, "\tCore [core-id]  :Actual Freq (Mult.)\t  C0%%   Halt(C1)%%  C3 %%   C6 %%  Temp      VCore\n");
    }
        //estimate the CPU speed
        estimated_mhz = estimate_MHz();

        for (i = 0; i <  numCPUs; i++) {
            //read from the performance counters
            //things like halted unhalted core cycles

            assert(i < MAX_SK_PROCESSORS);
            CPU_NUM = core_list[i];
            ii = core_list[i];
            assert(i < MAX_PROCESSORS); //online_cpus[i]
            assert(ii < numCPUs_max);

            new_val_CORE[ii] = get_msr_value (CPU_NUM, 778, 63, 0, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[i],-1);
            CONTINUE_IF_TRUE(online_cpus[i]==-1);

            new_val_REF[ii] = get_msr_value (CPU_NUM, 779, 63, 0, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[i],-1);
            CONTINUE_IF_TRUE(online_cpus[i]==-1);

            new_val_C3[ii] = get_msr_value (CPU_NUM, 1020, 63, 0, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[i],-1);
            CONTINUE_IF_TRUE(online_cpus[i]==-1);

            new_val_C6[ii] = get_msr_value (CPU_NUM, 1021, 63, 0, &error_indx);
            SET_IF_TRUE(error_indx,online_cpus[i],-1);
            CONTINUE_IF_TRUE(online_cpus[i]==-1);

        if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
        new_val_C7[ii] = get_msr_value (CPU_NUM, 1022, 63, 0, &error_indx);
        SET_IF_TRUE(error_indx,online_cpus[i],-1);
            CONTINUE_IF_TRUE(online_cpus[i]==-1);
        }

        new_TSC[ii] = rdtsc ();

            if (old_val_CORE[ii] > new_val_CORE[ii]) {          //handle overflow
                CPU_CLK_UNHALTED_CORE = (UINT64_MAX - old_val_CORE[ii]) + new_val_CORE[ii];
            } else {
                CPU_CLK_UNHALTED_CORE = new_val_CORE[ii] - old_val_CORE[ii];
            }

            //number of TSC cycles while its in halted state
            if ((new_TSC[ii] - old_TSC[ii]) < CPU_CLK_UNHALTED_CORE) {
                CPU_CLK_C1 = 0;
            } else {
                CPU_CLK_C1 = ((new_TSC[ii] - old_TSC[ii]) - CPU_CLK_UNHALTED_CORE);
            }

            if (old_val_REF[ii] > new_val_REF[ii]) {            //handle overflow
                CPU_CLK_UNHALTED_REF = (UINT64_MAX - old_val_REF[ii]) + new_val_REF[ii];    //3.40282366921e38
            } else {
                CPU_CLK_UNHALTED_REF = new_val_REF[ii] - old_val_REF[ii];
            }

            if (old_val_C3[ii] > new_val_C3[ii]) {          //handle overflow
                CPU_CLK_C3 = (UINT64_MAX - old_val_C3[ii]) + new_val_C3[ii];
            } else {
                CPU_CLK_C3 = new_val_C3[ii] - old_val_C3[ii];
            }

            if (old_val_C6[ii] > new_val_C6[ii]) {          //handle overflow
                CPU_CLK_C6 = (UINT64_MAX - old_val_C6[ii]) + new_val_C6[ii];
            } else {
                CPU_CLK_C6 = new_val_C6[ii] - old_val_C6[ii];
            }

        if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
            if (old_val_C7[ii] > new_val_C7[ii]) {          //handle overflow
                    CPU_CLK_C7 = (UINT64_MAX - old_val_C7[ii]) + new_val_C7[ii];
                } else {
                    CPU_CLK_C7 = new_val_C7[ii] - old_val_C7[ii];
                }
        }

            _FREQ[ii] =
                THRESHOLD_BETWEEN_0_6000(estimated_mhz * ((long double) CPU_CLK_UNHALTED_CORE /
                                         (long double) CPU_CLK_UNHALTED_REF));
            _MULT[ii] = _FREQ[ii] / BLCK;

            C0_time[ii] = ((long double) CPU_CLK_UNHALTED_REF /
                           (long double) (new_TSC[ii] - old_TSC[ii]));
            C1_time[ii] = ((long double) CPU_CLK_C1 /
                           (long double) (new_TSC[ii] - old_TSC[ii]));
            C3_time[ii] = ((long double) CPU_CLK_C3 /
                           (long double) (new_TSC[ii] - old_TSC[ii]));
            C6_time[ii] = ((long double) CPU_CLK_C6 /
                           (long double) (new_TSC[ii] - old_TSC[ii]));

        if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
            C7_time[ii] = ((long double) CPU_CLK_C7 /
                               (long double) (new_TSC[ii] - old_TSC[ii]));
        }

        if (C0_time[ii] < 1e-2) {
                if (C0_time[ii] > 1e-4) {
                    C0_time[ii] = 0.01;
                } else {
                    C0_time[ii] = 0;
                }
            }
            if (C1_time[ii] < 1e-2) {
                if (C1_time[ii] > 1e-4) {
                    C1_time[ii] = 0.01;
                } else {
                    C1_time[ii] = 0;
                }
            }
            if (C3_time[ii] < 1e-2) {
                if (C3_time[ii] > 1e-4) {
                    C3_time[ii] = 0.01;
                } else {
                    C3_time[ii] = 0;
                }
            }
            if (C6_time[ii] < 1e-2) {
                if (C6_time[ii] > 1e-4) {
                    C6_time[ii] = 0.01;
                } else {
                    C6_time[ii] = 0;
                }
            }
        if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
            if (C7_time[ii] < 1e-2) {
                   if (C7_time[ii] > 1e-4) {
                       C7_time[ii] = 0.01;
                   } else {
                       C7_time[ii] = 0;
                   }
                }
        }
    }
        //CHECK IF ALL COUNTERS ARE CORRECT AND NO GARBAGE VALUES ARE PRESENT
        //If there is any garbage values set print_core[i] to 0
        for (ii = 0; ii <  numCPUs; ii++) {
            assert(ii < MAX_SK_PROCESSORS);
            i = core_list[ii];
        if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
          if ( !IS_THIS_BETWEEN_0_100(C0_time[i] * 100) ||
                    !IS_THIS_BETWEEN_0_100(C1_time[i] * 100 - (C3_time[i] + C6_time[i]) * 100) ||
                    !IS_THIS_BETWEEN_0_100(C3_time[i] * 100) ||
                    !IS_THIS_BETWEEN_0_100(C6_time[i] * 100) ||
                    !IS_THIS_BETWEEN_0_100(C7_time[i] * 100) ||
                    isinf(_FREQ[i]) )
                  print_core[ii]=0;
              else
                  print_core[ii]=1;
        }else{
          if ( !IS_THIS_BETWEEN_0_100(C0_time[i] * 100) ||
                    !IS_THIS_BETWEEN_0_100(C1_time[i] * 100 - (C3_time[i] + C6_time[i]) * 100) ||
                    !IS_THIS_BETWEEN_0_100(C3_time[i] * 100) ||
                    !IS_THIS_BETWEEN_0_100(C6_time[i] * 100) ||
                    isinf(_FREQ[i]) )
                  print_core[ii]=0;
              else
                  print_core[ii]=1;
        }
        }

        //Now print the information about the cores. Print garbage values message if there is garbage
        for (ii = 0; ii <  numCPUs; ii++) {
            assert(ii < MAX_SK_PROCESSORS);
            i = core_list[ii];

        if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
            //there is a bit of leeway to be had as the total counts might deviate
            //if this happens c1_time might be negative so just adjust so that it is thresholded to 0
            c1_time = C1_time[i] * 100 - (C3_time[i] + C6_time[i] + C7_time[i]) * 100;
            if (!isnan(c1_time) && !isinf(c1_time)) {
                    if (c1_time <= 0) {
            c1_time=0;
                    }
            }
            if (print_core[ii])
                    mvprintw (12 + ii + printw_offset, 0, "\tCore %d [%d]:\t  %0.2f (%.2fx)\t%4.3Lg\t%4.3Lg\t%4.3Lg\t%4.3Lg\t%4.3Lg\t%d\t%0.4f\n",
                          ii + 1, core_list[ii], _FREQ[i], _MULT[i], THRESHOLD_BETWEEN_0_100(C0_time[i] * 100),
                          THRESHOLD_BETWEEN_0_100(c1_time), THRESHOLD_BETWEEN_0_100(C3_time[i] * 100), THRESHOLD_BETWEEN_0_100(C6_time[i] * 100),THRESHOLD_BETWEEN_0_100(C7_time[i] * 100),
              Read_Thermal_Status_CPU(core_list[ii]),   //C0_time[i]*100+C1_time[i]*100 around 100
                          Read_Voltage_CPU(core_list[ii]));
            else
                    mvprintw (12 + ii + printw_offset, 0, "\tCore %d [%d]:\t  Garbage Values\n", ii + 1, core_list[ii]);
        }else{
            //there is a bit of leeway to be had as the total counts might deviate
            //if this happens c1_time might be negative so just adjust so that it is thresholded to 0
            c1_time = C1_time[i] * 100 - (C3_time[i] + C6_time[i]) * 100;
            if (!isnan(c1_time) && !isinf(c1_time)) {
                    if (c1_time <= 0) {
            c1_time=0;
                    }
            }
            if (print_core[ii])
                    mvprintw (12 + ii + printw_offset, 0, "\tCore %d [%d]:\t  %0.2f (%.2fx)\t%4.3Lg\t%4.3Lg\t%4.3Lg\t%4.3Lg\t%d\t%0.4f\n",
                          ii + 1, core_list[ii], _FREQ[i], _MULT[i], THRESHOLD_BETWEEN_0_100(C0_time[i] * 100),
                          THRESHOLD_BETWEEN_0_100(c1_time), THRESHOLD_BETWEEN_0_100(C3_time[i] * 100), THRESHOLD_BETWEEN_0_100(C6_time[i] * 100),Read_Thermal_Status_CPU(core_list[ii]),    //C0_time[i]*100+C1_time[i]*100 around 100
                          Read_Voltage_CPU(core_list[ii]));
            else
                    mvprintw (12 + ii + printw_offset, 0, "\tCore %d [%d]:\t  Garbage Values\n", ii + 1, core_list[ii]);
        }
        }

        /*k=0;
        for (ii = 00; ii < *max_observed_cpu; ii++)
        {
                if (in_core_list(ii,core_list)){
                    continue;
                }else{
                    mvprintw (12 + k + numCPUs + printw_offset, 0, "\tProcessor %d [%d]:  OFFLINE\n", k + numCPUs + 1, ii);
                }
                k++;
        }*/

        //FOR THE REST OF THE CORES (i.e. the offline cores+non-present cores=6 )
        //I have space allocated for 6 cores to be printed per socket so from all the present cores
        //till 6 print a blank line

        //for(ii=*max_observed_cpu; ii<6; ii++)
        for (ii = numCPUs; ii<6; ii++)
            mvprintw (12 + ii + printw_offset, 0, "\n");

        TRUE_CPU_FREQ = 0;

        logOpenFile_single();

        //time_t time_to_save;
        //logCpuFreq_single_d(time(&time_to_save));
        clock_gettime(CLOCK_REALTIME, &global_ts);
        logCpuFreq_single_ts( &global_ts);

        logCpuCstates_single_ts( &global_ts);

        for (ii = 0; ii < numCPUs; ii++) {
            assert(ii < MAX_SK_PROCESSORS);
            i = core_list[ii];
            if ( (_FREQ[i] > TRUE_CPU_FREQ) && (print_core[ii]) && !isinf(_FREQ[i]) ) {
                TRUE_CPU_FREQ = _FREQ[i];
            }
            if ( (print_core[ii]) && !isinf(_FREQ[i]) ) {
                logCpuFreq_single(_FREQ[i]);
            }
            logCpuCstates_single_c(" [");
            logCpuCstates_single((float)THRESHOLD_BETWEEN_0_100(C0_time[i] * 100));  logCpuCstates_single_c(",");
            c1_time = C1_time[i] * 100 - (C3_time[i] + C6_time[i] + C7_time[i]) * 100;
            logCpuCstates_single((float)THRESHOLD_BETWEEN_0_100(c1_time));           logCpuCstates_single_c(",");
            logCpuCstates_single((float)THRESHOLD_BETWEEN_0_100(C3_time[i] * 100));  logCpuCstates_single_c(",");
            logCpuCstates_single((float)THRESHOLD_BETWEEN_0_100(C6_time[i] * 100));
            if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
                logCpuCstates_single_c(",");
                logCpuCstates_single((float)THRESHOLD_BETWEEN_0_100(C7_time[i] * 100));
            }
            logCpuCstates_single_c("]\t");
        }
        //        logCpuCstates_single_c("\n");
        logCloseFile_single();

        mvprintw (10 + printw_offset, 0,
                  "  Real Current Frequency %0.2f MHz [%0.2f x %0.2f] (Max of below)\n", TRUE_CPU_FREQ, BLCK, TRUE_CPU_FREQ/BLCK);

        refresh ();

        //shift the new values to the old counter values
        //so that the next time we use those to find the difference
        memcpy (old_val_CORE, new_val_CORE,
                sizeof (*old_val_CORE) * numCPUs);
        memcpy (old_val_REF, new_val_REF, sizeof (*old_val_REF) * numCPUs);
        memcpy (old_val_C3, new_val_C3, sizeof (*old_val_C3) * numCPUs);
        memcpy (old_val_C6, new_val_C6, sizeof (*old_val_C6) * numCPUs);

    if(prog_options.i7_version.sandy_bridge || prog_options.i7_version.ivy_bridge || prog_options.i7_version.haswell){
        memcpy (old_val_C7, new_val_C7, sizeof (*old_val_C7) * numCPUs);
    }

        memcpy (tvstart, tvstop, sizeof (*tvstart) * numCPUs);
        memcpy (old_TSC, new_TSC, sizeof (*old_TSC) * numCPUs);
    } else {
        // If all the cores in the socket go offline, just erase the whole screen
        //WELL for single socket machine this code will never be executed. lol
        //atleast 1 core will be online so ...
        //for (ii = 0 ; ii<14; ii++)
        //    mvprintw (3 + ii + printw_offset, 0, "Ending up here\n");
        //print_socket_information(&socket_0);
    }

}
Пример #2
0
void
MyThread::run ()
{

	print_CPU_Hierarchy(chi);

    int i, ii;

    //MSR number and hi:low bit of that MSR
    //This msr contains a lot of stuff, per socket wise
    //one can pass any core number and then get in multiplier etc
    int PLATFORM_INFO_MSR = 206;	//CE 15:8
    int PLATFORM_INFO_MSR_low = 8;
    int PLATFORM_INFO_MSR_high = 15;

    ////To find out if Turbo is enabled use the below msr and bit 38
    ////bit for TURBO is 38
    ////msr reading is now moved into tubo_status
    //int IA32_MISC_ENABLE = 416;
    //int TURBO_FLAG_low = 38;
    //int TURBO_FLAG_high = 38;


    //int MSR_TURBO_RATIO_LIMIT = 429;

    int CPU_NUM;
    int CPU_Multiplier;
    float BLCK;
    char TURBO_MODE;

    printf("i7z DEBUG: GUI VERSION DOESN'T SUPPORT CORE OFFLINING\n");
    sleep (1);

    // 3B defines till Max 4 Core and the rest bit values from 32:63 were reserved.
    // int MAX_TURBO_1C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 7, 0);
    // int MAX_TURBO_2C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 15, 8);
    // int MAX_TURBO_3C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 23, 16);
    // int MAX_TURBO_4C = get_msr_value (CPU_NUM, MSR_TURBO_RATIO_LIMIT, 31, 24);


    //CPUINFO is wrong for i7 but correct for the number of physical and logical cores present
    //If Hyperthreading is enabled then, multiple logical processors will share a common CORE ID
    //http://www.redhat.com/magazine/022aug06/departments/tips_tricks/
    system ("cat /proc/cpuinfo |grep MHz|sed 's/cpu\\sMHz\\s*:\\s//'|tail -n 1 > /tmp/cpufreq.txt");
    system ("grep \"core id\" /proc/cpuinfo |sort -|uniq -|wc -l > /tmp/numPhysical.txt");
    system ("grep \"processor\" /proc/cpuinfo |sort -|uniq -|wc -l > /tmp/numLogical.txt");


    //Open the parsed cpufreq file and obtain the cpufreq from /proc/cpuinfo
    FILE *tmp_file;
    tmp_file = fopen ("/tmp/cpufreq.txt", "r");
    char tmp_str[30];
    fgets (tmp_str, 30, tmp_file);
    double cpu_freq_cpuinfo = atof (tmp_str);
    fclose (tmp_file);

    unsigned int numPhysicalCores, numLogicalCores;
    numPhysicalCores = socket_0.num_physical_cores + socket_1.num_physical_cores;
    numLogicalCores  = socket_0.num_logical_cores + socket_1.num_logical_cores;
//    printf("My thread: Num Processors %d\n",numCPUs);

    int error_indx;

    //estimate the freq using the estimate_MHz() code that is almost mhz accurate
    cpu_freq_cpuinfo = estimate_MHz ();

    //We just need one CPU (we use Core-0) to figure out the multiplier and the bus clock freq.
    CPU_NUM = 0;
    CPU_Multiplier =
        get_msr_value (CPU_NUM, PLATFORM_INFO_MSR, PLATFORM_INFO_MSR_high,
                       PLATFORM_INFO_MSR_low, &error_indx);
    BLCK = cpu_freq_cpuinfo / CPU_Multiplier;
    TURBO_MODE = turbo_status ();	//get_msr_value(CPU_NUM,IA32_MISC_ENABLE, TURBO_FLAG_high,TURBO_FLAG_low);

    //to find how many cpus are enabled, we could have used sysconf but that will just give the logical numbers
    //if HT is enabled then the threads of the same core have the same C-state residency number so...
    //Its imperative to figure out the number of physical and number of logical cores.
    //sysconf(_SC_NPROCESSORS_ONLN);


    bool HT_ON;
    char HT_ON_str[30];
    if (numLogicalCores > numPhysicalCores)
    {
        strcpy (HT_ON_str, "Hyper Threading ON");
        HT_ON = true;
    }
    else
    {
        strcpy (HT_ON_str, "Hyper Threading OFF");
        HT_ON = false;
    }

    float TRUE_CPU_FREQ;
    if (TURBO_MODE == 1)
    {
        TRUE_CPU_FREQ = BLCK * ((double)CPU_Multiplier + 1);
    }
    else
    {
        TRUE_CPU_FREQ = BLCK * ((double)CPU_Multiplier);
    }


    int IA32_PERF_GLOBAL_CTRL = 911;	//3BF
    int IA32_PERF_GLOBAL_CTRL_Value =
        get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0,&error_indx);
    int IA32_FIXED_CTR_CTL = 909;	//38D
    int IA32_FIXED_CTR_CTL_Value =
        get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0,&error_indx);

    //printf("IA32_PERF_GLOBAL_CTRL %d\n",IA32_PERF_GLOBAL_CTRL_Value);
    //printf("IA32_FIXED_CTR_CTL %d\n",IA32_FIXED_CTR_CTL_Value);

    unsigned long long int CPU_CLK_UNHALTED_CORE, CPU_CLK_UNHALTED_REF,
    CPU_CLK_C3, CPU_CLK_C6, CPU_CLK_C1;

    CPU_CLK_UNHALTED_CORE = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx);
    CPU_CLK_UNHALTED_REF = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx);

    unsigned long long int old_val_CORE[numCPUs], new_val_CORE[numCPUs];
    unsigned long long int old_val_REF[numCPUs], new_val_REF[numCPUs];
    unsigned long long int old_val_C3[numCPUs], new_val_C3[numCPUs];
    unsigned long long int old_val_C6[numCPUs], new_val_C6[numCPUs];
//  unsigned long int old_val_C1[numCPUs], new_val_C1[numCPUs];

    unsigned long long int old_TSC[numCPUs], new_TSC[numCPUs];

    struct timeval tvstart[numCPUs], tvstop[numCPUs];

    struct timespec one_second_sleep;
    one_second_sleep.tv_sec = 0;
    one_second_sleep.tv_nsec = 999999999;	// 1000msec


    unsigned long int IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0,&error_indx);
    unsigned long int IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0,&error_indx);
//   mvprintw(12,0,"Wait...\n"); refresh();
    nanosleep (&one_second_sleep, NULL);
    IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0, &error_indx) - IA32_MPERF;
    IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0, &error_indx) - IA32_APERF;

    //printf("Diff. i n APERF = %u, MPERF = %d\n", IA32_MPERF, IA32_APERF);

    long double C0_time[numCPUs], C1_time[numCPUs], C3_time[numCPUs],    C6_time[numCPUs];
    double _FREQ[numCPUs], _MULT[numCPUs];

//  mvprintw(12,0,"Current Freqs\n");

    int kk=11;
    double estimated_mhz;
    for (;;)
    {
        Construct_Socket_Information_in_GUI(&numCPUs);

        if (kk>10) {
            kk=0;
            for (ii = 0; ii < (int)numCPUs; ii++)
            {
                CPU_NUM = core_list[ii];
                IA32_PERF_GLOBAL_CTRL_Value =	get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx);
                set_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 0x700000003LLU);

                IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx);
                set_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 819);

                IA32_PERF_GLOBAL_CTRL_Value =	get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx);
                IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx);

                old_val_CORE[ii] = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx);
                old_val_REF[ii] = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx);
                old_val_C3[ii] = get_msr_value (CPU_NUM, 1020, 63, 0,&error_indx);
                old_val_C6[ii] = get_msr_value (CPU_NUM, 1021, 63, 0,&error_indx);
                old_TSC[ii] = rdtsc ();
            }
        }
        kk++;

        nanosleep (&one_second_sleep, NULL);

        estimated_mhz = estimate_MHz();

        for (i = 0; i < (int)numCPUs; i++)
        {
            CPU_NUM = core_list[i];
            new_val_CORE[i] = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx);
            new_val_REF[i] = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx);
            new_val_C3[i] = get_msr_value (CPU_NUM, 1020, 63, 0,&error_indx);
            new_val_C6[i] = get_msr_value (CPU_NUM, 1021, 63, 0,&error_indx);
            new_TSC[i] = rdtsc ();
            if (old_val_CORE[i] > new_val_CORE[i])
            {
                CPU_CLK_UNHALTED_CORE =
                    (3.40282366921e38 - old_val_CORE[i]) + new_val_CORE[i];
            }
            else
            {
                CPU_CLK_UNHALTED_CORE = new_val_CORE[i] - old_val_CORE[i];
            }

            //number of TSC cycles while its in halted state
            if ((new_TSC[i] - old_TSC[i]) < CPU_CLK_UNHALTED_CORE)
                CPU_CLK_C1 = 0;
            else
                CPU_CLK_C1 = ((new_TSC[i] - old_TSC[i]) - CPU_CLK_UNHALTED_CORE);

            if (old_val_REF[i] > new_val_REF[i])
            {
                CPU_CLK_UNHALTED_REF =
                    (3.40282366921e38 - old_val_REF[i]) + new_val_REF[i];
            }
            else
            {
                CPU_CLK_UNHALTED_REF = new_val_REF[i] - old_val_REF[i];
            }

            if (old_val_C3[i] > new_val_C3[i])
            {
                CPU_CLK_C3 = (3.40282366921e38 - old_val_C3[i]) + new_val_C3[i];
            }
            else
            {
                CPU_CLK_C3 = new_val_C3[i] - old_val_C3[i];
            }

            if (old_val_C6[i] > new_val_C6[i])
            {
                CPU_CLK_C6 = (3.40282366921e38 - old_val_C6[i]) + new_val_C6[i];
            }
            else
            {
                CPU_CLK_C6 = new_val_C6[i] - old_val_C6[i];
            }

            _FREQ[i] =
                estimated_mhz * ((long double) CPU_CLK_UNHALTED_CORE /
                                 (long double) CPU_CLK_UNHALTED_REF);
            _MULT[i] = _FREQ[i] / BLCK;

            C0_time[i] =
                ((long double) CPU_CLK_UNHALTED_REF /
                 (long double) (new_TSC[i] - old_TSC[i]));
            long double c1_time =
                ((long double) CPU_CLK_C1 /
                 (long double) (new_TSC[i] - old_TSC[i]));
            C3_time[i] =
                ((long double) CPU_CLK_C3 /
                 (long double) (new_TSC[i] - old_TSC[i]));
            C6_time[i] =
                ((long double) CPU_CLK_C6 /
                 (long double) (new_TSC[i] - old_TSC[i]));

            //C1_time[i] -= C3_time[i] + C6_time[i];
			C1_time[i] = c1_time - (C3_time[i] + C6_time[i]) ;
            if (!isnan(c1_time) && !isinf(c1_time)) {
                if (C1_time[i] <= 0) {
                    C1_time[i]=0;
                }
            }

            if (C0_time[i] < 1e-2) {
                if (C0_time[i] > 1e-4) 	C0_time[i] = 0.01;
                else				    C0_time[i] = 0;
            }

            if (C1_time[i] < 1e-2) {
                if (C1_time[i] > 1e-4)  C1_time[i] = 0.01;
                else				    C1_time[i] = 0;
            }

            if (C3_time[i] < 1e-2) {
                if (C3_time[i] > 1e-4)  C3_time[i] = 0.01;
                else			        C3_time[i] = 0;
            }

            if (C6_time[i] < 1e-2) {
                if (C6_time[i] > 1e-4)  C6_time[i] = 0.01;
                else			        C6_time[i] = 0;
            }
        }
//   printf("Hello");
//   for(i=0;i<numCPUs;i++){
//      printf("%g %Lg %Lg %Lg %Lg %lld %llu\n",_FREQ[i],C0_time[i]*100,C1_time[i]*100,C3_time[i]*100,C6_time[i]*100,CPU_CLK_UNHALTED_REF,(new_TSC[i] - old_TSC[i]));
//      printf("%g %llu %llu %llu %llu %llu\n",_FREQ[i],CPU_CLK_UNHALTED_REF,CPU_CLK_C1,CPU_CLK_C3,CPU_CLK_C6,(new_TSC[i] - old_TSC[i]));
//      printf("%llu %llu  %lld\n",new_TSC[i], old_TSC[i],new_TSC[i]- old_TSC[i]);
//   }
        TRUE_CPU_FREQ = 0;
        for (i = 0; i < (int)numCPUs; i++)
            if (_FREQ[i] > TRUE_CPU_FREQ)
                TRUE_CPU_FREQ = _FREQ[i];

        memcpy (old_val_CORE, new_val_CORE, sizeof (unsigned long int) * numCPUs);
        memcpy (old_val_REF, new_val_REF, sizeof (unsigned long int) * numCPUs);
        memcpy (old_val_C3, new_val_C3, sizeof (unsigned long int) * numCPUs);
        memcpy (old_val_C6, new_val_C6, sizeof (unsigned long int) * numCPUs);
        memcpy (tvstart, tvstop, sizeof (struct timeval) * numCPUs);
        memcpy (old_TSC, new_TSC, sizeof (unsigned long long int) * numCPUs);

        memcpy (FREQ, _FREQ, sizeof (double) * numCPUs);
        memcpy (MULT, _MULT, sizeof (double) * numCPUs);
        memcpy (C0_TIME, C0_time, sizeof (long double) * numCPUs);
        memcpy (C1_TIME, C1_time, sizeof (long double) * numCPUs);
        memcpy (C3_TIME, C3_time, sizeof (long double) * numCPUs);
        memcpy (C6_TIME, C6_time, sizeof (long double) * numCPUs);
        global_in_i7z_main_thread = true;
    }

}