Esempio n. 1
0
void print_i7z_single ()
{
    struct cpu_heirarchy_info chi;
    struct cpu_socket_info socket_0={.max_cpu=0, .socket_num=0, .processor_num={-1,-1,-1,-1,-1,-1,-1,-1}};
    struct cpu_socket_info socket_1={.max_cpu=0, .socket_num=1, .processor_num={-1,-1,-1,-1,-1,-1,-1,-1}};

    construct_CPU_Heirarchy_info(&chi);
    construct_sibling_list(&chi);
//      print_CPU_Heirarchy(chi);
    construct_socket_information(&chi, &socket_0, &socket_1, socket_0_num, socket_1_num);
//    print_socket_information(&socket_0);
//    print_socket_information(&socket_1);

    int printw_offset = (0) * 14;

    //Make an array size max 8 (to accomdate Nehalem-EXEX -lol) to store the core-num that are candidates for a given socket
    //removing it from here as it is already allocated in the function
    //int *core_list, core_list_size_phy, core_list_size_log;

    //iterator
    int i;

    //turbo_mode enabled/disabled flag
    char TURBO_MODE;

    double cpu_freq_cpuinfo;

    cpu_freq_cpuinfo = cpufreq_info ();
    //estimate the freq using the estimate_MHz() code that is almost mhz accurate
    cpu_freq_cpuinfo = estimate_MHz ();

    //Print a slew of information on the ncurses window
    //I already print that in the loop so..
    mvprintw (0, 0, "WAIT .... ");


    //estimate the freq using the estimate_MHz() code that is almost mhz accurate
    cpu_freq_cpuinfo = estimate_MHz ();
    mvprintw (3, 0, "True Frequency (without accounting Turbo) %0.0f MHz\n",
              cpu_freq_cpuinfo);


    //MSR number and hi:low bit of that MSR
    //This msr contains a lot of stuff, per socket wise
    //one can pass any core number and then get in multiplier etc
    int PLATFORM_INFO_MSR = 206;    //CE 15:8
    int PLATFORM_INFO_MSR_low = 8;
    int PLATFORM_INFO_MSR_high = 15;

    unsigned long long int old_val_CORE[2][numCPUs_max], new_val_CORE[2][numCPUs_max];
    unsigned long long int old_val_REF[2][numCPUs_max], new_val_REF[2][numCPUs_max];
    unsigned long long int old_val_C3[2][numCPUs_max], new_val_C3[2][numCPUs_max];
    unsigned long long int old_val_C6[2][numCPUs_max], new_val_C6[2][numCPUs_max];
    unsigned long long int old_val_C7[2][numCPUs_max], new_val_C7[2][numCPUs_max];

    unsigned long long int old_TSC[2][numCPUs_max], new_TSC[2][numCPUs_max];
    long double C0_time[2][numCPUs_max], C1_time[2][numCPUs_max],
    C3_time[2][numCPUs_max], C6_time[2][numCPUs_max], C7_time[2][numCPUs_max];
    double _FREQ[2][numCPUs_max], _MULT[2][numCPUs_max];
    struct timeval tvstart[2][numCPUs_max], tvstop[2][numCPUs_max];

    struct timespec one_second_sleep;
    one_second_sleep.tv_sec = 0;
    one_second_sleep.tv_nsec = 499999999;   // 500msec



    //Get turbo mode status by reading msr within turbo_status
    TURBO_MODE = turbo_status ();

    //Flags and other things about HT.
    int HT_ON;
    char HT_ON_str[30];

    int kk_1 = 11;

    //below variables is used to monitor if any cores went offline etc.
    int online_cpus[MAX_PROCESSORS]; //Max 2 x Nehalem-EX with total 32 threads

    double estimated_mhz=0;
    int socket_num;

    //below variables stores how many cpus were observed till date for the socket
    int max_cpus_observed=0;

    for (;;) {
        construct_CPU_Heirarchy_info(&chi);
        construct_sibling_list(&chi);
        construct_socket_information(&chi, &socket_0, &socket_1, socket_0_num, socket_1_num);


        //HT enabled if num logical > num physical cores
        if (chi.HT==1) {
            strncpy (HT_ON_str, "Hyper Threading ON\0", 30);
            HT_ON = 1;
        } else {
            strncpy (HT_ON_str, "Hyper Threading OFF\0", 30);
            HT_ON = 0;
        }

        refresh ();

        SET_ONLINE_ARRAY_PLUS1(online_cpus)

        //In the function calls below socket_num is set to the socket to print for
        //printw_offset is the offset gap between the printing of the two sockets
        //kk_1 and kk_2 are the variables that have to be set, i have to use them internally
        //so in future if there are more sockets to be printed, add more kk_*
        socket_num=0;
        printw_offset=0;

        //printf("socket0 max cpu %d\n",socket_0.max_cpu);
        //printf("socket1 max cpu %d\n",socket_0.max_cpu);


        //below code in (else case) is to handle when for 2 sockets system, cpu1 is populated and cpu0 is empty.
        //single socket code but in an intelligent manner and not assuming that cpu0 is always populated before cpu1
        if(socket_0.max_cpu>1){
            socket_num=0;
            print_i7z_socket_single(socket_0, printw_offset, PLATFORM_INFO_MSR,  PLATFORM_INFO_MSR_high, PLATFORM_INFO_MSR_low,
                                online_cpus, cpu_freq_cpuinfo, one_second_sleep, TURBO_MODE, HT_ON_str, &kk_1, old_val_CORE[socket_num],
                                old_val_REF[socket_num], old_val_C3[socket_num], old_val_C6[socket_num],old_val_C7[socket_num],
                                old_TSC[socket_num], estimated_mhz, new_val_CORE[socket_num], new_val_REF[socket_num], new_val_C3[socket_num],
                                new_val_C6[socket_num],new_val_C7[socket_num], new_TSC[socket_num], _FREQ[socket_num], _MULT[socket_num], C0_time[socket_num], C1_time[socket_num],
                                C3_time[socket_num], C6_time[socket_num],C7_time[socket_num], tvstart[socket_num], tvstop[socket_num], &max_cpus_observed);
    }else{
        socket_num=1;
        print_i7z_socket_single(socket_1, printw_offset, PLATFORM_INFO_MSR,  PLATFORM_INFO_MSR_high, PLATFORM_INFO_MSR_low,
                        online_cpus, cpu_freq_cpuinfo, one_second_sleep, TURBO_MODE, HT_ON_str, &kk_1, old_val_CORE[socket_num],
                                old_val_REF[socket_num], old_val_C3[socket_num], old_val_C6[socket_num],old_val_C7[socket_num],
                                old_TSC[socket_num], estimated_mhz, new_val_CORE[socket_num], new_val_REF[socket_num], new_val_C3[socket_num],
                                new_val_C6[socket_num],new_val_C7[socket_num], new_TSC[socket_num], _FREQ[socket_num], _MULT[socket_num], C0_time[socket_num], C1_time[socket_num],
                                C3_time[socket_num], C6_time[socket_num],C7_time[socket_num], tvstart[socket_num], tvstop[socket_num], &max_cpus_observed);
    }
    }

}
Esempio n. 2
0
// Worker-thread body of the GUI.
//
// After a one-time setup (frequency estimate, multiplier/BLCK from MSR 206,
// turbo and hyper-threading status) it loops forever.  Each pass sleeps for
// about one second, reads the per-core counters through get_msr_value(),
// derives frequency, multiplier and C0/C1/C3/C6 residency fractions, and
// copies the results into FREQ, MULT, C0_TIME, C1_TIME, C3_TIME and C6_TIME
// before setting global_in_i7z_main_thread.  The loop has no exit, so the
// function does not return.
//
// NOTE(review): the local arrays are sized from numCPUs once, at entry, while
// Construct_Socket_Information_in_GUI(&numCPUs) is called on every pass.  If
// that call can ever raise numCPUs the loops below would index past the end
// of the arrays -- confirm that numCPUs is stable after start-up.
void
MyThread::run ()
{

	print_CPU_Hierarchy(chi);

    int i, ii;

    //MSR number and hi:low bit of that MSR
    //This msr contains a lot of stuff, per socket wise
    //one can pass any core number and then get in multiplier etc
    int PLATFORM_INFO_MSR = 206;	//0xCE, bits 15:8
    int PLATFORM_INFO_MSR_low = 8;
    int PLATFORM_INFO_MSR_high = 15;

    //The turbo flag is no longer read here; turbo_status() is called instead.

    int CPU_NUM;
    int CPU_Multiplier;
    float BLCK;
    char TURBO_MODE;

    printf("i7z DEBUG: GUI VERSION DOESN'T SUPPORT CORE OFFLINING\n");
    sleep (1);

    //CPUINFO is wrong for i7 but correct for the number of physical and logical cores present
    //If Hyperthreading is enabled then, multiple logical processors will share a common CORE ID
    //http://www.redhat.com/magazine/022aug06/departments/tips_tricks/
    system ("cat /proc/cpuinfo |grep MHz|sed 's/cpu\\sMHz\\s*:\\s//'|tail -n 1 > /tmp/cpufreq.txt");
    system ("grep \"core id\" /proc/cpuinfo |sort -|uniq -|wc -l > /tmp/numPhysical.txt");
    system ("grep \"processor\" /proc/cpuinfo |sort -|uniq -|wc -l > /tmp/numLogical.txt");


    //Open the parsed cpufreq file and obtain the cpufreq from /proc/cpuinfo.
    //The file may be missing or empty (e.g. the shell pipeline above failed),
    //so fall back to 0 instead of dereferencing a NULL stream or parsing an
    //uninitialised buffer.  The value is replaced by estimate_MHz() below.
    double cpu_freq_cpuinfo = 0;
    FILE *tmp_file;
    tmp_file = fopen ("/tmp/cpufreq.txt", "r");
    if (tmp_file != NULL)
    {
        char tmp_str[30];
        if (fgets (tmp_str, 30, tmp_file) != NULL)
        {
            cpu_freq_cpuinfo = atof (tmp_str);
        }
        fclose (tmp_file);
    }

    unsigned int numPhysicalCores, numLogicalCores;
    numPhysicalCores = socket_0.num_physical_cores + socket_1.num_physical_cores;
    numLogicalCores  = socket_0.num_logical_cores + socket_1.num_logical_cores;

    int error_indx;

    //estimate the freq using the estimate_MHz() code that is almost mhz accurate
    cpu_freq_cpuinfo = estimate_MHz ();

    //We just need one CPU (we use Core-0) to figure out the multiplier and the bus clock freq.
    CPU_NUM = 0;
    CPU_Multiplier =
        get_msr_value (CPU_NUM, PLATFORM_INFO_MSR, PLATFORM_INFO_MSR_high,
                       PLATFORM_INFO_MSR_low, &error_indx);
    BLCK = cpu_freq_cpuinfo / CPU_Multiplier;
    TURBO_MODE = turbo_status ();

    //HT is considered enabled when there are more logical than physical cores.
    bool HT_ON;
    char HT_ON_str[30];
    if (numLogicalCores > numPhysicalCores)
    {
        strcpy (HT_ON_str, "Hyper Threading ON");
        HT_ON = true;
    }
    else
    {
        strcpy (HT_ON_str, "Hyper Threading OFF");
        HT_ON = false;
    }

    //Initial value only; it is recomputed from the measured per-core
    //frequencies at the end of every loop pass.
    float TRUE_CPU_FREQ;
    if (TURBO_MODE == 1)
    {
        TRUE_CPU_FREQ = BLCK * ((double)CPU_Multiplier + 1);
    }
    else
    {
        TRUE_CPU_FREQ = BLCK * ((double)CPU_Multiplier);
    }


    int IA32_PERF_GLOBAL_CTRL = 911;	//0x38F
    int IA32_PERF_GLOBAL_CTRL_Value =
        get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0,&error_indx);
    int IA32_FIXED_CTR_CTL = 909;	//0x38D
    int IA32_FIXED_CTR_CTL_Value =
        get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0,&error_indx);

    unsigned long long int CPU_CLK_UNHALTED_CORE, CPU_CLK_UNHALTED_REF,
    CPU_CLK_C3, CPU_CLK_C6, CPU_CLK_C1;

    //MSR 778 (0x30A) and 779 (0x30B) hold the unhalted core / reference counts.
    CPU_CLK_UNHALTED_CORE = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx);
    CPU_CLK_UNHALTED_REF = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx);

    //Previous and current counter snapshots, one slot per entry of core_list.
    unsigned long long int old_val_CORE[numCPUs], new_val_CORE[numCPUs];
    unsigned long long int old_val_REF[numCPUs], new_val_REF[numCPUs];
    unsigned long long int old_val_C3[numCPUs], new_val_C3[numCPUs];
    unsigned long long int old_val_C6[numCPUs], new_val_C6[numCPUs];

    unsigned long long int old_TSC[numCPUs], new_TSC[numCPUs];

    struct timeval tvstart[numCPUs], tvstop[numCPUs];

    struct timespec one_second_sleep;
    one_second_sleep.tv_sec = 0;
    one_second_sleep.tv_nsec = 999999999;	// 1000msec


    //Sample MSR 231 (0xE7) and 232 (0xE8) across one sleep interval.  The
    //differences are not used afterwards, but the sleep delays start-up.
    unsigned long int IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0,&error_indx);
    unsigned long int IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0,&error_indx);
    nanosleep (&one_second_sleep, NULL);
    IA32_MPERF = get_msr_value (CPU_NUM, 231, 7, 0, &error_indx) - IA32_MPERF;
    IA32_APERF = get_msr_value (CPU_NUM, 232, 7, 0, &error_indx) - IA32_APERF;

    long double C0_time[numCPUs], C1_time[numCPUs], C3_time[numCPUs],    C6_time[numCPUs];
    double _FREQ[numCPUs], _MULT[numCPUs];

    //kk starts above 10 so that the very first pass takes a baseline; after
    //that the baseline block runs again on every 11th pass.
    int kk=11;
    double estimated_mhz;
    for (;;)
    {
        Construct_Socket_Information_in_GUI(&numCPUs);

        if (kk>10) {
            kk=0;
            for (ii = 0; ii < (int)numCPUs; ii++)
            {
                CPU_NUM = core_list[ii];
                //Program the two control MSRs, then read them back.
                IA32_PERF_GLOBAL_CTRL_Value =	get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx);
                set_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 0x700000003LLU);

                IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx);
                set_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 819);

                IA32_PERF_GLOBAL_CTRL_Value =	get_msr_value (CPU_NUM, IA32_PERF_GLOBAL_CTRL, 63, 0, &error_indx);
                IA32_FIXED_CTR_CTL_Value = get_msr_value (CPU_NUM, IA32_FIXED_CTR_CTL, 63, 0, &error_indx);

                //Baseline snapshot: 778/779 as above, 1020 (0x3FC) and
                //1021 (0x3FD) for the C3 and C6 counters, plus the TSC.
                old_val_CORE[ii] = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx);
                old_val_REF[ii] = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx);
                old_val_C3[ii] = get_msr_value (CPU_NUM, 1020, 63, 0,&error_indx);
                old_val_C6[ii] = get_msr_value (CPU_NUM, 1021, 63, 0,&error_indx);
                old_TSC[ii] = rdtsc ();
            }
        }
        kk++;

        nanosleep (&one_second_sleep, NULL);

        estimated_mhz = estimate_MHz();

        for (i = 0; i < (int)numCPUs; i++)
        {
            CPU_NUM = core_list[i];
            new_val_CORE[i] = get_msr_value (CPU_NUM, 778, 63, 0,&error_indx);
            new_val_REF[i] = get_msr_value (CPU_NUM, 779, 63, 0,&error_indx);
            new_val_C3[i] = get_msr_value (CPU_NUM, 1020, 63, 0,&error_indx);
            new_val_C6[i] = get_msr_value (CPU_NUM, 1021, 63, 0,&error_indx);
            new_TSC[i] = rdtsc ();

            //Counter deltas.  Unsigned subtraction is modular, so new - old is
            //also the right difference when the counter wrapped once between
            //the two samples.  The previous code added a floating-point
            //constant of about 2^128 and converted the result back to an
            //unsigned 64-bit integer, which is out of range and undefined.
            //NOTE(review): this assumes the values wrap at 2^64; if a counter
            //is narrower in hardware the wrapped delta needs masking -- confirm.
            CPU_CLK_UNHALTED_CORE = new_val_CORE[i] - old_val_CORE[i];

            //number of TSC cycles while its in halted state
            if ((new_TSC[i] - old_TSC[i]) < CPU_CLK_UNHALTED_CORE)
                CPU_CLK_C1 = 0;
            else
                CPU_CLK_C1 = ((new_TSC[i] - old_TSC[i]) - CPU_CLK_UNHALTED_CORE);

            CPU_CLK_UNHALTED_REF = new_val_REF[i] - old_val_REF[i];
            CPU_CLK_C3 = new_val_C3[i] - old_val_C3[i];
            CPU_CLK_C6 = new_val_C6[i] - old_val_C6[i];

            //Frequency = estimated base MHz scaled by the core/ref cycle ratio.
            _FREQ[i] =
                estimated_mhz * ((long double) CPU_CLK_UNHALTED_CORE /
                                 (long double) CPU_CLK_UNHALTED_REF);
            _MULT[i] = _FREQ[i] / BLCK;

            //Residency fractions relative to the elapsed TSC ticks.
            C0_time[i] =
                ((long double) CPU_CLK_UNHALTED_REF /
                 (long double) (new_TSC[i] - old_TSC[i]));
            long double c1_time =
                ((long double) CPU_CLK_C1 /
                 (long double) (new_TSC[i] - old_TSC[i]));
            C3_time[i] =
                ((long double) CPU_CLK_C3 /
                 (long double) (new_TSC[i] - old_TSC[i]));
            C6_time[i] =
                ((long double) CPU_CLK_C6 /
                 (long double) (new_TSC[i] - old_TSC[i]));

            //C1 is the halted time that is not accounted to C3 or C6;
            //negative results are clamped to 0 when c1_time is finite.
			C1_time[i] = c1_time - (C3_time[i] + C6_time[i]) ;
            if (!isnan(c1_time) && !isinf(c1_time)) {
                if (C1_time[i] <= 0) {
                    C1_time[i]=0;
                }
            }

            //Values below 1e-2 are snapped: to 0.01 if above 1e-4, else to 0.
            if (C0_time[i] < 1e-2) {
                if (C0_time[i] > 1e-4) 	C0_time[i] = 0.01;
                else				    C0_time[i] = 0;
            }

            if (C1_time[i] < 1e-2) {
                if (C1_time[i] > 1e-4)  C1_time[i] = 0.01;
                else				    C1_time[i] = 0;
            }

            if (C3_time[i] < 1e-2) {
                if (C3_time[i] > 1e-4)  C3_time[i] = 0.01;
                else			        C3_time[i] = 0;
            }

            if (C6_time[i] < 1e-2) {
                if (C6_time[i] > 1e-4)  C6_time[i] = 0.01;
                else			        C6_time[i] = 0;
            }
        }

        //Highest per-core frequency seen in this pass.
        TRUE_CPU_FREQ = 0;
        for (i = 0; i < (int)numCPUs; i++)
            if (_FREQ[i] > TRUE_CPU_FREQ)
                TRUE_CPU_FREQ = _FREQ[i];

        //Current snapshot becomes the next baseline.  The element size must
        //match the arrays' element type (unsigned long long); sizing with
        //unsigned long copies only half of each array where long is 32 bits.
        memcpy (old_val_CORE, new_val_CORE, sizeof (unsigned long long int) * numCPUs);
        memcpy (old_val_REF, new_val_REF, sizeof (unsigned long long int) * numCPUs);
        memcpy (old_val_C3, new_val_C3, sizeof (unsigned long long int) * numCPUs);
        memcpy (old_val_C6, new_val_C6, sizeof (unsigned long long int) * numCPUs);
        //NOTE(review): tvstop is never written in this function, so this copy
        //moves indeterminate bytes; kept as-is because nothing here reads them.
        memcpy (tvstart, tvstop, sizeof (struct timeval) * numCPUs);
        memcpy (old_TSC, new_TSC, sizeof (unsigned long long int) * numCPUs);

        //Publish the results of this pass.
        memcpy (FREQ, _FREQ, sizeof (double) * numCPUs);
        memcpy (MULT, _MULT, sizeof (double) * numCPUs);
        memcpy (C0_TIME, C0_time, sizeof (long double) * numCPUs);
        memcpy (C1_TIME, C1_time, sizeof (long double) * numCPUs);
        memcpy (C3_TIME, C3_time, sizeof (long double) * numCPUs);
        memcpy (C6_TIME, C6_time, sizeof (long double) * numCPUs);
        global_in_i7z_main_thread = true;
    }

}