/*
 * Average one measurement series, echoing every sample and the average
 * to stderr under the given label (e.g. "loops_values: 1.0 2.0 \n"
 * followed by "loops_values: avg 1.5\n").  Returns the average.
 */
static long double print_and_average(const char *label, const long double *values, int n)
{
	long double sum = 0;

	fprintf(stderr, "%s: ", label);
	for (int i = 0; i < n; i++) {
		fprintf(stderr, "%Lf ", values[i]);
		sum += values[i];
	}
	fprintf(stderr, "\n");

	long double avg = sum / n;
	fprintf(stderr, "%s: avg %Lf\n", label, avg);
	return avg;
}

/*
 * Calibrate the machine by sampling CPU speed and memory-update speed
 * ntests times and averaging the results.
 *
 * @loops:  out — average busy-loop iterations per second.
 * @pages:  out — average memory pages touched per second.
 * @membuf: scratch buffer used by get_mem_speed().
 * @npages: number of 4 KiB pages in @membuf.
 *
 * All samples and both averages are logged to stderr.
 */
void get_calibration_values(long double *loops, long double *pages, char *membuf, unsigned long npages)
{
	/* enum gives a true constant expression in C (a `const int` would
	 * make these arrays VLAs). */
	enum { ntests = 100 };
	long double loops_values[ntests];
	long double pages_values[ntests];

	for (int i = 0; i < ntests; i++) {
		loops_values[i] = get_cpu_speed();
		pages_values[i] = get_mem_speed(membuf, npages);
	}

	*loops = print_and_average("loops_values", loops_values, ntests);
	*pages = print_and_average("pages_values", pages_values, ntests);
}
int lib_get_sysinfo(void) { int ret; /* Get the CPU speed (for delays). */ lib_sysinfo.cpu_khz = get_cpu_speed(); #if IS_ENABLED(CONFIG_LP_MULTIBOOT) /* Get the information from the multiboot tables, * if they exist */ get_multiboot_info(&lib_sysinfo); #endif /* Get information from the coreboot tables, * if they exist */ ret = get_coreboot_info(&lib_sysinfo); if (!lib_sysinfo.n_memranges) { /* If we can't get a good memory range, use the default. */ lib_sysinfo.n_memranges = 2; lib_sysinfo.memrange[0].base = 0; lib_sysinfo.memrange[0].size = 640 * 1024; lib_sysinfo.memrange[0].type = CB_MEM_RAM; lib_sysinfo.memrange[1].base = 1024 * 1024; lib_sysinfo.memrange[1].size = 31 * 1024 * 1024; lib_sysinfo.memrange[1].type = CB_MEM_RAM; } return ret; }
/*
 * Report total/free system RAM and the measured CPU speed, along with
 * the cost of the speed measurement itself in both raw ticks and
 * wall-clock seconds.
 */
int main()
{
	ticks tks;
	unsigned long mem;
	double secs;

	/* System RAM.  The values are unsigned long, so they must be
	 * printed with %lu — %ld on an unsigned value is undefined
	 * behavior per the C standard. */
	mem = get_totalram();
	printf("\nRAM installed: %lu", mem);
	mem = get_freeram();
	printf("\nRAM available: %lu", mem);

	/* CPU speed, timed two ways around the same call. */
	tks = ticks_start();
	start_timer();
	double speed = get_cpu_speed();
	tks = ticks_stop() - tks;
	secs = stop_timer();

	printf("\nCPU-speed (GHz): %f", speed);
	printf("\nTicks for call: %ld", (long)tks);
	printf("\nSecs for call: %f", secs);
	printf("\n");

	return 0;
}
/*
 * Populate lib_sysinfo from the coreboot tables.
 *
 * Measures the CPU speed (used later for delay loops) and queries the
 * coreboot tables.  When no memory map was discovered, installs a
 * minimal fallback of a single 1 MiB RAM range at address 0.
 *
 * Returns the result of get_coreboot_info().
 */
int lib_get_sysinfo(void)
{
	int ret;

	/* CPU speed is needed for calibrated delays. */
	lib_sysinfo.cpu_khz = get_cpu_speed();

	/* Pull in whatever the coreboot tables provide, if present. */
	ret = get_coreboot_info(&lib_sysinfo);

	if (lib_sysinfo.n_memranges == 0) {
		/* No usable memory map found — fall back to the default. */
		lib_sysinfo.n_memranges = 1;
		lib_sysinfo.memrange[0].base = 0;
		lib_sysinfo.memrange[0].size = 1024 * 1024;
		lib_sysinfo.memrange[0].type = CB_MEM_RAM;
	}

	return ret;
}
/*
 * memtouch load generator.
 *
 * Two commands (selected via -C / -M):
 *   calibrate (-C):  ./a.out [size (mbytes)]
 *       measure this machine's busy-loop and page-update speeds and
 *       print them as reusable command-line options.
 *   makeload  (-M):  ./a.out [size (mbytes)] [speed (mbytes/s)]
 *       generate a sustained memory-update load at the target speed,
 *       optionally capped to a CPU utilization level (-b).
 *
 * Options:
 *   -c/-m  supply pre-measured cpu/mem speeds (skips calibration;
 *          both are required together)
 *   -b     target CPU-utilization cap (fraction, stored in cpu_bound)
 *   -N     fill the scratch buffer with zeros instead of random data
 *   -p     path backing the scratch buffer (passed to
 *          alloc_scratch_space; presumably a file to mmap — TODO confirm)
 *
 * The makeload loop never exits; the process runs until killed.
 */
int main(int argc, char **argv)
{
	long double loops_per_second = -1;
	long double pages_per_second = -1;
	int skip_calibration = 0;
	bool fill_random = true;
	const char *mempath = NULL;

	/* Parse long-only options (see the file-level `longopts` table). */
	for (;;) {
		int oindex = 0;
		int c = getopt_long_only(argc, argv, "CMc:m:b:Np:", longopts, &oindex);
		if (c == -1)
			break;
		switch (c) {
		case 'C':
			cmd = cmd_calibrate;
			break;
		case 'M':
			cmd = cmd_makeload;
			break;
		case 'c':
			loops_per_second = atof(optarg);
			skip_calibration = 1;
			break;
		case 'm':
			pages_per_second = atof(optarg);
			skip_calibration = 1;
			break;
		case 'b':
			cpu_bound = atof(optarg);
			break;
		case 'N':
			/* Using zero-filled data may be a bad idea?  We
			 * observed that the memory touch operation takes
			 * different durations for a zero-filled value and a
			 * random value.
			 *
			 * With zero-filled data, we got higher cpu loads than
			 * the target level. */
			fill_random = false;
			break;
		case 'p':
			mempath = optarg;
			break;
		default:
			fprintf(stderr, "command line parse error\n");
			exit(EXIT_FAILURE);
		}
	}

	/* Validate the positional-argument count for the chosen command. */
	if (cmd == cmd_calibrate) {
		if (argc - optind != 1) {
			/* ./a.out [size (mbytes)] */
			show_help(argv[0]);
			exit(EXIT_FAILURE);
		}
	} else if (cmd == cmd_makeload) {
		if (argc - optind != 2) {
			/* ./a.out [size (mbytes)] [speed (mbytes/s)] */
			show_help(argv[0]);
			exit(EXIT_FAILURE);
		}
	} else {
		show_help(argv[0]);
		exit(EXIT_FAILURE);
	}

	/* Scratch buffer: [size] MiB, tracked in 4 KiB pages. */
	const unsigned long memsize = 1UL * atoi(argv[optind]) * 1024 * 1024;
	const unsigned long membuf_npages = memsize / 4096;
	char *membuf = alloc_scratch_space(memsize, mempath, fill_random);

	if (cmd == cmd_calibrate) {
		/* The first pair of calls warms the buffer so page-table
		 * creation does not skew the measured values. */
		get_cpu_speed();
		get_mem_speed(membuf, membuf_npages);
		get_calibration_values(&loops_per_second, &pages_per_second, membuf, membuf_npages);
		/* Printed in a form directly reusable as -c/-m options. */
		printf("--cpu-speed %Lf --mem-speed %Lf\n", loops_per_second, pages_per_second);
		exit(EXIT_SUCCESS);
	}

	/* makeload: either trust the supplied speeds or measure them now. */
	if (skip_calibration) {
		if (loops_per_second < 0 || pages_per_second < 0) {
			fprintf(stderr, "need both the speeds of cpu and mem\n");
			exit(EXIT_FAILURE);
		}
	} else {
		/* Warm-up passes so page-table creation does not skew the
		 * calibration below. */
		for (int i = 0; i < 100; i++) {
			get_cpu_speed();
			get_mem_speed(membuf, membuf_npages);
		}
		// get_cpu_speed();
		// get_mem_speed(membuf, npages);
		// loops_per_second = get_cpu_speed();
		// pages_per_second = get_mem_speed(membuf, npages);
		get_calibration_values(&loops_per_second, &pages_per_second, membuf, membuf_npages);
	}
	fprintf(stderr, "calibrate: loops_per_second %Lf, pages_per_second %Lf\n", loops_per_second, pages_per_second);

	/* ./a.out [size (mbytes)] [speed (mbytes/s)] */
	unsigned long speed = 1024UL * 1024 * atoi(argv[optind + 1]);
	unsigned long speed_in_pages = speed / 4096;
	fprintf(stderr, "update speed: %lu mbytes/s (%lu pages/s)\n", speed / 1024 / 1024, speed_in_pages);

	/*
	 * We make the target memory update speed with our dummy CPU intensive
	 * task.  The task iterates the micro busy loops and memory updates.
	 * Note that updating memory pages is also cpu intensive work.
	 *
	 * |-------------|-------------| ......(iterate)......
	 *     T_{cpu}       T_{mem}
	 *
	 * Let's consider the below situation; where
	 *   during T_{cpu}, the task performs busy loops (a times), and
	 *   during T_{mem}, the task performs memory updates (b pages).
	 *
	 * We get the capability of the given VM by measurement.  Here we
	 * assume the VM can perform
	 *   busy loops by loops_per_second, and
	 *   memory updates by pages_per_second,
	 * respectively.
	 *
	 * Then,
	 *   T_{cpu} = a / loops_per_second
	 *   T_{mem} = b / pages_per_second
	 *
	 * The target memory update speed, S (pages/s), is
	 *   S = b / (T_{cpu} + T_{mem})
	 *
	 * Therefore, we get the ratio between a and b;
	 *   a / b = loops_per_second / S - loops_per_second / pages_per_second
	 */
	long double a_b = loops_per_second / speed_in_pages - loops_per_second / pages_per_second;
	printf("a:b : %Lf\n", a_b);

	/*
	 * We emulate the situation where the CPU utilization of the memtouch
	 * program is capped by a certain value.  It is possible to use
	 * cpulimit, but cpulimit does not make accurate results because of
	 * the overhead of signaling and micro sleep.
	 *
	 *   T_{cpu} + T_{mem} + T_{sleep} = 0.01
	 * The target CPU utilization, p = (T_{cpu} + T_{mem}) / 0.01.
	 *   T_{sleep} = 0.01 - (T_{cpu} + T_{mem}) = 0.01 - (0.01 * p) = 0.01 * (1 - p)
	 */
	const long double Tsleep = 0.01L * (1 - cpu_bound);

	/*
	 * In the below loop, we want to complete one iteration in 0.01 second.
	 *
	 *   T_{cpu} + T_{mem} = 0.01
	 *   a_b * b / loops_per_second + b / pages_per_second = 0.01
	 *
	 * Then, we get b:
	 */
	// const long double b = 0.01L / (a_b / loops_per_second + 1.0 / pages_per_second);
	const long double b = (0.01L - Tsleep) / (a_b / loops_per_second + 1.0L / pages_per_second);
	fprintf(stderr, "b = %Lf\n", b);
	fprintf(stderr, "cpu_loops: %Lf\n", a_b * b);
	fprintf(stderr, "mem_loops: %Lf\n", b);

	long double Tcpu = a_b * b / loops_per_second;
	long double Tmem = b / pages_per_second;
	fprintf(stderr, "Tcpu = %10.10Lf\n", Tcpu);
	fprintf(stderr, "Tmem = %10.10Lf\n", Tmem);
	fprintf(stderr, "Tsleep = %10.10Lf\n", Tsleep);
	fprintf(stderr, "DVFS must be disabled\n");

	unsigned long pre_total_npages = 0;
	struct timeval pre_tv;
	gettimeofday_or_error(&pre_tv);
	unsigned long hoge = 0;	/* iteration counter */
	double slept = 0;	/* seconds slept since the last report */
	/* Fractional loop/page counts carried over between iterations so
	 * the long-run averages stay on target. */
	long double loops_remain = 0;
	long double pages_remain = 0;

	/* Touch memory forever in ~0.01 s iterations: never exits. */
	for (;;) {
		long double nloops = a_b * b;
		long double npages = b;
		if (loops_remain > 1) {
			nloops += 1;
			loops_remain -= 1;
		}
		if (pages_remain > 1) {
			npages += 1;
			pages_remain -= 1;
		}
		struct timeval sta_tv;
		struct timeval end_tv;
		gettimeofday_or_error(&sta_tv);
		loops_remain += do_cpu_loop(nloops);
		pages_remain += do_mem_loop(npages, membuf, membuf_npages);
		/* This usleep() gives the hypervisor a chance to assign CPU
		 * to another VM. */
		usleep(1);
		gettimeofday_or_error(&end_tv);
		/* Progress report roughly once per second (every 100
		 * iterations of ~0.01 s each). */
		if (hoge % 100 == 0) {
			unsigned long updated = total_npages - pre_total_npages;
			struct timeval now_tv;
			gettimeofday_or_error(&now_tv);
			double duration = get_duration(&pre_tv, &now_tv);
			printf("duration %f (sec): updated %lu (%lu - %lu) (pages), speed %f (mbytes/s), sleep %f s\n", duration, updated, total_npages, pre_total_npages, updated * 4096 / duration / 1024 / 1024, slept);
			memcpy(&pre_tv, &now_tv, sizeof(now_tv));
			pre_total_npages = total_npages;
			slept = 0;
		}
		/* If this program cannot get enough CPU resource, the
		 * duration between sta_tv and end_tv will exceed 0.01
		 * seconds.
		 *
		 * => This is not true in some cases.  The VM may execute the
		 * code between sta_tv and end_tv without experiencing any
		 * preemption.  Then, the VM will sleep longer in the below.
		 * So, I inserted usleep(1) above.  If this VM needs to wait
		 * for the next quantum, the above usleep(1) does not return
		 * right away.  It will behave like do_cpu_loop() and
		 * do_mem_loop() take longer due to contention.
		 *
		 * In such case, we do not need to sleep.  We have to run as
		 * fast as possible. */
		double duration = get_duration(&sta_tv, &end_tv);
		if (duration > 0.01) {
			// printf("duration %lf\n", duration);
		} else {
			// printf("duration %lf, sleep %lf, nloops %Lf npages %Lf\n", duration, (0.01 - duration), nloops, npages);
			slept += 0.01L - duration;
			usleep((0.01L - duration) * 1000 * 1000);
		}
		/* If DVFS is enabled, this program generates a slightly
		 * higher CPU load than a target one, especially when the
		 * target CPU load is small.  This is because in lower CPU
		 * utilization levels the system automatically slows down the
		 * frequency of physical CPUs. */
#if 0
		/* Reduce the number of usleep calls, but usleep a x10 period of time at once. */
		// usleep(Tsleep * 1000 * 1000);
		if (hoge % 10 == 0) { // every 0.1 seconds
			usleep(Tsleep * 1000 * 1000 * 10);
		}
#endif
		hoge += 1;
	}
	/* NOTE(review): unreachable — the loop above never breaks. */
	free(membuf);
	return 0;
}
// Initialize the clock's conversion factor from the measured CPU speed.
// A speed of X MHz means X * 1e6 ticks per second, i.e. X / 1000 ticks
// per nanosecond.
void high_resolution_clock::init() {
    const float cpu_mhz = get_cpu_speed();
    tics_per_nanosecond = cpu_mhz / 1000;
}