/*
 * Print one bandwidth result line.
 *
 * Two figures are reported after `size` and the iteration count:
 *   - peak:    derived from the smallest per-message cycle count found over
 *              every window (post index i .. completion index j, i <= j).
 *              The scan is skipped when noPeak is set, and the peak column
 *              is then printed as 0.
 *   - average: derived from the whole run, from tposted[0] up to the last
 *              entry of tcompleted.
 *
 * tposted / tcompleted hold iters * num_of_qps cycle stamps each.
 * no_cpu_freq_fail is forwarded untouched to get_cpu_mhz().
 */
static void print_report(unsigned size, int duplex, cycles_t *tposted,
			 cycles_t *tcompleted,
			 struct perftest_parameters *user_param,
			 int noPeak, int no_cpu_freq_fail)
{
	int iters = user_param->iters;
	int first, last;
	cycles_t per_msg;
	cycles_t best_delta = tcompleted[0] - tposted[0];
	unsigned long tsize;	/* `size`, doubled when the test is duplex */
	double cycles_to_units;
	double peak_bw;
	double avg_bw;

	if (!noPeak) {
		/* Exhaustive search for the window with the fewest cycles per message. */
		for (first = 0; first < iters * user_param->num_of_qps; ++first) {
			for (last = first; last < iters * user_param->num_of_qps; ++last) {
				per_msg = (tcompleted[last] - tposted[first]) / (last - first + 1);
				if (per_msg < best_delta)
					best_delta = per_msg;
			}
		}
	}

	/* MHz -> cycles per second. */
	cycles_to_units = get_cpu_mhz(no_cpu_freq_fail) * 1000000;

	tsize = (unsigned long)(duplex ? 2 : 1) * size;

	/* 0x100000 scales the result down by 2^20. */
	peak_bw = !(noPeak) * tsize * cycles_to_units / best_delta / 0x100000;
	avg_bw = tsize * iters * user_param->num_of_qps * cycles_to_units /
		 (tcompleted[(iters * user_param->num_of_qps) - 1] - tposted[0]) / 0x100000;

	printf(REPORT_FMT, size, iters, peak_bw, avg_bw);
}
static void print_report(struct perftest_parameters *user_param) { double cycles_to_units; unsigned long tsize; /* Transferred size, in megabytes */ int i, j; int opt_posted = 0, opt_completed = 0; cycles_t opt_delta; cycles_t t; opt_delta = tcompleted[opt_posted] - tposted[opt_completed]; /* Find the peak bandwidth */ for (i = 0; i < user_param->iters * user_param->num_of_qps; ++i) for (j = i; j < user_param->iters * user_param->num_of_qps; ++j) { t = (tcompleted[j] - tposted[i]) / (j - i + 1); if (t < opt_delta) { opt_delta = t; opt_posted = i; opt_completed = j; } } cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000; tsize = user_param->duplex ? 2 : 1; tsize = tsize * user_param->size; printf(REPORT_FMT, (unsigned long)user_param->size,user_param->iters,tsize * cycles_to_units / opt_delta / 0x100000, tsize * user_param->iters * user_param->num_of_qps * cycles_to_units /(tcompleted[(user_param->iters* user_param->num_of_qps) - 1] - tposted[0]) / 0x100000); }
/* * The retrieval of the clock time and the TSC are not atomic, there * may be time unaccounted for. * * Could profile the RDTSC call at startup and use that measurement to * determine lost time...I'm not kidding, this comes from an Intel * guide about benchmarking and TSC. * * Now, if the clock time is stored somewhere whenever the TSC is * reset then I could use that value as the base and it would be * accurate, but for now use this hack. */ static void _init_fasttime() { (void) check_tsc(); if ((_sys_clock_gettime = dlsym(RTLD_NEXT, "clock_gettime")) == NULL) { perror("failed to load system clock_gettime()"); exit(1); } if ((_sys_gettimeofday = dlsym(RTLD_NEXT, "gettimeofday")) == NULL) { perror("failed to load system gettimeofday()"); exit(1); } /* * The approximate value of the kernel's cpu_freq_hz. * Approximate because the kernel uses emperical readings of * the TSC against PIT timeouts to determine the clock * frequency. The pi_clock value should be based on this value * but some of the precision is lost, not sure if that * matters much in practice. */ approx_cpu_hz = MHZ_TO_HZ(get_cpu_mhz()); nsec_scale = (uint64_t)(((uint64_t)NANOSEC << (32 - NSEC_SHIFT)) / approx_cpu_hz); sync_local_clock(); }
/*
 * Print latency statistics computed from `iters` consecutive cycle stamps.
 *
 * The iters - 1 differences between neighbouring entries of tstamp are
 * computed, optionally dumped in measurement order (options->unsorted),
 * sorted with cycles_compare, optionally dumped again in sorted order
 * (options->histogram), and finally summarised as one line:
 *     size, iters, minimum, maximum, median.
 *
 * Values are printed in raw cycles when options->cycles is set, otherwise
 * they are divided by the CPU frequency in MHz and labelled "usec".
 * Every printed value is additionally halved (presumably because each delta
 * covers a full round trip -- confirm against the caller).
 *
 * At least two stamps are needed to form one delta; with fewer, an error is
 * written to stderr and nothing else is printed.
 */
static void print_report(struct report_options * options, unsigned int iters,
			 cycles_t *tstamp, int size, int no_cpu_freq_fail)
{
	double cycles_to_units;
	cycles_t median;
	unsigned int i;
	unsigned int samples;
	const char *units;
	cycles_t *delta;

	/*
	 * iters - 1 would wrap around for iters == 0, and for iters == 1 the
	 * summary line would read delta[0] and delta[iters - 2] out of bounds.
	 */
	if (iters < 2) {
		fprintf(stderr, "print_report: need at least 2 timestamps, got %u\n", iters);
		return;
	}
	samples = iters - 1;

	delta = malloc(samples * sizeof *delta);
	if (!delta) {
		perror("malloc");
		return;
	}

	for (i = 0; i < samples; ++i)
		delta[i] = tstamp[i + 1] - tstamp[i];

	if (options->cycles) {
		cycles_to_units = 1;
		units = "cycles";
	} else {
		cycles_to_units = get_cpu_mhz(no_cpu_freq_fail);
		units = "usec";
	}

	if (options->unsorted) {
		printf("#, %s\n", units);
		for (i = 0; i < samples; ++i)
			printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
	}

	qsort(delta, samples, sizeof *delta, cycles_compare);

	if (options->histogram) {
		printf("#, %s\n", units);
		for (i = 0; i < samples; ++i)
			printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
	}

	median = get_median(samples, delta);

	/* After sorting, delta[0] is the minimum and delta[samples - 1] the maximum. */
	printf("%7d %u %7.2f %7.2f %7.2f\n", size, iters,
	       delta[0] / cycles_to_units / 2,
	       delta[samples - 1] / cycles_to_units / 2,
	       median / cycles_to_units / 2);

	free(delta);
}
static void print_report(struct perftest_parameters *user_param) { double cycles_to_units; cycles_t median; unsigned int i; const char* units; cycles_t *delta = malloc((user_param->iters - 1) * sizeof *delta); if (!delta) { perror("malloc"); return; } for (i = 0; i < user_param->iters - 1; ++i) delta[i] = tstamp[i + 1] - tstamp[i]; if (user_param->r_flag->cycles) { cycles_to_units = 1; units = "cycles"; } else { cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f); units = "usec"; } if (user_param->r_flag->unsorted) { printf("#, %s\n", units); for (i = 0; i < user_param->iters - 1; ++i) printf("%d, %g\n", i + 1, delta[i] / cycles_to_units ); } qsort(delta, user_param->iters - 1, sizeof *delta, cycles_compare); if (user_param->r_flag->histogram) { printf("#, %s\n", units); for (i = 0; i < user_param->iters - 1; ++i) printf("%d, %g\n", i + 1, delta[i] / cycles_to_units ); } median = get_median(user_param->iters - 1, delta); printf(REPORT_FMT_LAT,(unsigned long)user_param->size,user_param->iters,delta[0] / cycles_to_units , delta[user_param->iters - 2] / cycles_to_units ,median / cycles_to_units ); free(delta); }
int main (int argc, char *argv[]) { start_counter(); (void) argv; //supress warning. //0) Compute CPU speed experimentally. mhz = get_cpu_mhz(); // on a 3.5 ghz proc this is usually ~3 4xx xyz xyz, indicating cycles/second. printf("# Cpu_mhz: %lu\n", mhz); fflush(stdout); if (single_affinity) { set_single_cpu_affinity(); } // Just passing an arg will branch to context switch experiment. if (argc == 2) { context_switch_experiment(); return 0; } else { inactive_time_experiment(); } printf("\n\n"); //clear screen. return 0; }
/*---------------------------------------------------------------------------*/ int run_client_test(struct perf_parameters *user_param) { struct session_data sess_data; struct perf_comm *comm; struct thread_data *tdata; char url[256]; int i = 0; int max_cpus; pthread_t statistics_thread_id; struct perf_command command; int size_log2; int max_size_log2 = 24; /* client session attributes */ struct xio_session_attr attr = { &ses_ops, NULL, 0 }; xio_init(); g_mhz = get_cpu_mhz(0); max_cpus = sysconf(_SC_NPROCESSORS_ONLN); threads_iter = 1; size_log2 = 0; tdata = calloc(user_param->threads_num, sizeof(*tdata)); if (tdata == NULL) { fprintf(fd, "malloc failed\n"); return -1; } comm = create_comm_struct(user_param); if (establish_connection(comm)) { fprintf(stderr, "failed to establish connection\n"); free(tdata); destroy_comm_struct(comm); return -1; } if (user_param->output_file) { fd = fopen(user_param->output_file, "w"); if (fd == NULL) { fprintf(fd, "file open failed. %s\n", user_param->output_file); free(sess_data.tdata); destroy_comm_struct(comm); return -1; } fprintf(fd, "size, threads, tps, bw[Mbps], lat[usec]\n"); fflush(fd); } printf("%s", RESULT_FMT); printf("%s", RESULT_LINE); while (threads_iter <= user_param->threads_num) { data_len = (uint64_t)1 << size_log2; memset(&sess_data, 0, sizeof(sess_data)); memset(tdata, 0, user_param->threads_num*sizeof(*tdata)); sess_data.tdata = tdata; command.test_param.machine_type = user_param->machine_type; command.test_param.test_type = user_param->test_type; command.test_param.verb = user_param->verb; command.test_param.data_len = data_len; command.command = GetTestParams; ctx_write_data(comm, &command, sizeof(command)); sprintf(url, "rdma://%s:%d", user_param->server_addr, user_param->server_port); sess_data.session = xio_session_create(XIO_SESSION_CLIENT, &attr, url, 0, 0, &sess_data); if (sess_data.session == NULL) { int error = xio_errno(); fprintf(stderr, "session creation failed. 
reason %d - (%s)\n", error, xio_strerror(error)); goto cleanup; } pthread_create(&statistics_thread_id, NULL, statistics_thread_cb, &sess_data); /* spawn threads to handle connection */ for (i = 0; i < threads_iter; i++) { sess_data.tdata[i].affinity = ((user_param->cpu + i) % max_cpus); sess_data.tdata[i].cid = i; sess_data.tdata[i].sdata = &sess_data; sess_data.tdata[i].user_param = user_param; sess_data.tdata[i].data_len = data_len; /* all threads are working on the same session */ sess_data.tdata[i].session = sess_data.session; pthread_create(&sess_data.tdata[i].thread_id, NULL, worker_thread, &sess_data.tdata[i]); } pthread_join(statistics_thread_id, NULL); /* join the threads */ for (i = 0; i < threads_iter; i++) pthread_join(sess_data.tdata[i].thread_id, NULL); /* close the session */ xio_session_destroy(sess_data.session); if (sess_data.abort) { fprintf(stderr, "program aborted\n"); goto cleanup; } /* send result to server */ command.results.bytes = data_len; command.results.threads = threads_iter; command.results.tps = sess_data.tps; command.results.avg_bw = sess_data.avg_bw; command.results.avg_lat = sess_data.avg_lat_us; command.results.min_lat = sess_data.min_lat_us; command.results.max_lat = sess_data.max_lat_us; command.command = GetTestResults; /* sync point */ ctx_write_data(comm, &command, sizeof(command)); printf(REPORT_FMT, data_len, threads_iter, sess_data.tps, sess_data.avg_bw, sess_data.avg_lat_us, sess_data.min_lat_us, sess_data.max_lat_us); if (fd) fprintf(fd, "%lu, %d, %lu, %.2lf, %.2lf\n", data_len, threads_iter, sess_data.tps, sess_data.avg_bw, sess_data.avg_lat_us); fflush(fd); /* sync point */ ctx_read_data(comm, NULL, 0, NULL); if (++size_log2 < max_size_log2) continue; threads_iter++; size_log2 = 0; } printf("%s", RESULT_LINE); cleanup: if (fd) fclose(fd); ctx_hand_shake(comm); ctx_close_connection(comm); destroy_comm_struct(comm); free(tdata); xio_shutdown(); return 0; }
int main(int argc, char* argv[]) { int err = EXIT_FAILURE; struct cmd_line cmd_line; setlocale(LC_ALL, ""); if (parse_cmd_line(argc, argv, &cmd_line) < 0) goto arg_check_failed; struct pci_access * pci = pci_alloc(); if (! pci) goto pci_alloc_failed; /* This access bypass the kernel and use a memory mapping * to PCI configuration registers */ pci->method = PCI_ACCESS_I386_TYPE1; struct pci_dev * dev = create_pci_dev(pci, cmd_line.slot); if (! dev) goto create_pci_dev_failed; print_device_name(pci, dev); unsigned long * timestamps = malloc(sizeof(*timestamps) * cmd_line.iteration_count); if (! timestamps) { fprintf(stderr, "Can't allocate timestamp storage (%s)\n", strerror(errno)); goto malloc_failed; } struct timestamp t; read_timestamp_counter(&t); perform_reads(dev, timestamps, cmd_line.iteration_count, cmd_line.wait_time_us); unsigned long test_duration_cycles = cycle_since_timestamp(&t); double cpu_mhz = get_cpu_mhz(); if (cpu_mhz < 0) goto get_cpu_mhz_failed; print_results(cpu_mhz, timestamps, cmd_line.iteration_count, test_duration_cycles, cmd_line.limit_ns); err = EXIT_SUCCESS; get_cpu_mhz_failed: free(timestamps); malloc_failed: pci_free_dev(dev); create_pci_dev_failed: pci_cleanup(pci); pci_alloc_failed: arg_check_failed: return err; }
int main(void) { const struct bios_config *bios; int acpi_enabled; /* Initialise hypercall stubs with RET, rendering them no-ops. */ memset((void *)HYPERCALL_PHYSICAL_ADDRESS, 0xc3 /* RET */, PAGE_SIZE); printf("HVM Loader\n"); init_hypercalls(); xenbus_setup(); bios = detect_bios(); printf("System requested %s\n", bios->name); printf("CPU speed is %u MHz\n", get_cpu_mhz()); apic_setup(); pci_setup(); smp_initialise(); perform_tests(); if ( bios->bios_info_setup ) bios->bios_info_setup(); if ( bios->create_smbios_tables ) { printf("Writing SMBIOS tables ...\n"); bios->create_smbios_tables(); } printf("Loading %s ...\n", bios->name); if ( bios->bios_load ) bios->bios_load(bios); else { BUG_ON(bios->bios_address + bios->image_size > HVMLOADER_PHYSICAL_ADDRESS); memcpy((void *)bios->bios_address, bios->image, bios->image_size); } if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode ) { if ( bios->create_mp_tables ) bios->create_mp_tables(); if ( bios->create_pir_tables ) bios->create_pir_tables(); } if ( bios->load_roms ) bios->load_roms(); acpi_enabled = !strncmp(xenstore_read("platform/acpi", "1"), "1", 1); if ( acpi_enabled ) { init_vnuma_info(); if ( bios->acpi_build_tables ) { printf("Loading ACPI ...\n"); bios->acpi_build_tables(); } acpi_enable_sci(); hvm_param_set(HVM_PARAM_ACPI_IOPORTS_LOCATION, 1); } init_vm86_tss(); cmos_write_memory_size(); printf("BIOS map:\n"); if ( SCRATCH_PHYSICAL_ADDRESS != scratch_start ) printf(" %05x-%05lx: Scratch space\n", SCRATCH_PHYSICAL_ADDRESS, scratch_start); printf(" %05x-%05x: Main BIOS\n", bios->bios_address, bios->bios_address + bios->image_size - 1); if ( bios->e820_setup ) bios->e820_setup(); if ( bios->bios_info_finish ) bios->bios_info_finish(); xenbus_shutdown(); printf("Invoking %s ...\n", bios->name); return 0; }
int main(void) { const struct bios_config *bios; int acpi_enabled; const struct hvm_modlist_entry *bios_module; /* Initialise hypercall stubs with RET, rendering them no-ops. */ memset((void *)HYPERCALL_PHYSICAL_ADDRESS, 0xc3 /* RET */, PAGE_SIZE); printf("HVM Loader\n"); BUG_ON(hvm_start_info->magic != XEN_HVM_START_MAGIC_VALUE); init_hypercalls(); memory_map_setup(); xenbus_setup(); bios = detect_bios(); printf("System requested %s\n", bios->name); printf("CPU speed is %u MHz\n", get_cpu_mhz()); apic_setup(); pci_setup(); smp_initialise(); perform_tests(); if ( bios->bios_info_setup ) bios->bios_info_setup(); if ( bios->create_smbios_tables ) { printf("Writing SMBIOS tables ...\n"); bios->create_smbios_tables(); } printf("Loading %s ...\n", bios->name); bios_module = get_module_entry(hvm_start_info, "firmware"); if ( bios_module ) { uint32_t paddr = bios_module->paddr; bios->bios_load(bios, (void*)paddr, bios_module->size); } #ifdef ENABLE_ROMBIOS else if ( bios == &rombios_config ) { bios->bios_load(bios, NULL, 0); } #endif else { /* * If there is no BIOS module supplied and if there is no embeded BIOS * image, then we failed. Only rombios might have an embedded bios blob. 
*/ printf("no BIOS ROM image found\n"); BUG(); } if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode ) { if ( bios->create_mp_tables ) bios->create_mp_tables(); if ( bios->create_pir_tables ) bios->create_pir_tables(); } if ( bios->load_roms ) bios->load_roms(); acpi_enabled = !strncmp(xenstore_read("platform/acpi", "1"), "1", 1); if ( acpi_enabled ) { init_vnuma_info(); if ( bios->acpi_build_tables ) { printf("Loading ACPI ...\n"); bios->acpi_build_tables(); } acpi_enable_sci(); hvm_param_set(HVM_PARAM_ACPI_IOPORTS_LOCATION, 1); } init_vm86_tss(); cmos_write_memory_size(); printf("BIOS map:\n"); if ( SCRATCH_PHYSICAL_ADDRESS != scratch_start ) printf(" %05x-%05lx: Scratch space\n", SCRATCH_PHYSICAL_ADDRESS, scratch_start); printf(" %05x-%05x: Main BIOS\n", bios->bios_address, bios->bios_address + bios->image_size - 1); if ( bios->e820_setup ) bios->e820_setup(); if ( bios->bios_info_finish ) bios->bios_info_finish(); xenbus_shutdown(); printf("Invoking %s ...\n", bios->name); return 0; }
int main(int argc, char* argv[]) { int err = EXIT_FAILURE; struct cmd_line cmd_line; setlocale(LC_ALL, ""); if (parse_cmd_line(argc, argv, &cmd_line) < 0) goto arg_check_failed; const double cpu_mhz = get_cpu_mhz(); if (cpu_mhz < 0) goto get_cpu_mhz_failed; const double cycles_per_ns = cpu_mhz / 1e3; const double cycles_limit = cycles_per_ns * cmd_line.limit_ns; size_t spikes_count = 0; struct spike * spikes = calloc(1000, sizeof(struct spike)); if (! spikes) { fprintf(stderr, "Failed to allocate spikes %s\n", strerror(errno)); goto calloc_failed; } struct timestamp initial_timestamp; read_timestamp_counter(&initial_timestamp); struct timestamp t[2]; read_timestamp_counter(&t[0]); size_t i; for (i = 1; i < cmd_line.iteration_count; ++ i) { read_timestamp_counter(&t[i % 2]); uint64_t diff = diff_timestamps(&t[(i - 1) % 2], &t[i % 2]); if (diff > cycles_limit) { spikes[spikes_count].cycles_delta = diff; memcpy(&spikes[spikes_count].timestamp, &t[i % 2], sizeof(struct timestamp)); ++ spikes_count; if (spikes_count == MAX_SPIKES) { print_spikes(spikes, spikes_count, cycles_per_ns, &initial_timestamp); spikes_count = 0; } read_timestamp_counter(&t[ i % 2]); } } print_spikes(spikes, spikes_count, cycles_per_ns, &initial_timestamp); const double cycles_per_ms = cpu_mhz * 1e3; fprintf(stdout, "Iterations count: %'zu\n" "Sampling duration: %'.0lf ms\n" "Detected frequency: %.0lf Mhz\n", cmd_line.iteration_count, cycle_since_timestamp(&initial_timestamp) / cycles_per_ms, cpu_mhz); err = EXIT_SUCCESS; free(spikes); calloc_failed: get_cpu_mhz_failed: arg_check_failed: return err; }