Beispiel #1
0
static void print_report(unsigned size, int duplex,cycles_t *tposted, cycles_t *tcompleted,
						 struct perftest_parameters *user_param,int noPeak, int no_cpu_freq_fail) {

	double cycles_to_units;
	unsigned long tsize;	/* Transferred size, in megabytes */
	int i, j;
	int opt_posted = 0, opt_completed = 0;
	cycles_t opt_delta;
	cycles_t t;
	int iters = user_param->iters;


	opt_delta = tcompleted[opt_posted] - tposted[opt_completed];

	if (!noPeak) {
		/* Find the peak bandwidth unless asked not to in command line*/
		for (i = 0; i < iters * user_param->num_of_qps; ++i)
		  for (j = i; j < iters * user_param->num_of_qps; ++j) {
		    t = (tcompleted[j] - tposted[i]) / (j - i + 1);
		    if (t < opt_delta) {
		      opt_delta  = t;
		      opt_posted = i;
		      opt_completed = j;
		    }
		  }
	}
	
	cycles_to_units = get_cpu_mhz(no_cpu_freq_fail) * 1000000;

	tsize = duplex ? 2 : 1;
	tsize = tsize * size;
	printf(REPORT_FMT,size,iters,!(noPeak) * tsize * cycles_to_units / opt_delta / 0x100000,
	       tsize*iters*user_param->num_of_qps*cycles_to_units/(tcompleted[(iters*user_param->num_of_qps) - 1] - tposted[0]) / 0x100000);
}
static void print_report(struct perftest_parameters *user_param) {

	double cycles_to_units;
	unsigned long tsize;	/* Transferred size, in megabytes */
	int i, j;
	int opt_posted = 0, opt_completed = 0;
	cycles_t opt_delta;
	cycles_t t;


	opt_delta = tcompleted[opt_posted] - tposted[opt_completed];

	/* Find the peak bandwidth */
	for (i = 0; i < user_param->iters * user_param->num_of_qps; ++i)
		for (j = i; j < user_param->iters * user_param->num_of_qps; ++j) {
			t = (tcompleted[j] - tposted[i]) / (j - i + 1);
			if (t < opt_delta) {
				opt_delta  = t;
				opt_posted = i;
				opt_completed = j;
            }
	}

	cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000;

	tsize = user_param->duplex ? 2 : 1;
	tsize = tsize * user_param->size;
	printf(REPORT_FMT,
	       (unsigned long)user_param->size,user_param->iters,tsize * cycles_to_units / opt_delta / 0x100000,
	       tsize * user_param->iters * user_param->num_of_qps * cycles_to_units /(tcompleted[(user_param->iters* user_param->num_of_qps) - 1] - tposted[0]) / 0x100000);
}
Beispiel #3
0
/*
 * The retrieval of the clock time and the TSC are not atomic, there
 * may be time unaccounted for.
 *
 * Could profile the RDTSC call at startup and use that measurement to
 * determine lost time...I'm not kidding, this comes from an Intel
 * guide about benchmarking and TSC.
 *
 * Now, if the clock time is stored somewhere whenever the TSC is
 * reset then I could use that value as the base and it would be
 * accurate, but for now use this hack.
 */
static void
_init_fasttime()
{
	(void) check_tsc();

	if ((_sys_clock_gettime = dlsym(RTLD_NEXT, "clock_gettime")) == NULL) {
		perror("failed to load system clock_gettime()");
		exit(1);
	}

	if ((_sys_gettimeofday = dlsym(RTLD_NEXT, "gettimeofday")) == NULL) {
		perror("failed to load system gettimeofday()");
		exit(1);
	}

	/*
	 * The approximate value of the kernel's cpu_freq_hz.
	 * Approximate because the kernel uses emperical readings of
	 * the TSC against PIT timeouts to determine the clock
	 * frequency. The pi_clock value should be based on this value
	 * but some of the precision is lost, not sure if that
	 * matters much in practice.
	 */
	approx_cpu_hz = MHZ_TO_HZ(get_cpu_mhz());
	nsec_scale =
	    (uint64_t)(((uint64_t)NANOSEC << (32 - NSEC_SHIFT)) / approx_cpu_hz);

	sync_local_clock();
}
Beispiel #4
0
static void print_report(struct report_options * options,
			 unsigned int iters, cycles_t *tstamp,int size, int no_cpu_freq_fail)
{
	double cycles_to_units;
	cycles_t median;
	unsigned int i;
	const char* units;
	cycles_t *delta = malloc((iters - 1) * sizeof *delta);

	if (!delta) {
		perror("malloc");
		return;
	}

	for (i = 0; i < iters - 1; ++i)
		delta[i] = tstamp[i + 1] - tstamp[i];


	if (options->cycles) {
		cycles_to_units = 1;
		units = "cycles";
	} else {
		cycles_to_units = get_cpu_mhz(no_cpu_freq_fail);
		units = "usec";
	}

	if (options->unsorted) {
		printf("#, %s\n", units);
		for (i = 0; i < iters - 1; ++i)
			printf("%d, %g\n", i + 1, delta[i] / cycles_to_units / 2);
	}

	qsort(delta, iters - 1, sizeof *delta, cycles_compare);

	if (options->histogram) {
		printf("#, %s\n", units);
		for (i = 0; i < iters - 1; ++i)
			printf("%d, %g\n", i + 1, delta[i] / cycles_to_units / 2);
	}

	median = get_median(iters - 1, delta);
	printf("%7d        %d        %7.2f        %7.2f          %7.2f\n",
	       size,iters,delta[0] / cycles_to_units / 2,
	       delta[iters - 2] / cycles_to_units / 2,median / cycles_to_units / 2);
	free(delta);
}
Beispiel #5
0
static void print_report(struct perftest_parameters *user_param) {

	double cycles_to_units;
	cycles_t median;
	unsigned int i;
	const char* units;
	cycles_t *delta = malloc((user_param->iters - 1) * sizeof *delta);

	if (!delta) {
		perror("malloc");
		return;
	}

	for (i = 0; i < user_param->iters - 1; ++i)
		delta[i] = tstamp[i + 1] - tstamp[i];


	if (user_param->r_flag->cycles) {
		cycles_to_units = 1;
		units = "cycles";
	} else {
		cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f);
		units = "usec";
	}

	if (user_param->r_flag->unsorted) {
		printf("#, %s\n", units);
		for (i = 0; i < user_param->iters - 1; ++i)
			printf("%d, %g\n", i + 1, delta[i] / cycles_to_units );
	}

	qsort(delta, user_param->iters - 1, sizeof *delta, cycles_compare);

	if (user_param->r_flag->histogram) {
		printf("#, %s\n", units);
		for (i = 0; i < user_param->iters - 1; ++i)
			printf("%d, %g\n", i + 1, delta[i] / cycles_to_units );
	}

	median = get_median(user_param->iters - 1, delta);
	printf(REPORT_FMT_LAT,(unsigned long)user_param->size,user_param->iters,delta[0] / cycles_to_units ,
	       delta[user_param->iters - 2] / cycles_to_units ,median / cycles_to_units );

	free(delta);
}
Beispiel #6
0
int main (int argc, char *argv[])
{
	start_counter();
	(void) argv; //supress warning.
	//0)  Compute CPU speed experimentally.
	mhz = get_cpu_mhz();  // on a 3.5 ghz proc this is usually ~3 4xx xyz xyz, indicating cycles/second.
	printf("# Cpu_mhz: %lu\n", mhz);
	fflush(stdout);
	if (single_affinity) {
		set_single_cpu_affinity();
	}

	// Just passing an arg will branch to context switch experiment.
	if (argc == 2) {
		context_switch_experiment();
		return 0;
	} else {
		inactive_time_experiment();
	}

  printf("\n\n"); //clear screen.
  return 0;
}
/*---------------------------------------------------------------------------*/
int run_client_test(struct perf_parameters *user_param)
{
	struct session_data	sess_data;
	struct perf_comm	*comm;
	struct thread_data	*tdata;
	char			url[256];
	int			i = 0;
	int			max_cpus;
	pthread_t		statistics_thread_id;
	struct perf_command	command;
	int			size_log2;
	int			max_size_log2 = 24;


	/* client session attributes */
	struct xio_session_attr attr = {
		&ses_ops,
		NULL,
		0
	};
	xio_init();

	g_mhz		= get_cpu_mhz(0);
	max_cpus	= sysconf(_SC_NPROCESSORS_ONLN);
	threads_iter	= 1;
	size_log2	= 0;

	tdata = calloc(user_param->threads_num, sizeof(*tdata));
	if (tdata == NULL) {
		fprintf(fd, "malloc failed\n");
		return -1;
	}

	comm = create_comm_struct(user_param);
	if (establish_connection(comm)) {
		fprintf(stderr, "failed to establish connection\n");
		free(tdata);
		destroy_comm_struct(comm);
		return -1;
	}

	if (user_param->output_file) {
		fd = fopen(user_param->output_file, "w");
		if (fd == NULL) {
			fprintf(fd, "file open failed. %s\n",
				user_param->output_file);
			free(sess_data.tdata);
			destroy_comm_struct(comm);
			return -1;
		}
		fprintf(fd, "size, threads, tps, bw[Mbps], lat[usec]\n");
		fflush(fd);
	}


	printf("%s", RESULT_FMT);
	printf("%s", RESULT_LINE);


	while (threads_iter <= user_param->threads_num)  {
		data_len	= (uint64_t)1 << size_log2;

		memset(&sess_data, 0, sizeof(sess_data));
		memset(tdata, 0, user_param->threads_num*sizeof(*tdata));
		sess_data.tdata = tdata;

		command.test_param.machine_type	= user_param->machine_type;
		command.test_param.test_type	= user_param->test_type;
		command.test_param.verb		= user_param->verb;
		command.test_param.data_len	= data_len;
		command.command			= GetTestParams;

		ctx_write_data(comm, &command, sizeof(command));

		sprintf(url, "rdma://%s:%d", user_param->server_addr,
			user_param->server_port);
		sess_data.session = xio_session_create(XIO_SESSION_CLIENT,
				&attr, url, 0, 0, &sess_data);
		if (sess_data.session == NULL) {
			int error = xio_errno();
			fprintf(stderr,
				"session creation failed. reason %d - (%s)\n",
				error, xio_strerror(error));
			goto cleanup;
		}

		pthread_create(&statistics_thread_id, NULL,
			       statistics_thread_cb, &sess_data);

		/* spawn threads to handle connection */
		for (i = 0; i < threads_iter; i++) {
			sess_data.tdata[i].affinity		=
				((user_param->cpu + i) % max_cpus);
			sess_data.tdata[i].cid			= i;
			sess_data.tdata[i].sdata		= &sess_data;
			sess_data.tdata[i].user_param		= user_param;
			sess_data.tdata[i].data_len		= data_len;

			/* all threads are working on the same session */
			sess_data.tdata[i].session	= sess_data.session;
			pthread_create(&sess_data.tdata[i].thread_id, NULL,
				       worker_thread, &sess_data.tdata[i]);
		}

		pthread_join(statistics_thread_id, NULL);

		/* join the threads */
		for (i = 0; i < threads_iter; i++)
			pthread_join(sess_data.tdata[i].thread_id, NULL);

		/* close the session */
		xio_session_destroy(sess_data.session);

		if (sess_data.abort) {
			fprintf(stderr, "program aborted\n");
			goto cleanup;
		}

		/* send result to server */
		command.results.bytes		= data_len;
		command.results.threads		= threads_iter;
		command.results.tps		= sess_data.tps;
		command.results.avg_bw		= sess_data.avg_bw;
		command.results.avg_lat		= sess_data.avg_lat_us;
		command.results.min_lat		= sess_data.min_lat_us;
		command.results.max_lat		= sess_data.max_lat_us;
		command.command			= GetTestResults;

		/* sync point */
		ctx_write_data(comm, &command, sizeof(command));

		printf(REPORT_FMT,
		       data_len,
		       threads_iter,
		       sess_data.tps,
		       sess_data.avg_bw,
		       sess_data.avg_lat_us,
		       sess_data.min_lat_us,
		       sess_data.max_lat_us);
		if (fd)
			fprintf(fd, "%lu, %d, %lu, %.2lf, %.2lf\n",
				data_len,
				threads_iter,
				sess_data.tps,
				sess_data.avg_bw,
				sess_data.avg_lat_us);
		fflush(fd);

		/* sync point */
		ctx_read_data(comm, NULL, 0, NULL);

		if (++size_log2 < max_size_log2)
			continue;

		threads_iter++;
		size_log2 = 0;
	}

	printf("%s", RESULT_LINE);

cleanup:
	if (fd)
		fclose(fd);

	ctx_hand_shake(comm);

	ctx_close_connection(comm);

	destroy_comm_struct(comm);

	free(tdata);

	xio_shutdown();

	return 0;
}
int
main(int argc, char* argv[])
{
    int err = EXIT_FAILURE;

    struct cmd_line cmd_line;

    setlocale(LC_ALL, "");

    if (parse_cmd_line(argc, argv, &cmd_line) < 0)
        goto arg_check_failed;

    struct pci_access * pci = pci_alloc();
    if (! pci)
        goto pci_alloc_failed;

    /* This access bypass the kernel and use a memory mapping
     * to PCI configuration registers */
    pci->method = PCI_ACCESS_I386_TYPE1;

    struct pci_dev * dev = create_pci_dev(pci, cmd_line.slot);
    if (! dev)
        goto create_pci_dev_failed;

    print_device_name(pci, dev);

    unsigned long * timestamps
            = malloc(sizeof(*timestamps) * cmd_line.iteration_count);
    if (! timestamps)
    {
        fprintf(stderr, "Can't allocate timestamp storage (%s)\n",
                strerror(errno));
        goto malloc_failed;
    }

    struct timestamp t;
    read_timestamp_counter(&t);

    perform_reads(dev,
                  timestamps,

                  cmd_line.iteration_count,
                  cmd_line.wait_time_us);

    unsigned long test_duration_cycles = cycle_since_timestamp(&t);

    double cpu_mhz = get_cpu_mhz();
    if (cpu_mhz < 0)
        goto get_cpu_mhz_failed;

    print_results(cpu_mhz,
                  timestamps,
                  cmd_line.iteration_count,
                  test_duration_cycles,
                  cmd_line.limit_ns);


    err = EXIT_SUCCESS;

get_cpu_mhz_failed:
    free(timestamps);
malloc_failed:
    pci_free_dev(dev);
create_pci_dev_failed:
    pci_cleanup(pci);
pci_alloc_failed:
arg_check_failed:
    return err;
}
Beispiel #9
0
int main(void)
{
    const struct bios_config *bios;
    int acpi_enabled;

    /* Initialise hypercall stubs with RET, rendering them no-ops. */
    memset((void *)HYPERCALL_PHYSICAL_ADDRESS, 0xc3 /* RET */, PAGE_SIZE);

    printf("HVM Loader\n");

    init_hypercalls();

    xenbus_setup();

    bios = detect_bios();
    printf("System requested %s\n", bios->name);

    printf("CPU speed is %u MHz\n", get_cpu_mhz());

    apic_setup();
    pci_setup();

    smp_initialise();

    perform_tests();

    if ( bios->bios_info_setup )
        bios->bios_info_setup();

    if ( bios->create_smbios_tables )
    {
        printf("Writing SMBIOS tables ...\n");
        bios->create_smbios_tables();
    }

    printf("Loading %s ...\n", bios->name);
    if ( bios->bios_load )
        bios->bios_load(bios);
    else
    {
        BUG_ON(bios->bios_address + bios->image_size >
               HVMLOADER_PHYSICAL_ADDRESS);
        memcpy((void *)bios->bios_address, bios->image,
               bios->image_size);
    }

    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
    {
        if ( bios->create_mp_tables )
            bios->create_mp_tables();
        if ( bios->create_pir_tables )
            bios->create_pir_tables();
    }

    if ( bios->load_roms )
        bios->load_roms();

    acpi_enabled = !strncmp(xenstore_read("platform/acpi", "1"), "1", 1);

    if ( acpi_enabled )
    {
        init_vnuma_info();

        if ( bios->acpi_build_tables )
        {
            printf("Loading ACPI ...\n");
            bios->acpi_build_tables();
        }

        acpi_enable_sci();

        hvm_param_set(HVM_PARAM_ACPI_IOPORTS_LOCATION, 1);
    }

    init_vm86_tss();

    cmos_write_memory_size();

    printf("BIOS map:\n");
    if ( SCRATCH_PHYSICAL_ADDRESS != scratch_start )
        printf(" %05x-%05lx: Scratch space\n",
               SCRATCH_PHYSICAL_ADDRESS, scratch_start);
    printf(" %05x-%05x: Main BIOS\n",
           bios->bios_address,
           bios->bios_address + bios->image_size - 1);

    if ( bios->e820_setup )
        bios->e820_setup();

    if ( bios->bios_info_finish )
        bios->bios_info_finish();

    xenbus_shutdown();

    printf("Invoking %s ...\n", bios->name);
    return 0;
}
Beispiel #10
0
int main(void)
{
    const struct bios_config *bios;
    int acpi_enabled;
    const struct hvm_modlist_entry *bios_module;

    /* Initialise hypercall stubs with RET, rendering them no-ops. */
    memset((void *)HYPERCALL_PHYSICAL_ADDRESS, 0xc3 /* RET */, PAGE_SIZE);

    printf("HVM Loader\n");
    BUG_ON(hvm_start_info->magic != XEN_HVM_START_MAGIC_VALUE);

    init_hypercalls();

    memory_map_setup();

    xenbus_setup();

    bios = detect_bios();
    printf("System requested %s\n", bios->name);

    printf("CPU speed is %u MHz\n", get_cpu_mhz());

    apic_setup();
    pci_setup();

    smp_initialise();

    perform_tests();

    if ( bios->bios_info_setup )
        bios->bios_info_setup();

    if ( bios->create_smbios_tables )
    {
        printf("Writing SMBIOS tables ...\n");
        bios->create_smbios_tables();
    }

    printf("Loading %s ...\n", bios->name);
    bios_module = get_module_entry(hvm_start_info, "firmware");
    if ( bios_module )
    {
        uint32_t paddr = bios_module->paddr;

        bios->bios_load(bios, (void*)paddr, bios_module->size);
    }
#ifdef ENABLE_ROMBIOS
    else if ( bios == &rombios_config )
    {
        bios->bios_load(bios, NULL, 0);
    }
#endif
    else
    {
        /*
         * If there is no BIOS module supplied and if there is no embeded BIOS
         * image, then we failed. Only rombios might have an embedded bios blob.
         */
        printf("no BIOS ROM image found\n");
        BUG();
    }

    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
    {
        if ( bios->create_mp_tables )
            bios->create_mp_tables();
        if ( bios->create_pir_tables )
            bios->create_pir_tables();
    }

    if ( bios->load_roms )
        bios->load_roms();

    acpi_enabled = !strncmp(xenstore_read("platform/acpi", "1"), "1", 1);

    if ( acpi_enabled )
    {
        init_vnuma_info();

        if ( bios->acpi_build_tables )
        {
            printf("Loading ACPI ...\n");
            bios->acpi_build_tables();
        }

        acpi_enable_sci();

        hvm_param_set(HVM_PARAM_ACPI_IOPORTS_LOCATION, 1);
    }

    init_vm86_tss();

    cmos_write_memory_size();

    printf("BIOS map:\n");
    if ( SCRATCH_PHYSICAL_ADDRESS != scratch_start )
        printf(" %05x-%05lx: Scratch space\n",
               SCRATCH_PHYSICAL_ADDRESS, scratch_start);
    printf(" %05x-%05x: Main BIOS\n",
           bios->bios_address,
           bios->bios_address + bios->image_size - 1);

    if ( bios->e820_setup )
        bios->e820_setup();

    if ( bios->bios_info_finish )
        bios->bios_info_finish();

    xenbus_shutdown();

    printf("Invoking %s ...\n", bios->name);
    return 0;
}
int
main(int argc, char* argv[])
{
    int err = EXIT_FAILURE;

    struct cmd_line cmd_line;

    setlocale(LC_ALL, "");

    if (parse_cmd_line(argc, argv, &cmd_line) < 0)
        goto arg_check_failed;

    const double cpu_mhz = get_cpu_mhz();
    if (cpu_mhz < 0)
        goto get_cpu_mhz_failed;

    const double cycles_per_ns = cpu_mhz / 1e3;
    const double cycles_limit = cycles_per_ns * cmd_line.limit_ns;

    size_t spikes_count = 0;

    struct spike * spikes = calloc(1000, sizeof(struct spike));
    if (! spikes) {
        fprintf(stderr, "Failed to allocate spikes %s\n",
                strerror(errno));
        goto calloc_failed;
    }

    struct timestamp initial_timestamp;
    read_timestamp_counter(&initial_timestamp);

    struct timestamp t[2];
    read_timestamp_counter(&t[0]);

    size_t i;
    for (i = 1; i < cmd_line.iteration_count; ++ i)
    {
        read_timestamp_counter(&t[i % 2]);
        uint64_t diff = diff_timestamps(&t[(i - 1) % 2], &t[i % 2]);
        if (diff > cycles_limit)
        {
            spikes[spikes_count].cycles_delta = diff;
            memcpy(&spikes[spikes_count].timestamp,
                   &t[i % 2], sizeof(struct timestamp));
            ++ spikes_count;

            if (spikes_count == MAX_SPIKES)
            {
                print_spikes(spikes, spikes_count,
                             cycles_per_ns, &initial_timestamp);
                spikes_count = 0;
            }

            read_timestamp_counter(&t[ i % 2]);
        }
    }

    print_spikes(spikes, spikes_count,
                 cycles_per_ns, &initial_timestamp);

    const double cycles_per_ms = cpu_mhz * 1e3;

    fprintf(stdout, "Iterations count: %'zu\n"
                    "Sampling duration: %'.0lf ms\n"
                    "Detected frequency: %.0lf Mhz\n",
                    cmd_line.iteration_count,
                    cycle_since_timestamp(&initial_timestamp) / cycles_per_ms,
                    cpu_mhz);

    err = EXIT_SUCCESS;

    free(spikes);
calloc_failed:
get_cpu_mhz_failed:
arg_check_failed:
    return err;
}