static void *do_spmv_thread_main(void *arg) { spm_mt_thread_t *spm_mt_thread = (spm_mt_thread_t *) arg; SPMV_NAME(_fn_t) *spmv_mt_fn = spm_mt_thread->spmv_fn; setaffinity_oncpu(spm_mt_thread->cpu); int i; tsc_t total_tsc, thread_tsc; tsc_init(&total_tsc); tsc_init(&thread_tsc); tsc_start(&total_tsc); for (i = 0; i < loops_nr; i++) { pthread_barrier_wait(&barrier); tsc_start(&thread_tsc); spmv_mt_fn(spm_mt_thread->spm, spm_mt_thread->data, y); tsc_pause(&thread_tsc); pthread_barrier_wait(&barrier); } tsc_pause(&total_tsc); spm_mt_thread->secs = tsc_getsecs(&thread_tsc); secs = tsc_getsecs(&total_tsc); tsc_shut(&thread_tsc); tsc_shut(&total_tsc); return (void *) 0; }
/**
 * @brief Timer handler procedure
 *
 * This is not a realistic workload and it is a demonstration code only.
 *
 * It runs a few thousand iterations; every iteration picks two random
 * offsets in the shared timer buffer and performs a short strided
 * read-modify-write between them.
 *
 * @param sig UNUSED
 * @param si UNUSED
 * @param uc UNUSED
 */
static void timer_handler(int sig, siginfo_t *si, void *uc)
{
	const int num_iterations = 5000;
	const size_t stride = 5;
	int *data = (int *) timer_data_ptr;
	const size_t nints = timer_data_size / sizeof(int);
	int iter;

	(void) (sig);
	(void) (si);
	(void) (uc);

	tsc_start(&timer_prof);
	/* START - "latency sensitive" code */
	for (iter = 0; iter < num_iterations; iter++) {
		const int dst = timer_rand() % (nints - stride);
		const int src = timer_rand() % (nints - stride);
		size_t k;
		for (k = 0; k < stride; k++)
			data[dst + k] = 2 * data[src + k] + data[dst + k];
	}
	/* END - "latency sensitive" code */
	tsc_end(&timer_prof, 1);
}
int main(int argc, char **argv) { pid_t pid; int status; char **new_argv; prfcnt_t prfcnt; tsc_t timer; cpu_set_t cpu_set; int err; if ( argc < 2){ printf("Usage: %s <cmd> (args)\n", argv[0]); exit(1); } new_argv = &argv[1]; /* * CPU affinity is inherited across a fork() */ CPU_ZERO(&cpu_set); CPU_SET(0,&cpu_set); err = sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpu_set); if (err){ perror("sched_setaffinity"); exit(1); } if ( (pid = fork()) < 0){ perror("fork"); exit(1); } tsc_init(&timer); prfcnt_init(&prfcnt,0,PRFCNT_FL_T0|PRFCNT_FL_T1); /* * FIXME: Is this efficient enough ? Could it be done better ? */ if (pid) { prfcnt_start(&prfcnt); tsc_start(&timer); wait(&status); tsc_pause(&timer); prfcnt_pause(&prfcnt); } else { execv(argv[1],new_argv); perror("execv"); exit(1); } tsc_report(&timer); prfcnt_report(&prfcnt); return 0; }
/*
 * Micro-benchmark: measure the per-call overhead of function_call() by
 * invoking it a fixed number of times, timed simultaneously with a
 * clock (nanoseconds) and the TSC (cycles).
 */
int main(void)
{
	const long spins = 1000000000;
	struct timespec ts;
	uint64_t tsc;

	clock_start(&ts);
	tsc_start(&tsc);

	volatile int sink = 0;	/* volatile keeps the loop body from being elided */
	for (long i = 0; i < spins; i++)
		sink = function_call(i);

	const long long unsigned ns = clock_end(&ts);
	const long long unsigned ticks = tsc_end(&tsc);
	(void) sink;

	printf("%ld spins in %lluns (%.1fns/spin, %.1f clocks/spin)\n",
	       spins, ns, (ns / (double) spins), ticks / (double) spins);
	return 0;
}
/*
 * Benchmark thread body for the pointer-swapping SpMV variant: each
 * iteration computes y = A*x behind barriers, then swaps the x and y
 * vectors so the next iteration consumes the previous result.
 * Optionally wraps the loop in hardware performance counters
 * (SPMV_PRFCNT). Total loop time is published via the shared `secs`.
 */
static void *do_spmv_thread_main_swap(void *arg)
{
	spm_mt_thread_t *spm_mt_thread;
#ifdef SPMV_PRFCNT
	prfcnt_t *prfcnt;
#endif
	SPMV_NAME(_fn_t) *spmv_mt_fn;
	tsc_t tsc;

	spm_mt_thread = arg;
	spmv_mt_fn = spm_mt_thread->spmv_fn;
#ifdef SPMV_PRFCNT
	/* When profiling, the per-thread data slot carries the counter state. */
	prfcnt = (prfcnt_t *) spm_mt_thread->data;
#endif
	setaffinity_oncpu(spm_mt_thread->cpu);
	/* Fill the input vector with random values in [-1000, 1000]. */
	VECTOR_NAME(_init_rand_range)(x, (ELEM_TYPE) -1000, (ELEM_TYPE) 1000);
	// Assert this is a square matrix and swap is ok.
	assert(x->size == y->size);
	tsc_init(&tsc);
	tsc_start(&tsc);
#ifdef SPMV_PRFCNT
	prfcnt_init(prfcnt, spm_mt_thread->cpu, PRFCNT_FL_T0 | PRFCNT_FL_T1);
	prfcnt_start(prfcnt);
#endif
	int i;
	for (i = 0; i < loops_nr; i++) {
		pthread_barrier_wait(&barrier);
		spmv_mt_fn(spm_mt_thread->spm, x, y);
		pthread_barrier_wait(&barrier);
		/* NOTE(review): x and y appear to be shared globals, so EVERY
		 * thread executes this swap after the second barrier — with N
		 * threads the pointers get swapped N times per iteration, and
		 * the swaps race with each other. Confirm SWAP is intended to
		 * run per-thread here (or that it is idempotent/thread-safe). */
		SWAP(x, y);
	}
	tsc_pause(&tsc);
#ifdef SPMV_PRFCNT
	prfcnt_pause(prfcnt);
#endif
	/* `secs` is shared across threads; last writer wins. */
	secs = tsc_getsecs(&tsc);
	tsc_shut(&tsc);

	return NULL;
}
/*
 * OpenMP reduction benchmark: builds a random int array with a known
 * checksum, sums it with a `parallel for reduction(+)`, times the sum,
 * and aborts if the parallel result disagrees with the checksum.
 *
 * argv[1] (optional): number of ints; non-positive or missing -> 100000.
 */
int main(int argc, const char *argv[])
{
	unsigned nthreads = 1;	/* FIX: defined even if no parallel region runs */
	size_t nints = 0;
	int sum1, sum2;
	int *arr;

	if (argc > 1) {
		/* FIX: atol() can return a negative value, which would wrap to
		 * a huge size_t; treat non-positive input as "use the default". */
		long requested = atol(argv[1]);
		if (requested > 0)
			nints = (size_t) requested;
	}
	if (nints == 0)
		nints = 100000;

	#pragma omp parallel
	#pragma omp master
	nthreads = omp_get_num_threads();

	printf("Number of threads: %u\n", nthreads);
	/* FIX: %zu is the correct conversion for size_t (%lu is undefined
	 * behavior on platforms where size_t != unsigned long). */
	printf("number of ints: %zu\n", nints);

	/* sum1 receives the reference checksum computed at fill time. */
	arr = arr_int_mkrand(nints, &sum1);

	sum2 = 0;
	tsc_t t;
	tsc_init(&t);
	tsc_start(&t);
	#pragma omp parallel for reduction(+:sum2)
	for (size_t i = 0; i < nints; i++) {
		sum2 += sum_op(arr[i]);
	}
	tsc_pause(&t);
	tsc_report("sum_OMP", &t);

	if (sum1 != sum2) {
		fprintf(stderr, "Error in sum: %d vs %d\n", sum1, sum2);
		abort();
	}

	printf("DONE\n");
	return 0;
}
/*
 * Benchmark thread body for the symmetric SpMV kernel with a map-based
 * reduction phase. Each iteration is a three-barrier protocol:
 *   1. reset this thread's slice of the temp vector (via its map),
 *   2. run the symmetric kernel (which scatters partial results to temp),
 *   3. fold the temp partials back into y via the map.
 * All three barriers are required so no thread reads/writes temp or y
 * while another thread is in a different phase.
 */
static void *do_spmv_thread_main(void *arg)
{
	spm_mt_thread_t *spm_mt_thread = arg;
	SPMV_NAME(_sym_fn_t) *spmv_mt_sym_fn = spm_mt_thread->spmv_fn;
	setaffinity_oncpu(spm_mt_thread->cpu);

	tsc_t tsc;
	tsc_init(&tsc);
	tsc_start(&tsc);

	// Switch Reduction Phase
	int i/*, j, start, end*/;
	/*
	start = 0;
	end = n / ncpus;
	*/
	for (i = 0; i < nloops; i++) {
		// Switch Reduction Phase: zero this thread's temp entries before
		// the kernel scatters new partial results into them.
		VECTOR_NAME(_init_from_map)(temp, 0, spm_mt_thread->map);
		pthread_barrier_wait(&barrier);
		spmv_mt_sym_fn(spm_mt_thread->spm, spm_mt_thread->data, y, y);
		pthread_barrier_wait(&barrier);
		// Switch Reduction Phase.
		/*
		for (j = 0; j < ncpus; j++)
			VECTOR_NAME(_add_part)(y, temp[j], y, start, end);
		*/
		// Map-based reduction replaces the commented-out per-cpu loop above.
		VECTOR_NAME(_add_from_map)(y, temp, y, spm_mt_thread->map);
		pthread_barrier_wait(&barrier);
	}
	tsc_pause(&tsc);
	/* NOTE(review): `secs` is a shared global; every thread overwrites it
	 * (last writer wins). */
	secs = tsc_getsecs(&tsc);
	tsc_shut(&tsc);

	return NULL;
}
int main(int argc, const char *argv[]) { if (argc < 4) { fprintf(stderr, "Usage: %s <array_size> <block_size> <accesses>\n", argv[0]); exit(1); } unsigned int asize = atol(argv[1]); unsigned int bsize = atol(argv[2]); unsigned int accesses = atol(argv[3]); unsigned int seed = time(NULL); tsc_t tc; /* normal pointers */ srand(seed); printf("CoPy\n"); unsigned int *p, *p_copy; unsigned int sum_copy = 0; p = xmalloc(asize*sizeof(unsigned int)); for (unsigned int i=0; i<asize; i++) p[i] = i; tsc_init(&tc); tsc_start(&tc); p_copy = xmalloc(asize*sizeof(unsigned int)); memcpy(p_copy, p, asize*sizeof(unsigned int)); for (unsigned int j=0; j<accesses; j++) { unsigned int idx = rand() % asize; p_copy[idx] = 0; } #ifdef DO_SUMS for (unsigned int j=0; j<asize; j++) { sum_copy += p_copy[j]; } #endif tsc_pause(&tc); tsc_report(&tc); /* versioned pointers */ tsc_t t; srand(seed); printf("VerSions\n"); unsigned int sum_versions = 0; sla_t *sla = sla_init(10, .5, 16, time(NULL)); sla->def_nitems = bsize; for (unsigned int i=0; i<asize; i++) sla_append(sla, i); tsc_init(&t); tsc_start(&t); versla_t *versla = versla_init(sla); ver_t *v1 = versla_newver(versla, versla->vo.ver_base); for (unsigned int j=0; j<accesses; j++) { unsigned int idx = rand() % asize; versla_set(versla, idx, 0, v1); } #ifdef DO_SUMS for (unsigned int j=0; j<asize; j++) { unsigned int x = versla_get(versla, j, v1); sum_versions += x; } #endif tsc_pause(&t); tsc_report(&t); printf("\ntC/tV=%lf\n", (double)tsc_getticks(&tc)/(double)tsc_getticks(&t)); for (unsigned int j=0; j<asize; j++) { unsigned int x0 = p_copy[j]; unsigned int x1 = versla_get(versla, j, v1); if (x0 != x1) { fprintf(stderr, "copy:%d and versions:%d differ for j=%d\n", x0, x1, j); } } assert(sum_versions == sum_copy); return 0; }