示例#1
0
static void *do_spmv_thread_main(void *arg)
{
    spm_mt_thread_t *spm_mt_thread = (spm_mt_thread_t *) arg;
    SPMV_NAME(_fn_t) *spmv_mt_fn = spm_mt_thread->spmv_fn;
    setaffinity_oncpu(spm_mt_thread->cpu);

    int i;
    tsc_t total_tsc, thread_tsc;

    tsc_init(&total_tsc);
    tsc_init(&thread_tsc);
    tsc_start(&total_tsc);
    for (i = 0; i < loops_nr; i++) {
        pthread_barrier_wait(&barrier);
        tsc_start(&thread_tsc);
        spmv_mt_fn(spm_mt_thread->spm, spm_mt_thread->data, y);
        tsc_pause(&thread_tsc);
        pthread_barrier_wait(&barrier);
    }

    tsc_pause(&total_tsc);
    spm_mt_thread->secs = tsc_getsecs(&thread_tsc);
    secs = tsc_getsecs(&total_tsc);
    tsc_shut(&thread_tsc);
    tsc_shut(&total_tsc);

    return (void *) 0;
}
示例#2
0
/*
 * Run <cmd> (with its arguments) as a child process and report the TSC time
 * and performance-counter readings taken by the parent while it waits for
 * the child to terminate.
 *
 * Usage: prog <cmd> (args)
 *
 * The process restricts itself to CPU 0 before forking; the child inherits
 * that affinity. The child does nothing but exec the command. The parent
 * initialises the timer and counters, starts them, waits for the child, stops
 * them and prints both reports.
 *
 * Returns 0 after reporting. Exits with status 1 on a usage error or when
 * sched_setaffinity(), fork(), execv() or waitpid() fails.
 */
int main(int argc, char **argv)
{
	pid_t pid;
	int status;
	char **new_argv;
	prfcnt_t prfcnt;
	tsc_t timer;
	cpu_set_t cpu_set;
	int err;

	if (argc < 2) {
		printf("Usage: %s <cmd> (args)\n", argv[0]);
		exit(1);
	}

	/* The child's argv is our own argv with the program name dropped. */
	new_argv = &argv[1];

	/*
	 * CPU affinity is inherited across a fork()
	 */
	CPU_ZERO(&cpu_set);
	CPU_SET(0, &cpu_set);

	err = sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpu_set);
	if (err) {
		perror("sched_setaffinity");
		exit(1);
	}

	pid = fork();
	if (pid < 0) {
		perror("fork");
		exit(1);
	}

	if (pid == 0) {
		/*
		 * Child: exec straight away. It never uses the timer or the
		 * counters, so it does not initialise them.
		 */
		execv(new_argv[0], new_argv);
		perror("execv");
		/*
		 * _exit() rather than exit(): the child holds copies of the
		 * parent's stdio buffers and atexit handlers and must not
		 * flush/run them a second time.
		 */
		_exit(1);
	}

	/* Parent only from here on. */
	tsc_init(&timer);
	prfcnt_init(&prfcnt, 0, PRFCNT_FL_T0 | PRFCNT_FL_T1);

	/*
	 * FIXME: Is this efficient enough ? Could it be done better ?
	 */
	prfcnt_start(&prfcnt);
	tsc_start(&timer);
	if (waitpid(pid, &status, 0) < 0) {
		/* Without a reaped child the readings would be meaningless. */
		perror("waitpid");
		exit(1);
	}
	tsc_pause(&timer);
	prfcnt_pause(&prfcnt);

	tsc_report(&timer);
	prfcnt_report(&prfcnt);

	return 0;
}
示例#3
0
static void *do_spmv_thread_main_swap(void *arg)
{
	spm_mt_thread_t *spm_mt_thread;
#ifdef SPMV_PRFCNT
	prfcnt_t *prfcnt;
#endif
	SPMV_NAME(_fn_t) *spmv_mt_fn;
	tsc_t tsc;

	spm_mt_thread = arg;
	spmv_mt_fn = spm_mt_thread->spmv_fn;
#ifdef SPMV_PRFCNT
	prfcnt = (prfcnt_t *) spm_mt_thread->data;
#endif
	setaffinity_oncpu(spm_mt_thread->cpu);

	VECTOR_NAME(_init_rand_range)(x, (ELEM_TYPE) -1000, (ELEM_TYPE) 1000);

	// Assert this is a square matrix and swap is ok.
	assert(x->size == y->size);
	tsc_init(&tsc);
	tsc_start(&tsc);
#ifdef SPMV_PRFCNT
	prfcnt_init(prfcnt, spm_mt_thread->cpu, PRFCNT_FL_T0 | PRFCNT_FL_T1);
	prfcnt_start(prfcnt);
#endif
	int i;
	for (i = 0; i < loops_nr; i++) {
		pthread_barrier_wait(&barrier);
		spmv_mt_fn(spm_mt_thread->spm, x, y);
		pthread_barrier_wait(&barrier);
		SWAP(x, y);
	}
	tsc_pause(&tsc);
#ifdef SPMV_PRFCNT
	prfcnt_pause(prfcnt);
#endif
	secs = tsc_getsecs(&tsc);
	tsc_shut(&tsc);

	return NULL;
}
示例#4
0
/*
 * Sum a random integer array with an OpenMP parallel reduction and check the
 * result.
 *
 * Usage: prog [number_of_ints]
 *
 * number_of_ints defaults to 100000 when the argument is missing, is not a
 * number, or is not positive. The array comes from arr_int_mkrand(), which
 * also fills in sum1 (presumably the reference sum -- it is what the parallel
 * result is compared against). The parallel loop is timed and reported under
 * the label "sum_OMP".
 *
 * Returns 0 on success; calls abort() when the two sums differ.
 */
int
main(int argc, const char *argv[])
{
	unsigned nthreads;
	size_t nints;
	int sum1, sum2;
	int *arr;

	nints = 0;
	if (argc > 1) {
		/*
		 * strtol() has defined behaviour for out-of-range input (atol()
		 * does not), and a negative count must not be converted into a
		 * huge size_t.
		 */
		long requested = strtol(argv[1], NULL, 10);
		if (requested > 0)
			nints = (size_t)requested;
	}
	if (nints == 0)
		nints = 100000;

	/* Ask the runtime, from inside a parallel region, how many threads it uses. */
	#pragma omp parallel
	#pragma omp master
	nthreads = omp_get_num_threads();

	printf("Number of threads: %u\n", nthreads);
	/* %zu is the conversion that matches size_t. */
	printf("number of ints:    %zu\n", nints);
	arr = arr_int_mkrand(nints, &sum1);

	sum2 = 0;
	tsc_t t; tsc_init(&t); tsc_start(&t);
	#pragma omp parallel for reduction(+:sum2)
	for (size_t i=0; i<nints; i++) {
		sum2 += sum_op(arr[i]);
	}
	tsc_pause(&t);

	tsc_report("sum_OMP", &t);

	if (sum1 != sum2) {
		fprintf(stderr, "Error in sum: %d vs %d\n", sum1, sum2);
		abort();
	}

	printf("DONE\n");
	return 0;
}
示例#5
0
static void *do_spmv_thread_main(void *arg)
{
	spm_mt_thread_t *spm_mt_thread = arg;
	SPMV_NAME(_sym_fn_t) *spmv_mt_sym_fn = spm_mt_thread->spmv_fn;

	setaffinity_oncpu(spm_mt_thread->cpu);

	tsc_t tsc;

	tsc_init(&tsc);
	tsc_start(&tsc);

	// Switch Reduction Phase
	int i/*, j, start, end*/;
	/*
	start = 0;
	end = n / ncpus;
	*/
	for (i = 0; i < nloops; i++) {
		// Switch Reduction Phase.
		VECTOR_NAME(_init_from_map)(temp, 0, spm_mt_thread->map);
		pthread_barrier_wait(&barrier);
		spmv_mt_sym_fn(spm_mt_thread->spm, spm_mt_thread->data, y, y);
		pthread_barrier_wait(&barrier);
		// Switch Reduction Phase.
		/*
		for (j = 0; j < ncpus; j++)
		 	VECTOR_NAME(_add_part)(y, temp[j], y, start, end);
		*/
		VECTOR_NAME(_add_from_map)(y, temp, y, spm_mt_thread->map);
		pthread_barrier_wait(&barrier);
	}

	tsc_pause(&tsc);
	secs = tsc_getsecs(&tsc);
	tsc_shut(&tsc);

	return NULL;
}
示例#6
0
文件: versla.c 项目: kkourt/xarray
/*
 * Benchmark: full copy of a plain array versus a versioned skip-list array
 * (versla) when a number of random elements are overwritten with 0.
 *
 * Usage: prog <array_size> <block_size> <accesses>
 *
 *   array_size  number of elements; must be greater than zero
 *   block_size  stored into the sla's def_nitems field
 *   accesses    number of random positions set to 0 in each variant
 *
 * Both variants reseed rand() with the same seed, so they touch the same
 * sequence of indices. Each variant is timed and reported, the ratio of the
 * two tick counts is printed, and the resulting arrays are compared element
 * by element; mismatches are printed to stderr. When DO_SUMS is defined the
 * timed sections also sum all elements, and the two sums are asserted equal
 * at the end.
 *
 * Returns 0; exits with status 1 on a usage error.
 */
int
main(int argc, const char *argv[])
{
	if (argc < 4) {
		fprintf(stderr,
		        "Usage: %s <array_size> <block_size> <accesses>\n",
			argv[0]);
		exit(1);
	}

	unsigned int asize = atol(argv[1]);
	unsigned int bsize = atol(argv[2]);
	unsigned int accesses = atol(argv[3]);
	unsigned int seed = time(NULL);

	/* Random indices are drawn with rand() % asize, which needs asize != 0. */
	if (asize == 0) {
		fprintf(stderr, "%s: <array_size> must be greater than zero\n",
			argv[0]);
		exit(1);
	}

	tsc_t tc;
	/* normal pointers */
	srand(seed);
	printf("CoPy\n");
	unsigned int *p, *p_copy;
	unsigned int sum_copy = 0;
	p = xmalloc(asize*sizeof(unsigned int));
	for (unsigned int i=0; i<asize; i++)
		p[i] = i;
	tsc_init(&tc);
	tsc_start(&tc);
	/* Timed: allocate and fill the copy, then apply the random writes. */
	p_copy = xmalloc(asize*sizeof(unsigned int));
	memcpy(p_copy, p, asize*sizeof(unsigned int));
	for (unsigned int j=0; j<accesses; j++) {
		unsigned int idx = rand() % asize;
		p_copy[idx] = 0;
	}
	#ifdef DO_SUMS
	for (unsigned int j=0; j<asize; j++) {
		sum_copy += p_copy[j];
	}
	#endif
	tsc_pause(&tc);
	tsc_report(&tc);

	/* versioned pointers */
	tsc_t t;
	srand(seed);
	printf("VerSions\n");
	unsigned int sum_versions = 0;
	sla_t *sla = sla_init(10, .5, 16, time(NULL));
	sla->def_nitems = bsize;
	for (unsigned int i=0; i<asize; i++)
		sla_append(sla, i);
	tsc_init(&t);
	tsc_start(&t);
	/* Timed: create a new version on top of the base and apply the writes. */
	versla_t *versla = versla_init(sla);
	ver_t *v1 = versla_newver(versla, versla->vo.ver_base);
	for (unsigned int j=0; j<accesses; j++) {
		unsigned int idx = rand() % asize;
		versla_set(versla, idx, 0, v1);
	}
	#ifdef DO_SUMS
	for (unsigned int j=0; j<asize; j++) {
		unsigned int x = versla_get(versla, j, v1);
		sum_versions += x;
	}
	#endif
	tsc_pause(&t);
	tsc_report(&t);

	printf("\ntC/tV=%lf\n", (double)tsc_getticks(&tc)/(double)tsc_getticks(&t));
	/* Cross-check: both variants must hold the same contents. */
	for (unsigned int j=0; j<asize; j++) {
		unsigned int x0 = p_copy[j];
		unsigned int x1 = versla_get(versla, j, v1);
		if (x0 != x1) {
			/* All three values are unsigned int, hence %u. */
			fprintf(stderr, "copy:%u and versions:%u differ for j=%u\n", x0, x1, j);
		}
	}
	/* Both sums stay 0 when DO_SUMS is not defined. */
	assert(sum_versions == sum_copy);
	return 0;
}