/*
 * Run <cmd> (args) as a child process pinned to CPU 0 and report the
 * timer and performance-counter readings taken by the parent while it
 * waits for that child.
 *
 * argv[1] is the program passed to execv(); argv[1..] is its argument
 * vector.
 *
 * Exits with status 1 on a usage error, or when sched_setaffinity(),
 * fork(), wait() or execv() fails.  Returns 0 otherwise; the child's own
 * exit status is collected but not propagated.
 */
int main(int argc, char **argv)
{
	pid_t pid;
	int status;
	char **new_argv;
	prfcnt_t prfcnt;
	tsc_t timer;
	cpu_set_t cpu_set;
	int err;

	if (argc < 2) {
		printf("Usage: %s <cmd> (args)\n", argv[0]);
		exit(1);
	}
	new_argv = &argv[1];

	/*
	 * CPU affinity is inherited across a fork()
	 */
	CPU_ZERO(&cpu_set);
	CPU_SET(0, &cpu_set);
	err = sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpu_set);
	if (err) {
		perror("sched_setaffinity");
		exit(1);
	}

	/*
	 * Set up the timer and the counters once, before forking: the child
	 * has no use for them and should not repeat the setup, and the parent
	 * can start measuring right after fork() returns.
	 */
	tsc_init(&timer);
	prfcnt_init(&prfcnt, 0, PRFCNT_FL_T0|PRFCNT_FL_T1);

	pid = fork();
	if (pid < 0) {
		perror("fork");
		exit(1);
	}

	if (pid == 0) {
		/* Child: become the measured command. */
		execv(argv[1], new_argv);
		perror("execv");
		/* _exit(): do not run the parent's exit-time handling in the child */
		_exit(1);
	}

	/*
	 * FIXME: Is this efficient enough ? Could it be done better ?
	 * The child may already be running by the time the counters start.
	 */
	prfcnt_start(&prfcnt);
	tsc_start(&timer);
	if (wait(&status) < 0) {
		/* Nothing was waited for, so the readings would be meaningless. */
		perror("wait");
		exit(1);
	}
	tsc_pause(&timer);
	prfcnt_pause(&prfcnt);

	tsc_report(&timer);
	prfcnt_report(&prfcnt);

	return 0;
}
/*
 * Sum an array of integers with an OpenMP parallel-for reduction, time
 * the loop, and compare the result with the sum handed back by
 * arr_int_mkrand().
 *
 * argv[1] (optional) is the number of integers; a missing, unparsable or
 * non-positive value selects the default of 100000.
 *
 * Aborts if the two sums differ; returns 0 otherwise.
 */
int main(int argc, const char *argv[])
{
	unsigned nthreads;
	size_t nints = 0;
	int sum1, sum2;
	int *arr;

	if (argc > 1) {
		/*
		 * Parse as signed first so that a negative argument falls back
		 * to the default instead of wrapping to an enormous size_t.
		 */
		long requested = strtol(argv[1], NULL, 10);
		if (requested > 0)
			nints = (size_t)requested;
	}
	if (nints == 0)
		nints = 100000;

	/* Only the master thread of the parallel region records the team size. */
	#pragma omp parallel
	#pragma omp master
	nthreads = omp_get_num_threads();

	printf("Number of threads: %u\n", nthreads);
	/* %zu is the conversion that matches size_t */
	printf("number of ints: %zu\n", nints);

	/* sum1 is filled in by arr_int_mkrand() and used below as the reference. */
	arr = arr_int_mkrand(nints, &sum1);

	sum2 = 0;
	tsc_t t;
	tsc_init(&t);
	tsc_start(&t);
	#pragma omp parallel for reduction(+:sum2)
	for (size_t i = 0; i < nints; i++) {
		sum2 += sum_op(arr[i]);
	}
	tsc_pause(&t);
	tsc_report("sum_OMP", &t);

	if (sum1 != sum2) {
		fprintf(stderr, "Error in sum: %d vs %d\n", sum1, sum2);
		abort();
	}

	printf("DONE\n");
	return 0;
}
int main(int argc, const char *argv[]) { if (argc < 4) { fprintf(stderr, "Usage: %s <array_size> <block_size> <accesses>\n", argv[0]); exit(1); } unsigned int asize = atol(argv[1]); unsigned int bsize = atol(argv[2]); unsigned int accesses = atol(argv[3]); unsigned int seed = time(NULL); tsc_t tc; /* normal pointers */ srand(seed); printf("CoPy\n"); unsigned int *p, *p_copy; unsigned int sum_copy = 0; p = xmalloc(asize*sizeof(unsigned int)); for (unsigned int i=0; i<asize; i++) p[i] = i; tsc_init(&tc); tsc_start(&tc); p_copy = xmalloc(asize*sizeof(unsigned int)); memcpy(p_copy, p, asize*sizeof(unsigned int)); for (unsigned int j=0; j<accesses; j++) { unsigned int idx = rand() % asize; p_copy[idx] = 0; } #ifdef DO_SUMS for (unsigned int j=0; j<asize; j++) { sum_copy += p_copy[j]; } #endif tsc_pause(&tc); tsc_report(&tc); /* versioned pointers */ tsc_t t; srand(seed); printf("VerSions\n"); unsigned int sum_versions = 0; sla_t *sla = sla_init(10, .5, 16, time(NULL)); sla->def_nitems = bsize; for (unsigned int i=0; i<asize; i++) sla_append(sla, i); tsc_init(&t); tsc_start(&t); versla_t *versla = versla_init(sla); ver_t *v1 = versla_newver(versla, versla->vo.ver_base); for (unsigned int j=0; j<accesses; j++) { unsigned int idx = rand() % asize; versla_set(versla, idx, 0, v1); } #ifdef DO_SUMS for (unsigned int j=0; j<asize; j++) { unsigned int x = versla_get(versla, j, v1); sum_versions += x; } #endif tsc_pause(&t); tsc_report(&t); printf("\ntC/tV=%lf\n", (double)tsc_getticks(&tc)/(double)tsc_getticks(&t)); for (unsigned int j=0; j<asize; j++) { unsigned int x0 = p_copy[j]; unsigned int x1 = versla_get(versla, j, v1); if (x0 != x1) { fprintf(stderr, "copy:%d and versions:%d differ for j=%d\n", x0, x1, j); } } assert(sum_versions == sum_copy); return 0; }