/**
 * Log performance counters and elapsed wall-clock time to `out`.
 *
 * (The original header names "pstats.dat" as the destination; the function
 * itself writes to whatever stream the caller passes in.)
 *
 * @param out    Stream that receives the report lines.
 * @param phase  Label printed at the start of every line.
 * @param pTic   In: timestamp (microseconds) marking the start of the interval
 *               being reported. Out: a fresh timestamp taken after logging.
 * @param values Scratch buffer for the counter readings; needs at least 4
 *               entries when CACHE_PROFILE is defined, otherwise at least 1.
 *
 * Calls handle_error(1) when the counters cannot be read.
 */
static void log_counters( FILE *out, const char *phase, long long *pTic, long long values[] ) {
    /* Capture the end time first so the logging itself is not measured. */
    long long toc = PAPI_get_real_usec();

#ifdef CACHE_PROFILE
    if( PAPI_read_counters( values, 4 ) != PAPI_OK ) {
        handle_error( 1 );
    }

    /* The labels below pair values[0]/values[2] as L2 misses/accesses and
     * values[1]/values[3] as L3 misses/accesses. */
    fprintf( out, "%s PAPI_L2_TCM %lld\n", phase, values[0] );
    fprintf( out, "%s PAPI_L2_TCA %lld\n", phase, values[2] );
    /* A literal percent sign must be written as "%%"; a lone '%' followed by
     * '\n' is an invalid conversion specification.
     * NOTE(review): the ratio is printed unscaled (not multiplied by 100)
     * although it carries a '%' suffix - confirm the intended unit. */
    fprintf( out, "%s L2MissRate %.4lf%%\n", phase, ( double )values[0] / ( double )values[2] );
    fprintf( out, "%s PAPI_L3_TCM %lld\n", phase, values[1] );
    fprintf( out, "%s PAPI_L3_TCA %lld\n", phase, values[3] );
    fprintf( out, "%s L3MissRate %.4lf%%\n", phase, ( double )values[1] / ( double )values[3] );
#else
    if( PAPI_read_counters( values, 1 ) != PAPI_OK ) {
        handle_error( 1 );
    }

    fprintf( out, "%s PAPI_FP_OPS %lld\n", phase, values[0] );
#endif

    /* Microseconds to seconds. */
    fprintf( out, "%s RealTime %.4lfs\n", phase, ( toc - *pTic ) * 1e-6 );
    *pTic = PAPI_get_real_usec();
}
Пример #2
0
/**
 * Count instructions and floating-point operations executed by loop().
 *
 * Prompts for a vector size, fills two input vectors, and reports the
 * PAPI_TOT_INS and PAPI_FP_OPS counter values measured around loop().
 *
 * @return 0 on success, 1 when the vector size is invalid.
 */
int main(int argc, char *argv[]) {

     double a[MAXVSIZE], b[MAXVSIZE], c[MAXVSIZE];
     int i,n;

     (void) argc;
     (void) argv;

     if (PAPI_VER_CURRENT != 
		PAPI_library_init(PAPI_VER_CURRENT))
	ehandler("PAPI_library_init error.");

     const size_t EVENT_MAX = PAPI_num_counters();
        /* size_t is unsigned, so it is printed with %zu. */
        printf("# Max counters = %zu\n", EVENT_MAX);

     if (PAPI_OK != PAPI_query_event(PAPI_TOT_INS))
	        ehandler("Cannot count PAPI_TOT_INS.");

     if (PAPI_OK != PAPI_query_event(PAPI_FP_OPS))
	        ehandler("Cannot count PAPI_FP_OPS.");

     /* A compile-time constant keeps values[] a plain array, not a VLA. */
     enum { EVENT_COUNT = 2 };
     int events[EVENT_COUNT] = { PAPI_TOT_INS, PAPI_FP_OPS };
     long long values[EVENT_COUNT];


     printf("Enter vector size:  ");
     /* The vectors live on the stack with MAXVSIZE elements each, so reject
      * unparsable input and sizes that would overrun them. */
     if (scanf("%d",&n) != 1 || n < 0 || n > MAXVSIZE) {
        fprintf(stderr, "Vector size must be between 0 and %d.\n", (int) MAXVSIZE);
        return 1;
     }

     for (i=0;i<n;i++) {
       a[i] = i;
       b[i] = n-i;
     }

     if (PAPI_OK != PAPI_start_counters(events, EVENT_COUNT))
               ehandler("Problem starting counters.");

     /* First read: its values are overwritten by the read after loop(). */
     if(PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
               ehandler("Problem reading counters.");

     loop(c,a,b,n);

     if(PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
               ehandler("Problem reading counters.");

     printf("Number of instructions = %lld\n",values[0]);
     printf("Number of fp operations = %lld\n",values[1]);
     return 0;

}
Пример #3
0
/**
 * Read the first running PAPI counter and return its value as a double.
 *
 * @return The counter value converted to double; 0.0 if the read did not
 *         store a value.
 */
double
papi_generic_call()
{
  /* Zero-initialised so a failed (unchecked) read below cannot make this
   * function return an indeterminate value. */
  long_long tmp[1] = { 0 };

  /* Read and reset the counters.
   * The commented out conditional affects the reading of the performance
   * counters, but might be good for debugging.
   * NOTE: PAPI_accum_counters does not work properly.
   * */ 
#if 0
    if (PAPI_read_counters(tmp, 1) != PAPI_OK)
      papi_eprintf("Problem reading counters %s:%d.\n", __FILE__, __LINE__);
#else
    PAPI_read_counters(tmp, 1);
#endif
    return ((double) tmp[0]);
}
Пример #4
0
/**
 * Measure PAPI_TOT_INS around NUM_RUNS calls of instructions_million() and
 * print the average, maximum and minimum readings.
 *
 * Per-run readings are stored in the file-level `results` array.
 *
 * @return 0 on success; exits with status 1 if PAPI cannot be set up.
 */
int main(int argc, char **argv) {

	int retval;

	(void) argc;
	(void) argv;

	retval = PAPI_library_init(PAPI_VER_CURRENT);
	if (retval != PAPI_VER_CURRENT) {
		fprintf(stderr,"Error! PAPI_library_init %d\n", retval);
		/* Nothing below can work without the library. */
		exit(1);
	}

	retval = PAPI_query_event(PAPI_TOT_INS);
	if (retval != PAPI_OK) {
		fprintf(stderr,"PAPI_TOT_INS not supported\n");
		exit(1);
	}

	int i;
	int events[1],result;
	long long counts[1];

	/* min starts at the largest long long (16 hex digits) so that any real
	 * reading replaces it; the previous literal was one digit short. */
	long long total=0,average,max=0,min=0x7fffffffffffffffLL;

	events[0]=PAPI_TOT_INS;

	retval = PAPI_start_counters(events,1);
	if (retval != PAPI_OK) {
		fprintf(stderr,"Error! PAPI_start_counters %d\n", retval);
		exit(1);
	}

	for(i=0;i<NUM_RUNS;i++) {


		result=instructions_million();

		/* One reading per run. */
		PAPI_read_counters(counts,1);

		results[i]=counts[0];

 	}

	PAPI_stop_counters(counts,1);


	PAPI_shutdown();

	for(i=0;i<NUM_RUNS;i++) {
		total+=results[i];
		if (results[i]>max) max=results[i];
		if (results[i]<min) min=results[i];
	}

	average=total/NUM_RUNS;
	printf("Average=%lld max=%lld min=%lld\n",average,max,min);

	/* The workload's return value is intentionally unused. */
	(void) result;

	return 0;
}
Пример #5
0
/**
 * Count the elements below RAND_MAX / 2 in a random array and report the
 * elapsed time together with the data-cache misses PAPI could measure.
 *
 * Tries to start the L1, L2 and L3 data-cache-miss counters and drops the
 * last event from the list each time starting fails.
 *
 * @return 0 on success; exits with status 1 on setup or read failure.
 */
int main (int argc, char *argv[]) {
	int i, count;
	int *array = malloc (SIZE * sizeof *array);
	uint64_t start, end;
    int events[3] = { PAPI_L1_DCM, PAPI_L2_DCM, PAPI_L3_DCM };
    long long misses[3];
    int papilevels = 3;

	(void) argc;
	(void) argv;

	if (array == NULL) {
		fprintf(stderr, "malloc failed\n");
		exit(1);
	}

    if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
        exit(1);
    }

	/* Initialization */
	count = 0;
	srand(time(NULL));
	init_time();
	for (i = 0; i < SIZE; i++)
		array[i] = rand();

	/* Sort the data before the measured loop (the original note on this
	 * line was truncated). */
	qsort(array, SIZE, sizeof (int), compare_ints);

	/* Measurement: retry with fewer events, but stop once none are left
	 * instead of asking PAPI for zero or a negative number of counters. */
    while (papilevels > 0 && PAPI_start_counters(events, papilevels) != PAPI_OK) {
        papilevels--;
    }
	start = get_time();
	/*
	 * On a sorted array the count could be obtained in O(lg(n)) instead of
	 * O(n) using binary search. Although that would reduce the time
	 * considerably, it is not the purpose of this exercise.
	 */
	for (i = 0; i < SIZE; i++)
		if (array[i] < RAND_MAX / 2)
			count++;
	end = get_time();
	uint64_t exec_time = diff_time(start, end);
	/* Only read counters that were actually started. */
    if (papilevels > 0 && PAPI_read_counters(misses, papilevels) != PAPI_OK) {
        fprintf(stderr, "Erro em PAPI_read_counters\n");
        exit(1);
    }

	printf("Time: %" PRIu64 " Count %d\n",  exec_time, count);
    for (i = 0; i < papilevels; i++) {
        printf("Cache misses (L%d): %lld\n", i+1, misses[i]);
    }
	free(array);
	return 0;
}
Пример #6
0
/**
 * Read the PAPI counters into the file-level `values` array and write the
 * L1 miss/access counts plus their ratio (as a percentage) to `res_file`.
 *
 * @param phase Prefix used for every key written to `res_file`.
 * @return 0 after writing the report, -1 if the counters cannot be read.
 */
int test_measure(char* phase)
{
    /* Guard clause: nothing to report without a successful read. */
    if ( PAPI_read_counters( values, NUM_EVENTS ) != PAPI_OK ) {
        return -1;
    }

    /* values[0] is reported as misses, values[1] as accesses. */
    float rate = (float)values[0] / (float)(values[1]);

    fprintf(res_file, "%s_PAPI_L1_TCM=%lld\n", phase, values[0]);
    fprintf(res_file, "%s_PAPI_L1_TCA=%lld\n", phase, values[1]);
    fprintf(res_file, "%s_L1MissRate=%f%%\n",  phase, (rate * 100.0));

    return 0;
}
Пример #7
0
/**
 * Read the first `n` running PAPI counters into a throw-away buffer.
 *
 * The values read are discarded. Reports through papi_eprintf() when the
 * scratch buffer cannot be allocated or the counters cannot be read.
 *
 * @param n Number of counters to read.
 */
inline 
void 
papi_reset(size_t n) 
{
  long_long *papi_tmp;

  papi_tmp = malloc(sizeof(*papi_tmp) * n);

  /* Never hand a NULL buffer to PAPI. */
  if (papi_tmp == NULL) {
    papi_eprintf("Out of memory %s:%d.\n", __FILE__, __LINE__);
    return;
  }

  if (PAPI_read_counters(papi_tmp, n) != PAPI_OK)
    papi_eprintf("Problem reading counters %s:%d.\n", __FILE__, __LINE__);

  free(papi_tmp);
}
/**
 * Run an update kernel over the MX x MX matrix `ad` and report the software
 * flop count, the PAPI_FP_OPS counter, MFlop/s and PAPI_L1_DCM.
 *
 * @return 0 on success; exits with status 1 if allocation or PAPI fails.
 */
int main () 
{
    float t0, t1;
    int iter, i, j;
    int events[2] = {PAPI_L1_DCM, PAPI_FP_OPS }, ret;
    long_long values[2];

    if (PAPI_num_counters() < 2) {
        fprintf(stderr, "No hardware counters here, or PAPI not supported.\n");
        exit(1);
    }
    /* One heap-allocated array of MX doubles per ad[i]. */
    for (i = 0; i < MX; i++) {
        if ((ad[i] = malloc(sizeof(double)*MX)) == NULL) {
            fprintf(stderr,"malloc failed\n");
            exit(1);
        }
    }
    for (j = 0; j < MX; j++) { 
        for (i = 0; i < MX; i++) {
            ad[i][j] = 1.0/3.0; /* Initialize the data */
        }
    }
    t0 = gettime();
    if ((ret = PAPI_start_counters(events, 2)) != PAPI_OK) {
        fprintf(stderr, "PAPI failed to start counters: %s\n", PAPI_strerror(ret));
        exit(1);
    }
    for (iter = 0; iter < NITER; iter++) {
        for (j = 0; j < MX; j++) {
            for (i = 0; i < MX; i++) {
                ad[i][j] += ad[i][j] * 3.0;
            }
        }
    }
    if ((ret = PAPI_read_counters(values, 2)) != PAPI_OK) {
        fprintf(stderr, "PAPI failed to read counters: %s\n", PAPI_strerror(ret));
        exit(1);
    }
    t1 = gettime();

    printf("Total software flops = %f\n",(float)TOT_FLOPS);
    /* %lld needs an integer argument; the former (float) cast passed a
     * double to an integer conversion. */
    printf("Total hardware flops = %lld\n",(long long)values[1]);
    printf("MFlop/s = %f\n", (float)(TOT_FLOPS/MEGA)/(t1-t0));
    printf("L1 data cache misses is %lld\n", values[0]);

    return 0;
}
Пример #9
0
/**
 * Start the configured PAPI counters, run doWork(123) and print the first two
 * counter values stored in papi_values[0].
 *
 * @return 0 on success, 1 if the counters cannot be started or read.
 */
int main()
{
  /* this will fail if some counters can't be accessed */
  if (PAPI_start_counters(papi_events, n_papi_events) != PAPI_OK)
    {
      printf("failed to start papi\n");
      return 1;
    }

  doWork(123);

  if (PAPI_read_counters(papi_values[0], n_papi_events) != PAPI_OK)
    {
      printf("failed to read counters\n");
      return 1;
    }

  /* PAPI stores counter values as long long, so they are printed with %lld
   * rather than %d. */
  printf("counters' values: misses = %lld, accesses = %lld\n",
         (long long) papi_values[0][0], (long long) papi_values[0][1]);

  return 0;
}
/**
 * Drain the priority queue, relaxing the outgoing edges of every popped node.
 *
 * For each edge whose tentative distance beats the target's stored distance,
 * the target's distance is lowered and the target is either pushed onto the
 * queue (no handle yet) or has its key decreased (handle already stored in
 * `n`). When PAPI is defined, the counters are started before the loop and
 * read into g_values[0] afterwards.
 *
 * @param pq    Priority queue of (distance, node) pairs to process.
 * @param graph Vertex array indexed by node id.
 */
static void start_sssp(FibHeap<size_t, size_t> *pq,
                       vertex_t *graph)
{
#ifdef PAPI
    const bool counters_started =
        (PAPI_start_counters(g_events, G_EVENT_COUNT) == PAPI_OK);
    if (!counters_started) {
        std::cout << ("Problem starting counters 1.\n");
    }
#endif

    for (; !pq->empty();) {
        size_t popped_key;
        size_t popped_node;
        pq->pop(popped_key, popped_node);

        vertex_t *src = &graph[popped_node];
        /* Snapshot the source distance once, before any relaxation. */
        size_t src_dist = src->distance;

        for (size_t k = 0; k < src->num_edges; k++) {
            const edge_t *edge = &src->edges[k];
            const size_t candidate = src_dist + edge->weight;
            vertex_t *dst = &graph[edge->target];
            size_t dst_dist = dst->distance;

            /* Not an improvement: leave the target untouched. */
            if (!(candidate < dst_dist)) {
                continue;
            }

            dst->distance = candidate;
            if (w_has_handle(dst)) {
                pq->decrease_key(dst->n, candidate);
            } else {
                dst->n = pq->push(candidate, edge->target);
            }
        }
    }

#ifdef PAPI
    const bool counters_read =
        (PAPI_read_counters(g_values[0], G_EVENT_COUNT) == PAPI_OK);
    if (!counters_read) {
        std::cout << ("Problem reading counters 2.\n");
    }
#endif
}
Пример #11
0
/**
 * Print the current value of every counter, stop counting and release the
 * file-level `values` buffer.
 *
 * Exits with status 1 if the counters cannot be read or stopped.
 *
 * @param events     Event codes, printed next to their counter values.
 * @param NUM_EVENTS Number of entries in `events` and `values`.
 */
void my_papi_stop(int *events, int NUM_EVENTS)
{
	int j;
	/* Read the counters */
	if (PAPI_read_counters(values, NUM_EVENTS) != PAPI_OK) {
			fprintf(stderr, "PAPI_read_counters - FAILED\n");
			exit(1);
	}

	for (j=0; j<NUM_EVENTS; j++) 
	{
    printf("GG: %d : %lld\n", events[j], values[j]);
	}
	/* Stop counting events */
	if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) {
			fprintf(stderr, "PAPI_stop_counters - FAILED\n");
			exit(1);
	}

	/* free(NULL) is a no-op, so no guard is needed. Clearing the pointer
	 * keeps a later call from freeing or reading released memory. */
	free(values);
	values = NULL;
	
}
Пример #12
0
/**
 * Stop the running counters and restart counting with the single event named
 * by `metric`.
 *
 * Problems are reported through papi_eprintf().
 *
 * @param metric PAPI event name to translate into an event code.
 */
void
papi_set_events(char *metric)
{
  const size_t n = 1;

  int max;
  long_long *papi_tmp;
  int papi_events[1];
  /* Initialised so the zero test below is well defined even when the name
   * lookup fails without storing a code. */
  int code = 0;

  max = PAPI_num_counters();

  /* Test for a negative count explicitly: converted to size_t it would
   * become a huge value and hide the problem. */
  if (max < 0 || n > (size_t) max)
    papi_eprintf("Too many counters requested.\n");

  papi_tmp = malloc(sizeof(*papi_tmp) * n);
  if (papi_tmp == NULL) {
    papi_eprintf("Out of memory %s:%d.\n", __FILE__, __LINE__);
    return;
  }

  /* NOTE(review): PAPI_reset() is documented as taking an event set, but it
   * is handed the counter count here - confirm the intent. Left unchanged. */
  PAPI_reset(max);

  PAPI_stop_counters(papi_tmp, n);

  if (PAPI_event_name_to_code(metric, &code) != PAPI_OK)
    papi_eprintf("Unknown PAPI event %s.\n", metric);

  if (code == 0)
    papi_eprintf("Unknown PAPI event %s.\n", metric);

  papi_events[0] = code;

  PAPI_start_counters(papi_events, n);

  /* Read once right after starting; the values are discarded. */
  if (PAPI_read_counters(papi_tmp, n) != PAPI_OK)
    papi_eprintf("Problem reading counters %s:%d.\n", __FILE__, __LINE__);

  free(papi_tmp);
}
Пример #13
0
/**
 * Benchmark worker thread.
 *
 * After per-thread setup and a barrier, repeatedly performs operations on
 * the structure selected by the flags in the thread's thread_data_t and
 * updates the per-thread operation counters (nb_add, nb_added, nb_remove,
 * nb_removed, nb_contains, nb_found, first_remove):
 *   - d->es set: "event simulator" mode - delete the minimum, then re-insert
 *     the entries listed for it in deps[][] (terminated by -1, at most
 *     MAX_DEPS entries are examined);
 *   - otherwise: a mix of add / remove / contains operations, where `unext`
 *     decides whether the next operation is an update.
 *
 * The loop runs while *running is non-zero, or - with DISTRIBUTION_EXPERIMENT
 * defined - until the first removal has been recorded.
 *
 * @param data Pointer to this thread's thread_data_t.
 * @return Always NULL.
 */
void* test(void *data) {
  int unext, last = -1; 
  val_t val = 0;
  pval_t pval = 0;

  thread_data_t *d = (thread_data_t *)data;

  /* Create transaction */
  TM_THREAD_ENTER(d->id);
  set_cpu(the_cores[d->id]);
  /* Wait on barrier */
  ssalloc_init();
  PF_CORRECTION;

  seeds = seed_rand();

#ifdef PIN
  int id = d->id;
  int cpu = 40*(id/40) + 4*(id%10) + (id%40)/10;
  // printf("Pinning %d to %d\n",id,cpu);
  pin(pthread_self(), cpu);
  //  pin(pthread_self(), id);
#endif

 #ifdef PAPI
    if (PAPI_OK != PAPI_start_counters(g_events, G_EVENT_COUNT))
  {
    printf("Problem starting counters 1.");
  }
 #endif


  barrier_cross(d->barrier);

  /* Is the first op an update? */
  unext = (rand_range_re(&d->seed, 100) - 1 < d->update);

#ifdef DISTRIBUTION_EXPERIMENT
  while (1)
#else
  while (*running)
#endif
    {		
      if (d->es) { // event simulator experiment
        if (d->lin) {
          if (!empty(d->linden_set)) {
            d->nb_remove++;
            pval_t pval = deletemin(d->linden_set, d);
            d->nb_removed++;

  //           printf("%d %d\n", pval, deps[pval][0]);

            int i = 0;
            val_t dep;
            /* Check the index before reading deps[][] so the entry at
             * position MAX_DEPS is never accessed. */
            while (i < MAX_DEPS && (dep = deps[pval][i]) != -1) {
              d->nb_add++;
              if (insert(d->linden_set, dep, dep)) {
                d->nb_added++;
              }
              i++;
            }
          }
        } else {
          if (d->set->head->next[0]->next[0] != NULL) {// set not empty
            d->nb_remove++;
            if (d->sl) { // spray list
              if (spray_delete_min(d->set, &val, d)) {
                d->nb_removed++;
              } else {
                continue;
              }
            } else if (d->pq) { // lotan_shavit pq
              if (lotan_shavit_delete_min(d->set, &val, d)) {
                d->nb_removed++;
                //         continue; // TODO: maybe try remove this to simulate task handling (dependency checks still occur)
              } else {
                continue;
              }
            }

            //         struct timespec ten_usec;
            //         ten_usec.tv_sec = 0;
            //         ten_usec.tv_nsec = 10000;
            //         nanosleep(&ten_usec, NULL);

            // dependency handling
            int i = 0;
            val_t dep;
            /* Bounds check first, as in the loop above. */
            while (i < MAX_DEPS && (dep = deps[val][i]) != -1) {
              if (!sl_contains(d->set, dep, TRANSACTIONAL)) { // dependent has been removed, need to add it again
                if (sl_add(d->set, dep, TRANSACTIONAL)) { // check if insert actually succeeded (otherwise someone else did it first)
                  d->nb_added++;
                }
                d->nb_add++;
              }
              i++;
            }
          }
        }
      } else { // not event simulator
        if (unext) { // update

          if (last < 0) { // add
            val = rand_range_re(&d->seed, d->range);
            if (d->lin) {
              pval = val;
              insert(d->linden_set, pval, pval);
              d->nb_added++;
              last = pval;
            } else { // not linden
              if (sl_add(d->set, val, TRANSACTIONAL)) {
                d->nb_added++;
                last = val;
              } 				
            }
            d->nb_add++;

          } else { // remove

            if (d->pq) {
              if (lotan_shavit_delete_min(d->set, &val, d)) {
                d->nb_removed++;
                if (d->first_remove == -1) {
                  d->first_remove = val;
                }
              }
                /* NOTE(review): unlike the spray-list branch below, `last`
                 * is reset here even when the delete fails - confirm. */
                last = -1;
            }
            else if (d->sl) {
              if (spray_delete_min(d->set, &val, d)) {
                d->nb_removed++;
                if (d->first_remove == -1) {
                  d->first_remove = val;
                }
                last = -1;
              }
            }
            else if (d->lin) {
              if ((pval = deletemin(d->linden_set, d))) {
                d->nb_removed++;
                if (d->first_remove == -1) {
                  d->first_remove = pval;
                }
                last = -1;
              }
            }
            else if (d->alternate) { // alternate mode (default)
              if (sl_remove(d->set, last, TRANSACTIONAL)) {
                d->nb_removed++;
                if (d->first_remove == -1) {
                  /* NOTE(review): the element removed is `last`, yet `val`
                   * is what gets recorded - confirm. */
                  d->first_remove = val;
                }
              } 
              last = -1;
            } else {
              /* Random computation only in non-alternated cases */
              val = rand_range_re(&d->seed, d->range);
              /* Remove one random value */
              if (sl_remove_succ(d->set, val, TRANSACTIONAL)) {
                d->nb_removed++;
                if (d->first_remove == -1) {
                  d->first_remove = val;
                }
                /* Repeat until successful, to avoid size variations */
                last = -1;
              } 
            }
            d->nb_remove++;
          }

        } else { // read

          if (d->alternate) {
            if (d->update == 0) {
              if (last < 0) {
                val = d->first;
                last = val;
              } else { // last >= 0
                val = rand_range_re(&d->seed, d->range);
                last = -1;
              }
            } else { // update != 0
              if (last < 0) {
                val = rand_range_re(&d->seed, d->range);
                //last = val;
              } else {
                val = last;
              }
            }
          }	else val = rand_range_re(&d->seed, d->range);

          PF_START(2);
          if (sl_contains(d->set, val, TRANSACTIONAL)) 
            d->nb_found++;
          PF_STOP(2);	
          d->nb_contains++;
        }

        /* Is the next op an update? */
        if (d->effective) { // a failed remove/add is a read-only tx
          unext = ((100 * (d->nb_added + d->nb_removed))
              < (d->update * (d->nb_add + d->nb_remove + d->nb_contains)));
        } else { // remove/add (even failed) is considered as an update
          unext = (rand_range_re(&d->seed, 100) - 1 < d->update);
        }
      }

#ifdef DISTRIBUTION_EXPERIMENT
      if (d->first_remove != -1) {
        break; //only one run
      }
#endif

    }
#ifdef PAPI
  /* Each thread stores its readings in its own row of g_values. */
  if (PAPI_OK != PAPI_read_counters(g_values[d->id], G_EVENT_COUNT))
  {
    printf("Problem reading counters 2.");
  }
#endif

  /* Free transaction */
  TM_THREAD_EXIT();

  PF_PRINT;

  return NULL;
}
Пример #14
0
/**
 * Time block_prod() on two generated square matrices and print one
 * tab-separated result line: n, PAPI_TOT_INS, PAPI_FP_OPS, PAPI_L1_DCM and
 * the elapsed time in milliseconds.
 *
 * The size is taken from argv[1] and rounded down to a perfect square
 * (n1 = floor(sqrt(n)), n = n1 * n1).
 *
 * @return 0 on success, 1 on a missing or invalid size argument.
 */
int main(int argc, char *argv[])
{
    float **a,**b,**c;
    int n,n1,n2;
    int i,j;
    struct timeval t0,t1;
    long mtime, seconds, useconds;

    /* The size argument is mandatory; argv[1] is NULL without it. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s n\n", argv[0]);
        return 1;
    }

    // Using PAPI - from countloop.c
    if (PAPI_VER_CURRENT !=
        PAPI_library_init(PAPI_VER_CURRENT))
        ehandler("PAPI_library_init error.");

    // Output of the counter count is suppressed; the call itself is kept.
    const size_t EVENT_MAX = PAPI_num_counters();
    (void) EVENT_MAX;

    if (PAPI_OK != PAPI_query_event(PAPI_TOT_INS))
        ehandler("Cannot count PAPI_TOT_INS.");

    if (PAPI_OK != PAPI_query_event(PAPI_FP_OPS))
        ehandler("Cannot count PAPI_FP_OPS.");

    if (PAPI_OK != PAPI_query_event(PAPI_L1_DCM))
        ehandler("Cannot count PAPI_L1_DCM.");

    size_t EVENT_COUNT = 3;
    int events[] = { PAPI_TOT_INS, PAPI_FP_OPS, PAPI_L1_DCM };
    long long values[EVENT_COUNT];

    // Take size from args, not prompt
    n = atoi(argv[1]);
    /* atoi() yields 0 for non-numeric text; sqrt() of a negative value is
     * not a usable size either. */
    if (n < 1) {
        fprintf(stderr, "n must be a positive integer\n");
        return 1;
    }

    // To conform to the other matrix functions
    n1 = floor(sqrt(n));
    n2 = n1;
    n = n1*n2;

    /* Matrices are indexed from 1 to n in both dimensions. */
    a = matrix(1,n,1,n);
    for (i=1;i<=n;i++) 
        for (j=1;j<=n;j++) 
            a[i][j] = i+j;

    b = matrix(1,n,1,n);
    for (i=1;i<=n;i++) 
        for (j=1;j<=n;j++) 
            b[i][j] = i-j;

    gettimeofday(&t0, NULL);
    // Start PAPI
    if (PAPI_OK != PAPI_start_counters(events, EVENT_COUNT))
        ehandler("Problem starting counters.");

    /* First read: its values are overwritten by the read after the product. */
    if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
        ehandler("Problem reading counters.");

    c = block_prod(n1,n1,n1,n2,n2,n2,a,b);
    (void) c;

    if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
        ehandler("Problem reading counters.");

    gettimeofday(&t1, NULL);
    seconds = t1.tv_sec - t0.tv_sec;
    useconds = t1.tv_usec - t0.tv_usec;
    /* Milliseconds, rounded to nearest. */
    mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5;
    printf("%d\t%lld\t%lld\t%lld\t%ld\n", n, values[0], values[1],
        values[2], mtime);

    return 0;
}
Пример #15
0
/**
 * Worker for a busy-waiting barrier experiment.
 *
 * Each thread owns one bit (`me`) in the shared `entry` and `exit` words of
 * the Context. On every repetition it either sets its bit in `entry` and
 * spins until all bits are set (c->left == 0), or verifies that the
 * per-thread visit counters agree, sets its bit in `exit` and spins until
 * everyone has left (c->left == 1).
 *
 * The thread pins itself to the CPU numbered `index`. PAPI counters are only
 * recorded for the thread whose index equals `threadToBeRecorded`.
 *
 * NOTE(review): the shared Context fields are accessed without atomics or
 * locks as far as this function shows - confirm this is intentional.
 *
 * @param userData Pointer to this thread's ThreadInfo.
 * @return Always NULL.
 */
void* Thread(void *userData) {

    ThreadInfo *info = (ThreadInfo*) userData;
    Context *c = info->c;

    int index = info->index;
    int threadCount = c->threadCount;
    int64_t repetitionCount = c->repetitionCount;

    /* Shift in 64 bits: `0x1 << index` is an int shift, which is undefined
     * for index >= 31 and cannot reach the upper half of the mask. */
    uint64_t me = (uint64_t)1 << index;
    uint64_t full = 0x0000000000000000;

    uint64_t copy; //thread local copy of the entry/exit barrier

    for (int i = 0; i < threadCount; ++i) {
        full |= (uint64_t)1 << i;
    }

    // set thread affinity
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(index, &cpuset);
    /* Keep the call outside assert(): with NDEBUG the assert expression is
     * not evaluated and the affinity would never be set. */
    int affinityResult = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
    assert(affinityResult == 0);
    (void) affinityResult;

    //DEBUG
    //pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
    //printf("%i uses cpus: ", index);
    //for (int i = 0; i < threadCount; ++i) {
    //    if (CPU_ISSET(i, &cpuset)) {
    //        printf("%i, ", i);
    //    }
    //}
    //printf("\n");


    /* -1 matches no thread index, so recording is disabled here. */
    int threadToBeRecorded = -1;
    int papiEvents[3] = {0x8000003b, 0x80000000, 0x80000002};
    long long papiStart[3] = {0, 0, 0};
    long long papiEnd[3] = {0, 0, 0};

    if (index == threadToBeRecorded) {
        int ret = PAPI_start_counters(papiEvents, 3);
        if (ret != 0) {
            printf("thread %i: PAPI_start_counters %i\n", index, ret);
            assert(0);
        }
        ret = PAPI_read_counters(papiStart, 3);
        if (ret != 0) {
            printf("thread %i: PAPI_read_counters %i\n", index, ret);
            assert(0);
        }
    }


    //unlink("a");
    //FILE *log = fopen("a", "a");

    for(int64_t repetition = 0; repetition < repetitionCount; repetition++){

        if (c->left == 0) { /* *** if () { UNIFIED ENTRY *********************/

            /* run to wall and wait busily */
            do {
                copy = c->entry;
                //fprintf(log, "%i r %lli\n", prime, (long long) copy);
                //fflush(log);
                if ((copy & me) == 0) {
                    copy |= me;
                    c->entry = copy;
                    //fprintf(log, "%i w %lli\n", prime, (long long) copy);
                    //fflush(log);
                }
            }while (copy != full && c->left == 0);

            c->left = 1;

            c->exit = 0x0000000000000000;

        } else if (c->left == 1) { /* *** } else if () { UNIFIED ENTRY *******/

            /* All threads must have completed the same number of visits. */
            for (int i = 0; i < threadCount - 1; ++i) {
                if (c->successfulBarrierVisitsCount[i] != c->successfulBarrierVisitsCount[i+1]) {
                    printf("thread %i and %i are not equal at %lli %lli\n", i, i+1,
                            (long long)c->successfulBarrierVisitsCount[i],
                            (long long)c->successfulBarrierVisitsCount[i+1]);
                    ++c->outOfSyncCount;
                    assert(0);
                }
            }

            /* wait busily until everyone has left the barrier */
            do {
                copy = c->exit;
                if ((copy & me) == 0) {
                    copy |= me;
                    c->exit = copy;
                }
            }while (copy != full && c->left == 1);

            c->left = 0;

            c->entry = 0x0000000000000000;

            ++(c->successfulBarrierVisitsCount[index]);

        } /* *** } UNIFIED ENTRY *********************************************/
    }

    if (index == threadToBeRecorded) {
        int ret = PAPI_stop_counters(papiEnd, 3);
        if (ret != 0) {
            printf("%i: PAPI_stop_counters %i\n", index, ret);
            assert(0);
        }
        printf("thread %i: papi counter 0: %lli - %lli = %lli\n", index, papiEnd[0], papiStart[0], papiEnd[0] - papiStart[0]);
        printf("thread %i: papi counter 1: %lli - %lli = %lli\n", index, papiEnd[1], papiStart[1], papiEnd[1] - papiStart[1]);
        printf("thread %i: papi counter 2: %lli - %lli = %lli\n", index, papiEnd[2], papiStart[2], papiEnd[2] - papiStart[2]);
        printf("\n");
    }

    return NULL;
}
Пример #16
0
/**
 * Print the cache sizes and line sizes reported by hwloc next to the
 * data-cache-miss counters PAPI could measure.
 *
 * The "estimated" columns of the table are still placeholders that repeat
 * the real values (see the TODO notes).
 *
 * @return 0 on success; exits with status 1 on hwloc or PAPI failure.
 */
int main() {
    int level, numlevels;

    /* Real measurements (hwloc) */
    hwloc_uint64_t real_cache[3];
    unsigned real_line[3];
    hwloc_topology_t topology;
    hwloc_obj_t obj;

    /* Estimated measurements (PAPI) */
    int events[3] = { PAPI_L1_DCM, PAPI_L2_DCM, PAPI_L3_DCM };
    long long misses[3];
    int papilevels = 3;

    /* Initialise hwloc */
    if (hwloc_topology_init(&topology)) {
        fprintf(stderr, "Erro em hwloc_topology_init\n");
        exit(1);
    }
    hwloc_topology_load(topology);

    /* Collect the real cache and line sizes, walking up from the first PU.
     * Only three levels fit in the arrays, so further caches are ignored
     * instead of being written past the end. */
    level = 0;
    for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); obj; obj = obj->parent) {
        if (obj->type == HWLOC_OBJ_CACHE && level < 3) {
            real_cache[level] = obj->attr->cache.size;
            real_line[level] = obj->attr->cache.linesize;
            level++;
        }
    }
    numlevels = level;

    /* Start counting events: retry with fewer events, but stop once none
     * are left. */
    while (papilevels > 0 && PAPI_start_counters(events, papilevels) != PAPI_OK) {
        papilevels--;
    }

    /* TODO: something to estimate */

    /* Read the counters (only if any were started) */
    if (papilevels > 0 && PAPI_read_counters(misses, papilevels) != PAPI_OK) {
        fprintf(stderr, "Erro em PAPI_read_counters\n");
        exit(1);
    }

    for (level = 0; level < papilevels; level++) {
        printf("L%d cache miss: %lld\n", level+1, misses[level]);
    }
    printf("\n");

    /* Print the table */
    printf("Nível Linha Real Cache Real Linha Est. Cache Est.\n");
    printf("----- ---------- ---------- ---------- ----------\n");
    for (level = 0; level < 3; level++) {
        printf("%5d ", level+1);
        if (level < numlevels) {
            printf("%10u ", real_line[level]);
            printf("%9lluK ", (unsigned long long) real_cache[level] / 1024);
            if (level < papilevels) {
                /* TODO: print estimated values instead of the real ones */
                printf("%10u ", real_line[level]);
                printf("%9lluK", (unsigned long long) real_cache[level] / 1024);
            } else {
                printf("       N/D        N/D");
            }
        } else {
            printf("       N/D        N/D        N/D        N/D");
        }
        printf("\n");
    }

    /* Release the topology created by hwloc_topology_init(). */
    hwloc_topology_destroy(topology);
    return 0;
}
Пример #17
0
Файл: main.c Проект: Xodion/CFD
/**
 * Driver of the lattice simulation: reads the parameters, allocates the
 * fields, runs `timesteps` streaming/collision steps with boundary treatment
 * and reports CPU time, wall-clock time, MLUPS and PAPI cache statistics.
 *
 * With DEBUG defined, every plotting step compares the fluid cells of
 * collideField against a reference file "Testdata/<problem>/<k>.dat".
 *
 * @return 0 normally (also when readParameters() reports a problem),
 *         1 if the fields cannot be allocated.
 */
int main(int argc, char *argv[])
{
    double *collideField = NULL;
    double *streamField = NULL;

    char problem[100];
    char pgmInput[1000];
    int *flagField = NULL;
    clock_t begin, end;
    double time_spent;
    struct timeval time_start, time_end;

    int xlength[3], timesteps, timestepsPerPlotting;
    double tau, bddParams[7];

    /* Zeroed so the summary prints defined values when no timestep runs. */
    long long counters[3] = { 0, 0, 0 };
	int PAPI_events[] = {
		PAPI_TOT_CYC,
		PAPI_L2_DCM,
		PAPI_L2_DCA };

	PAPI_library_init(PAPI_VER_CURRENT);

#ifdef DEBUG
    double * exactCollideField; // used for debugging
#endif

    if(readParameters(xlength, &tau, bddParams, &timesteps, &timestepsPerPlotting, problem, pgmInput, argc, argv) == 0)
    {
        begin = clock();
        gettimeofday(&time_start, NULL);
        collideField = (double*) malloc((size_t) sizeof(double) * PARAMQ * (xlength[0] + 2)*(xlength[1] + 2)*(xlength[2] + 2));
        streamField = (double*) malloc((size_t) sizeof(double) * PARAMQ * (xlength[0] + 2)*(xlength[1] + 2)*(xlength[2] + 2));
        flagField = (int *) malloc((size_t) sizeof (int) * (xlength[0] + 2)*(xlength[1] + 2)*(xlength[2] + 2));
        /* Stop before any field is dereferenced if an allocation failed. */
        if (collideField == NULL || streamField == NULL || flagField == NULL)
        {
            fprintf(stderr, "Out of memory while allocating the simulation fields\n");
            free(collideField);
            free(streamField);
            free(flagField);
            return 1;
        }
        initialiseFields(collideField, streamField, flagField, xlength, problem, pgmInput);

        printf("Progress:     ");

        PAPI_start_counters( PAPI_events, 3 );
        for(int t = 0; t < timesteps; t++)
        {
            double *swap = NULL;
            #ifdef _AVX_
            doStreamingAndCollisionAVX(collideField, streamField, flagField, xlength, tau);
            #else
            doStreamingAndCollision(collideField, streamField, flagField, xlength, tau);
            #endif // _AVX_
            /* The freshly streamed field becomes the next collide field. */
            swap = collideField;
            collideField = streamField;
            streamField = swap;

            treatBoundary(collideField, flagField, bddParams, xlength);

            /* Guard the modulo: a plotting interval of 0 would divide by zero. */
            if (timestepsPerPlotting != 0 && t % timestepsPerPlotting == 0)
            {
//                writeVtkOutput(collideField, flagField, "./Paraview/output", (unsigned int) t / timestepsPerPlotting, xlength);

                /* check correctness */
                #ifdef DEBUG
                exactCollideField = (double *) malloc ( ( size_t ) sizeof(double) * PARAMQ * (xlength[0] + 2) *  (xlength[1] + 2) * (xlength[2] + 2));
                FILE *fp = NULL;
                unsigned int line = 0;
                int noOfReadEntries;
                int error = 0;
                char szFileName[80];
                sprintf( szFileName, "Testdata/%s/%i.dat", problem, t / timestepsPerPlotting );
                fp = fopen(szFileName,"r");
                if (fp != NULL)
                {
                    for (line = 0; line < PARAMQ * (xlength[0] + 2) *  (xlength[1] + 2) * (xlength[2] + 2); line++)
                    {
                        noOfReadEntries = fscanf(fp,"%lf",&exactCollideField[line]);
                        if (noOfReadEntries != 1)
                            continue;
                    }
                    /* Close only a stream that was opened; fclose(NULL) is
                     * undefined. */
                    fclose(fp);
                }
                /* NOTE(review): when the reference file is missing, the
                 * comparison below reads unset memory - confirm whether it
                 * should be skipped in that case. */
                for (int i = 0; i < PARAMQ; i++)
                    for (int z = 1; z <= xlength[2]; z++)
                        for (int y = 1; y <= xlength[1]; y++)
                            for(int x = 1; x <= xlength[0]; x++)
                                if (flagField[z * (xlength[0] + 2) * (xlength[1] + 2) + y * (xlength[0] + 2) + x] == FLUID)
                                    if (fabs(collideField[z * (xlength[0] + 2) * (xlength[1] + 2) + y * (xlength[0] + 2 ) + x + (xlength[0] + 2) * (xlength[1] + 2) * (xlength[2] + 2) * i] - exactCollideField[PARAMQ * (z * (xlength[0] + 2) * (xlength[1] + 2) + y * (xlength[0] + 2) + x) + i]) > 1e-4)
                                        error = 1;
                if (error)
                    printf("ERROR: Different collideField in timestep %d\n", t);
                free(exactCollideField);
                #endif // DEBUG
                /** end of debugging code */
            }
            /* NOTE(review): the PAPI documentation says PAPI_read_counters()
             * resets the counters after reading, so the values printed at the
             * end would cover only the final timestep - confirm whether
             * totals over the whole run were intended. */
            PAPI_read_counters( counters, 3 );
            int pct = ((float) t / timesteps) * 100;

            printf("\b\b\b%02d%%", pct);
            fflush(stdout);


        }
        printf("\b\b\b\b100%%\n");
        end = clock();
        gettimeofday(&time_end, NULL);
        time_spent = (double) (end - begin) / CLOCKS_PER_SEC;

        printf("Running time: %.2fs\n", time_spent);
        printf("Running time (Wall clock): %.2fs\n", ( (double) (( time_end.tv_sec - time_start.tv_sec) * 1000000u + time_end.tv_usec - time_start.tv_usec) )/ 1e6);
        printf("MLUPS: %.3f\n", ((double) (xlength[0] + 2) * (xlength[1] + 2) * (xlength[2] + 2) * timesteps) / (1000000.0 * ((time_end.tv_sec - time_start.tv_sec) * 1000000u + time_end.tv_usec - time_start.tv_usec) / 1e6));


        printf("%lld L2 cache misses (%.3lf%% misses) in %lld cycles\n",
		counters[1],
		(double)counters[1] / (double)counters[2] * 100,
		counters[0] );
        free(collideField);
        free(streamField);
        free(flagField);

    }
    return 0;
}
Пример #18
0
/**
 * Runs the gccg solver on one input file and records PAPI hardware counters
 * separately for three phases: input/initialisation, computation and output.
 *
 * Command line: data_type ("text" or "bin"), input_file, output_file.
 * Besides output_file the run writes SU.vtk, VAR.vtk and CGUP.vtk, and the
 * measured performance goes to pstats.dat.
 *
 * Failures while starting/reading the counters or while writing any output
 * file are reported on stdout but do not abort the run.
 *
 * @return EXIT_SUCCESS when the run reaches the end; EXIT_FAILURE on a usage
 *         error, an unknown data_type, a failed input read, a failed
 *         allocation, or a reference residual below 1e-15.
 */
int main(int argc, char *argv[]) {

    if (argc < 4) {
        printf("Usage: %s data_type(text or bin) input_file output_file\n", argv[0]);
        return EXIT_FAILURE;
    }

    /** one slot per measured phase: input, computation, output */
    enum { NUM_PHASES = 3 };
    /** cnorm and oc are indexed 0..10 no matter how small the mesh is */
    enum { NORM_SLOTS = 11 };

    char *file_type = argv[1];
    char *file_in = argv[2];
    char *file_out = argv[3];
    char *str1 = "SU.vtk";
    char *str2 = "VAR.vtk";
    char *str3 = "CGUP.vtk";
    char *file_perf = "pstats.dat";
    /** internal cells start and end index */
    int nintci, nintcf;
    /** external cells start and end index. The external cells are only ghost
    cells. They are accessed only through internal cells */
    int nextci, nextcf;
    /** link cell-to-cell array. Stores topology information */
    int **lcc;
    /** red-black colouring of the cells */
    int *nboard;
    /** boundary coefficients for each volume cell */
    double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su;
    /** parameters used for vol2mesh and for writing the vtk files */
    int* nodeCnt;
    int*** points;
    int*** elems;
    /** Raw counter values per phase. These small fixed-size buffers live on
    the stack so that no exit path can leak them. */
    long long values_i[NUM_EVENTS] = {0};
    long long values_c[NUM_EVENTS] = {0};
    long long values_o[NUM_EVENTS] = {0};
    /** Derived per-phase metrics: FP_INS per microsecond, L2 miss rate,
    fraction of PEAKPER, and elapsed microseconds */
    double mflops[NUM_PHASES] = {0};
    double Lmirate[NUM_PHASES] = {0};
    double util[NUM_PHASES] = {0};
    long long et[NUM_PHASES] = {0};
    long long start_usec, end_usec_1, end_usec_2, end_usec_3;
    /** On the mpp_inter cluster the L1 and L2 events cannot be counted at the
    same time, so they are split into two groups; swap in the commented line
    to measure L1 instead. Index 0/1 feed the miss rate, index 2 the FP rate. */
    int Events[NUM_EVENTS]={PAPI_L2_TCM,PAPI_L2_TCA,PAPI_FP_INS,PAPI_TOT_CYC};
    // int Events[NUM_EVENTS]={PAPI_L1_TCM,PAPI_L1_TCA,PAPI_FP_INS,PAPI_TOT_CYC};

    /** start HW counters and execution time recorder */
    if (PAPI_start_counters(Events, NUM_EVENTS) != PAPI_OK) {
        printf("Fail to start PAPI counter\n");
    }
    start_usec = PAPI_get_real_usec(); // starting time in microseconds

    /* initialization */
    // read-in the input file
    int f_status;
    if (strcmp(file_type, "text") == 0) {
        f_status = read_formatted(file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc,
                                  &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard);
    } else if (strcmp(file_type, "bin") == 0) {
        f_status = read_formatted_bin(file_in, &nintci, &nintcf, &nextci,
                                      &nextcf, &lcc, &bs, &be, &bn, &bw,
                                      &bl, &bh, &bp, &su, &nboard);
    } else {
        printf ("Input file format is nor correct\n");
        return EXIT_FAILURE;
    }
    if (f_status != 0) {
        printf("failed to initialize data!\n");
        return EXIT_FAILURE;
    }

    // allocate arrays used in gccg
    int nomax = 3;
    /** the reference residual */
    double resref = 0.0;
    /** the ratio between the reference and the current residual */
    double ratio = 0.0;
    /* oc and cnorm need at least NORM_SLOTS entries: the initialisation loop
       below touches indices 0..10 and the solver uses cnorm[1..nomax]. */
    int aux_len = (nintcf + 1 > NORM_SLOTS) ? nintcf + 1 : NORM_SLOTS;
    /** array storing residuals */
    double* resvec = (double *) calloc(nintcf + 1, sizeof(double));
    /** the variation vector -> keeps the result in the end */
    double* var = (double *) calloc(nextcf + 1, sizeof(double));
    /** the computation vectors */
    double* direc1 = (double *) calloc(nextcf + 1, sizeof(double));
    double* direc2 = (double *) calloc(nextcf + 1, sizeof(double));
    /** additional vectors */
    double* cgup = (double *) calloc(nextcf + 1, sizeof(double));
    double* oc = (double *) calloc(aux_len, sizeof(double));
    double* cnorm = (double *) calloc(aux_len, sizeof(double));
    double* adxor1 = (double *) calloc(nintcf + 1, sizeof(double));
    double* adxor2 = (double *) calloc(nintcf + 1, sizeof(double));
    double* dxor1 = (double *) calloc(nintcf + 1, sizeof(double));
    double* dxor2 = (double *) calloc(nintcf + 1, sizeof(double));
    if (!resvec || !var || !direc1 || !direc2 || !cgup || !oc || !cnorm
            || !adxor1 || !adxor2 || !dxor1 || !dxor2) {
        printf("failed to allocate solver arrays!\n");
        return EXIT_FAILURE;
    }

    /** cell index used by all loops below */
    int nc = 0;
    // initialize the reference residual
    for (nc = nintci; nc <= nintcf; nc++) {
        resvec[nc] = su[nc];
        resref = resref + resvec[nc] * resvec[nc];
    }
    resref = sqrt(resref);
    if (resref < 1.0e-15) {
        printf("i/o - error: residue sum less than 1.e-15 - %lf\n", resref);
        return EXIT_FAILURE;
    }

    // initialize the arrays
    for (nc = 0; nc < NORM_SLOTS; nc++) {
        oc[nc] = 0.0;
        cnorm[nc] = 1.0;
    }

    for (nc = nintci; nc <= nintcf; nc++) {
        cgup[nc] = 0.0;
        var[nc] = 0.0;
    }

    for (nc = nextci; nc <= nextcf; nc++) {
        var[nc] = 0.0;
        cgup[nc] = 0.0;
        direc1[nc] = 0.0;
        bs[nc] = 0.0;
        be[nc] = 0.0;
        bn[nc] = 0.0;
        bw[nc] = 0.0;
        bl[nc] = 0.0;
        bh[nc] = 0.0;
    }

    for (nc = nintci; nc <= nintcf; nc++) {
        cgup[nc] = 1.0 / bp[nc];
    }
    int iter = 1;
    int nor = 1;
    int nor1 = nor - 1;

    /* finished initialization */
    /* read PAPI HW counters and calculate performance of the input phase */
    if (PAPI_read_counters(values_i, NUM_EVENTS) != PAPI_OK) {
        printf("fail to read papi counter");
    }
    Lmirate[0] = (double) values_i[0] / values_i[1];
    end_usec_1 = PAPI_get_real_usec();
    mflops[0] = (double) values_i[2] / (end_usec_1 - start_usec);
    util[0] = mflops[0] / PEAKPER;

    /* start computation loop */
    while (iter < 10000) {

        /* start phase 1 */
        // update the old values of direc
        for (nc = nintci; nc <= nintcf; nc++) {
            direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc];
        }

        // compute new guess (approximation) for direc
        for (nc = nintci; nc <= nintcf; nc++) {
            direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]]
                    - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]]
                    - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]]
                    - bh[nc] * direc1[lcc[5][nc]];
        } /* end phase 1 */

        /* start phase 2 */
        // execute normalization steps against the stored directions
        double oc1, oc2, occ;
        if (nor1 == 1) {
            oc1 = 0;
            occ = 0;
            for (nc = nintci; nc <= nintcf; nc++) {
                occ = occ + adxor1[nc] * direc2[nc];
            }
            oc1 = occ / cnorm[1];
            for (nc = nintci; nc <= nintcf; nc++) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc];
            }
        } else if (nor1 == 2) {
            oc1 = 0;
            occ = 0;
            for (nc = nintci; nc <= nintcf; nc++) {
                occ = occ + adxor1[nc] * direc2[nc];
            }
            oc1 = occ / cnorm[1];
            oc2 = 0;
            occ = 0;
            for (nc = nintci; nc <= nintcf; nc++) {
                occ = occ + adxor2[nc] * direc2[nc];
            }
            oc2 = occ / cnorm[2];
            for (nc = nintci; nc <= nintcf; nc++) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc];
            }
        }

        cnorm[nor] = 0;
        double omega = 0;

        // compute the new residual
        for (nc = nintci; nc <= nintcf; nc++) {
            cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc];
            omega = omega + resvec[nc] * direc2[nc];
        }
        omega = omega / cnorm[nor];
        double resnew = 0.0;
        for (nc = nintci; nc <= nintcf; nc++) {
            var[nc] = var[nc] + omega * direc1[nc];
            resvec[nc] = resvec[nc] - omega * direc2[nc];
            resnew = resnew + resvec[nc] * resvec[nc];
        }
        resnew = sqrt(resnew);
        ratio = resnew / resref;

        // stop once the residual ratio has dropped to 1e-10 or below
        if (ratio <= 1.0e-10) {
            break;
        }
        iter++;

        // prepare additional arrays for the next iteration step
        if (nor == nomax) {
            nor = 1;
        } else {
            if (nor == 1) {
                for (nc = nintci; nc <= nintcf; nc++) {
                    dxor1[nc] = direc1[nc];
                    adxor1[nc] = direc2[nc];
                }
            } else if (nor == 2) {
                for (nc = nintci; nc <= nintcf; nc++) {
                    dxor2[nc] = direc1[nc];
                    adxor2[nc] = direc2[nc];
                }
            }
            nor++;
        }
        nor1 = nor - 1;

    } /* end phase 2 */

    /* finished computation loop */
    /* read PAPI HW counters and calculate performance of the computation phase */
    end_usec_2 = PAPI_get_real_usec(); // ending time in microseconds
    if (PAPI_read_counters(values_c, NUM_EVENTS) != PAPI_OK) {
        printf("fail to read papi counter");
    }

    Lmirate[1] = (double) values_c[0] / values_c[1];
    mflops[1] = (double) values_c[2] / (end_usec_2 - end_usec_1);
    util[1] = mflops[1] / PEAKPER;

    /* write output file */
    if (write_result(file_in, file_out, nintci, nintcf, var, iter, ratio) != 0) {
        printf("error when trying to write to file %s\n", file_out);
    }

    // transfer volume to mesh
    if (vol2mesh(nintci, nintcf, lcc, &nodeCnt, &points, &elems) != 0) {
        printf("error when trying to converge topology to volume");
    }
    // write output to vtk files
    if (write_result_vtk(str1, nintci, nintcf, nodeCnt, points, elems, su) != 0) {
        printf("error when write SU to vtk file");
    }
    if (write_result_vtk(str2, nintci, nintcf, nodeCnt, points, elems, var) != 0) {
        printf("error when write VAR to vtk file");
    }
    if (write_result_vtk(str3, nintci, nintcf, nodeCnt, points, elems, cgup) != 0) {
        printf("error when write CGUP to vtk file");
    }

    /* read PAPI HW counters and calculate performance of the output phase */
    if (PAPI_stop_counters(values_o, NUM_EVENTS) != PAPI_OK) {
        printf("fail to stop papi counter");
    }

    Lmirate[2] = (double) values_o[0] / values_o[1];
    end_usec_3 = PAPI_get_real_usec(); // ending time in microseconds
    mflops[2] = (double) (values_o[2]) / (end_usec_3 - end_usec_2);
    util[2] = mflops[2] / PEAKPER;

    /** Write all measured performance to pstats.dat */
    et[0] = end_usec_1 - start_usec;
    et[1] = end_usec_2 - end_usec_1;
    et[2] = end_usec_3 - end_usec_2;
    if (write_result_dat(file_perf, values_i, values_c, values_o, Lmirate, et, mflops, util) != 0) {
        printf("error when write measured performance to data file");
    }

    /* Free all the dynamically allocated memory */
    /* NOTE(review): lcc, nboard, nodeCnt, points and elems are allocated by
       helpers whose allocation layout is not visible here, so they are left
       to process exit as before - confirm and release them if appropriate. */
    free(direc2); free(direc1); free(dxor2); free(dxor1); free(adxor2); free(adxor1);
    free(cnorm); free(oc); free(var); free(cgup); free(resvec); free(su); free(bp);
    free(bh); free(bl); free(bw); free(bn); free(be); free(bs);
    printf("Simulation completed successfully!\n");
    return EXIT_SUCCESS;
    }
Пример #19
0
/**
 * Thread entry point for the shortest-path run.
 *
 * `data` is the thread's thread_data_t. After per-thread setup and a barrier,
 * the thread repeatedly removes a (distance, node) pair from whichever queue
 * is selected in `d` (sl, pq or lin), skips pairs whose distance no longer
 * matches nodes[node].dist, and for every outgoing edge tries to lower the
 * neighbour's distance with a compare-and-swap; each successful swap
 * re-inserts the neighbour into the queue and bumps d->nb_add.
 *
 * The loop ends once the queue has looked empty more than 20 * d->nb_threads
 * times in a row. The process exits with status 1 if no queue is selected.
 *
 * Always returns NULL.
 */
void* sssp(void *data) {
  thread_data_t *d = (thread_data_t *)data;

  /* Per-thread setup. */
  set_cpu(the_cores[d->id]);
  ssalloc_init();
  PF_CORRECTION;

  seeds = seed_rand();

#ifdef PIN
  /* Optional explicit pinning: cpu index derived from the thread id. */
  int tid = d->id;
  int cpu = 4*(tid%20) + tid/20;
  pin(pthread_self(), cpu);
#endif

#ifdef PAPI
  if (PAPI_OK != PAPI_start_counters(g_events, G_EVENT_COUNT))
  {
    printf("Problem starting counters 1.");
  }
#endif

  /* All threads start the search together. */
  barrier_cross(d->barrier);

  /* Consecutive rounds in which the queue reported "empty". */
  int empty_rounds = 0;

  for (;;) {
    val_t node;
    slkey_t dist_node;

    /* Pull the next candidate; retry until one arrives or the queue
       signals emptiness by leaving -1 in dist_node. */
    for (;;) {
      if (d->sl) {
        if (spray_delete_min_key(d->set, &dist_node, &node, d)) {
          break;
        }
      } else if (d->pq) {
        if (lotan_shavit_delete_min_key(d->set, &dist_node, &node, d)) {
          break;
        }
      } else if (d->lin) {
        /* This queue is polled exactly once per round. */
        node = (val_t) deletemin_key(d->linden_set, &dist_node, d);
        break;
      } else {
        printf("error: no queue selected\n");
        exit(1); // TODO: grace
      }
      if (dist_node == -1) {
        break;
      }
      dist_node = 0;
    }

    if (dist_node == -1) {
      /* TODO: make sure threads don't quit early; this needs a better
         termination condition. */
      if (++empty_rounds > 20*d->nb_threads) {
        break;
      }
      continue;
    }
    empty_rounds = 0;

    /* Stale entry: the node's distance changed after it was queued. */
    if (dist_node != nodes[node].dist) {
      continue;
    }
    nodes[node].times_processed++;

    /* Relax every outgoing edge of the node. */
    int e;
    for (e = 0; e < nodes[node].deg; e++) {
      int to = nodes[node].adj[e];
      int cost = nodes[node].weights[e];
      slkey_t seen = nodes[to].dist;

      /* Nothing to do unless the neighbour is unreached (-1) or this path
         is strictly shorter than the distance just observed. */
      if (seen != -1 && !(dist_node + cost < seen)) {
        continue;
      }

      int swapped = ATOMIC_CAS_MB(&nodes[to].dist, seen, dist_node+cost);
      if (!swapped) {
        continue;
      }

      /* Only the thread whose CAS succeeded enqueues the neighbour. */
      if (d->pq || d->sl) {
        sl_add_val(d->set, dist_node+cost, to, TRANSACTIONAL);
      } else if (d->lin) {
        insert(d->linden_set, dist_node+cost, to);
      }
      d->nb_add++;
    }
  }

#ifdef PAPI
  if (PAPI_OK != PAPI_read_counters(g_values[d->id], G_EVENT_COUNT))
  {
    printf("Problem reading counters 2.");
  }
#endif

  PF_PRINT;

  return NULL;
}
Пример #20
0
/**
 * Measures one call to matrix_prod on two n x n matrices.
 *
 * Command line: n (matrix dimension) and NB (passed through to matrix_prod
 * as its last argument). The matrices are indexed 1..n with a[i][j] = i + j
 * and b[i][j] = i - j.
 *
 * On success prints one tab-separated line
 *   n  PAPI_TOT_INS  PAPI_FP_OPS  PAPI_L1_DCM  elapsed_ms
 * where the counters cover only the matrix_prod call and elapsed_ms is the
 * wall-clock time rounded to the nearest millisecond.
 *
 * @return 0 on success, 1 when the arguments are missing or not positive.
 *         PAPI failures are passed to ehandler.
 */
int main(int argc, char *argv[])
{
    enum { EVENT_COUNT = 3 };
    int events[EVENT_COUNT] = { PAPI_TOT_INS, PAPI_FP_OPS, PAPI_L1_DCM };
    long long values[EVENT_COUNT] = { 0 };
    float **a, **b, **c;
    int n;
    int NB;
    int i, j;
    struct timeval t0, t1;
    long mtime, seconds, useconds;

    /* Both sizes come from the command line; refuse to read past argv. */
    if (argc < 3) {
        fprintf(stderr, "Usage: %s n NB\n", argc > 0 ? argv[0] : "prog");
        return 1;
    }
    n = atoi(argv[1]);
    NB = atoi(argv[2]);
    /* atoi yields 0 for non-numeric text. NOTE(review): assumes neither a
       dimension nor an NB of zero or less is meaningful - confirm against
       matrix() and matrix_prod(). */
    if (n <= 0 || NB <= 0) {
        fprintf(stderr, "n and NB must be positive integers\n");
        return 1;
    }

    // Using PAPI - from countloop.c
    if (PAPI_VER_CURRENT !=
            PAPI_library_init(PAPI_VER_CURRENT))
        ehandler("PAPI_library_init error.");

    /* A negative value is a PAPI error code and is rejected as well. */
    if (PAPI_num_counters() < EVENT_COUNT)
        ehandler("Not enough hardware counters.");

    if (PAPI_OK != PAPI_query_event(PAPI_TOT_INS))
        ehandler("Cannot count PAPI_TOT_INS.");

    if (PAPI_OK != PAPI_query_event(PAPI_FP_OPS))
        ehandler("Cannot count PAPI_FP_OPS.");

    if (PAPI_OK != PAPI_query_event(PAPI_L1_DCM))
        ehandler("Cannot count PAPI_L1_DCM.");

    a = matrix(1,n,1,n);
    for (i=1; i<=n; i++)
        for (j=1; j<=n; j++)
            a[i][j] = i+j;

    b = matrix(1,n,1,n);
    for (i=1; i<=n; i++)
        for (j=1; j<=n; j++)
            b[i][j] = i-j;

    gettimeofday(&t0, NULL);

    // Start PAPI
    if (PAPI_OK != PAPI_start_counters(events, EVENT_COUNT))
        ehandler("Problem starting counters.");

    /* This first read discards whatever was counted since the start, so the
       second read below reports the product alone. */
    if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
        ehandler("Problem reading counters.");

    c = matrix_prod(n,n,n,n,a,b,NB);

    if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
        ehandler("Problem reading counters.");

    gettimeofday(&t1, NULL);
    seconds = t1.tv_sec - t0.tv_sec;
    useconds = t1.tv_usec - t0.tv_usec;
    /* The added 0.5 rounds to the nearest millisecond when truncated to long. */
    mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5;

    printf("%d\t%lld\t%lld\t%lld\t%ld\n", n, values[0], values[1],
           values[2], mtime);

    /* NOTE(review): a, b and c are not released here; the matching
       deallocator for matrix()/matrix_prod() is not visible in this file. */
    (void) c;
    return 0;
}