Example #1
0
int main(int argc, char *argv[]) {
  pthread_t *threads;
  pthread_attr_t attr;
  uint32_t **ranks;
  void *status;

#if defined(PAPI_ENABLED) && !defined(DEBUG)
  int num_sets;
  PAPI_event_set_wrapper_t* event_sets;
#endif
  int rc;
  uint32_t t;

  printf("Optimized Stream benchmark (using SSE intrinsics)\n");

  init_flush_cache_array();
  malloc_arrays(argv);
  print_array_parameters();
  select_code_variant(argv);
  print_code_variant_parameters();

  threads = (pthread_t *) malloc(numThreads * sizeof(pthread_t));
  ranks = (uint32_t **) malloc(numThreads * sizeof(uint32_t *));

#if !defined(DEBUG)
#if defined(PAPI_ENABLED)
  papi_init(desired_events, num_desired, &event_sets, &num_sets);

  // initialize threaded PAPI
  if (PAPI_thread_init((unsigned long (*)(void)) (pthread_self)) != PAPI_OK) {
    printf("Error with PAPI_thread_init().\n");
    exit(EXIT_FAILURE);
  }

  results = (double *) malloc(num_sets * numThreads * NUM_TRIALS * sizeof(double));
  if (results==NULL) {
    printf("Error on array results malloc.\n");
    exit(EXIT_FAILURE);
  }
#else
  results = (double *) malloc(numThreads * NUM_TRIALS * sizeof(double));
  if (results==NULL) {
    printf("Error on array results malloc.\n");
    exit(EXIT_FAILURE);
  }
#if defined(CYCLE_TIME)
  // calculate clock rate
  GET_CLOCK_RATE(results, NUM_TRIALS);
  median_counts_per_sec = find_median(results, NUM_TRIALS);
  //printf("Median ticks per second = %e\n", median_counts_per_sec);

#else
  timer_init();
  median_counts_per_sec = 1.0;
#endif
#endif
#endif

  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
  barrier_init(&my_barrier, numThreads);
#if defined(AFFINITY_ENABLED)
  Affinity_Init();
#endif

  // run stream tests
  for (t=0; t < numThreads; t++) {
    ranks[t] = (uint32_t *) malloc(sizeof(uint32_t));
    *ranks[t] = t;
  }

  for (t=1; t < numThreads; t++) {
#if defined(DEBUG)
    printf("Creating thread %u\n", t);
#endif
    rc = pthread_create(&threads[t], &attr, pthreads_each, (void *) ranks[t]);
    if (rc) {
      printf("ERROR; return code from pthread_create() is %d\n", rc);
      exit(EXIT_FAILURE);
    }
  }
  pthreads_each((void *) ranks[0]);

  // join the other threads
  for (t=1; t < numThreads; t++) {
    pthread_join(threads[t], &status);
  }

#if defined(PAPI_ENABLED) && !defined(DEBUG)
  papi_cleanup(event_sets, num_sets);
#endif
  pthread_attr_destroy(&attr);
  pthread_exit(NULL);
  barrier_destroy(&my_barrier);
  free_arrays();

  return EXIT_SUCCESS;
}
Example #2
0
int main(int argc, char *argv[]) {
  double results[NUM_TRIALS];
#if !defined(DEBUG)
#if defined(PAPI_ENABLED)
  int papi_setnum, num_desired, num_sets;
#else
  double median_counts_per_sec;
#endif
#endif
  int i;

  printf("7-point stencil, no add, naive C code with non-periodic boundary conditions\n");

#if !defined(DEBUG)
#if defined(PAPI_ENABLED)
  // initialize papi
  int desired_events[] = {PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L2_DCA, PAPI_L2_DCM, PAPI_L3_DCM, PAPI_TLB_DM, PAPI_LD_INS, PAPI_SR_INS};
  num_desired = 9;
  PAPI_event_set_wrapper_t* event_sets;
  papi_init(desired_events, num_desired, &event_sets, &num_sets);
#else
  // calculate clock rate
  GET_CLOCK_RATE(results, NUM_TRIALS);
  median_counts_per_sec = find_median(results, NUM_TRIALS);
#endif
#endif

  // initialize arrays
  init_flush_cache_array();
  malloc_grids(argv);
  printf("\n");

#if defined(DEBUG)
  init_grids();
  printf("SINGLY NESTED LOOP:\n");
  printf("\nGRID A BEFORE:");
  print_grid(A);
  printf("\nGRID B BEFORE:");
  print_grid(B);

  naive_singly_nested_loop();

  printf("\nGRID A AFTER:");
  print_grid(A);
  printf("\nGRID B AFTER:");
  print_grid(B);

  init_grids();
  printf("TRIPLY NESTED LOOPS:\n");
  printf("\nGRID A BEFORE:");
  print_grid(A);
  printf("\nGRID B BEFORE:");
  print_grid(B);

  naive_triply_nested_loops();

  printf("\nGRID A AFTER:");
  print_grid(A);
  printf("\nGRID B AFTER:");
  print_grid(B);
#else
#if defined(PAPI_ENABLED)
  printf("SINGLY NESTED LOOP:\n");
  for (papi_setnum=0; papi_setnum < num_sets; papi_setnum++) {
    PAPI_MAKE_MEASUREMENTS(event_sets[papi_setnum].set, naive_singly_nested_loop(), NUM_TRIALS, results);
    print_papi_measurements(&(event_sets[papi_setnum]), results, NUM_TRIALS);
  }
  printf("\n");
  printf("TRIPLY NESTED LOOPS:\n");
  for (papi_setnum=0; papi_setnum < num_sets; papi_setnum++) {
    PAPI_MAKE_MEASUREMENTS(event_sets[papi_setnum].set, naive_triply_nested_loops(), NUM_TRIALS, results);
    print_papi_measurements(&(event_sets[papi_setnum]), results, NUM_TRIALS);
  }
  printf("\n");
  papi_cleanup(event_sets, num_sets);
#else
  printf("SINGLY NESTED LOOP:\n");
  TIMER_MAKE_MEASUREMENTS(naive_singly_nested_loop(), results, NUM_TRIALS);
  print_timer_measurements(results, NUM_TRIALS, median_counts_per_sec);
  printf("\n");
  printf("TRIPLY NESTED LOOPS:\n");
  TIMER_MAKE_MEASUREMENTS(naive_triply_nested_loops(), results, NUM_TRIALS);
  print_timer_measurements(results, NUM_TRIALS, median_counts_per_sec);
  printf("\n");
  printf("\n");
#endif
#endif

  printf("\nFinal interior values: A[%lu, %lu, %lu] = %4.2e, B[%lu, %lu, %lu] = %4.2e\n", nx/2, ny/2, nz/2, A[Index3D(nx/2, ny/2, nz/2)], nx/2, ny/2, nz/2, B[Index3D(nx/2, ny/2, nz/2)]);
  fc_checksum();
  free(A);
  free(B);

  return EXIT_SUCCESS;
}