Ejemplo n.º 1
0
/*
 * Driver for the Lace variant of the benchmark.
 *
 * Parses the Lace and UTS command-line options, prints the UTS parameters,
 * initializes the root node, brings up Lace, times a single parTreeSearch
 * call on the root and reports the counters found in its Result.
 *
 * Always returns 0.
 */
int main(int argc, char *argv[]) {
  /* Lace gets the command line first; argc is handed over by address so the
   * call is able to adjust it before the UTS parser runs. */
  lace_parseParams(&argc, argv);
  uts_parseParams(argc, argv);
  uts_printParams();

  Node root;
  uts_initRoot(&root, type);

  /* Bring up the Lace runtime with the configured worker count / deque size. */
  lace_init(_lace_workers, _lace_dqsize);
  lace_startup(32*1024*1024, 0, 0);
  printf("Initialized Lace with %d workers, dqsize=%d\n", _lace_workers, _lace_dqsize);

  LACE_ME;

  /* Wall-clock interval around the whole parallel search. */
  const double started = uts_wctime();
  Result outcome = CALL(parTreeSearch, 0, &root);
  const double finished = uts_wctime();
  const double elapsed = finished - started;

  /* Publish the search result through the counters that are reported below. */
  maxTreeDepth = outcome.maxdepth;
  nNodes       = outcome.size;
  nLeaves      = outcome.leaves;

  uts_showStats(GET_NUM_THREADS, 0, elapsed, nNodes, nLeaves, maxTreeDepth);
  printf("Time: %f\n", elapsed);

  lace_exit();
  return 0;
}
Ejemplo n.º 2
0
/*
 * Driver for the StealStack variant of the benchmark.
 *
 * Sets up the steal stacks, parses the UTS parameters, runs the timed
 * parallel search on every thread for which ss_start() returns non-zero,
 * then stops the layer, prints the statistics and finalizes.
 *
 * Always returns 0.
 */
int main(int argc, char *argv[]) {
  /* Kept at function scope so the node stays alive for the whole run. */
  Node root;

  /* Steal stacks and communication layer come up first; argc/argv are
   * passed by address. */
  StealStack *ss = ss_init(&argc, &argv);

  /* Benchmark parameters, then the trace/statistics structures. */
  uts_parseParams(argc, argv);
  ss_initStats(ss);

  /* Only thread 0 prints the parameter settings. */
  if (ss_get_thread_num() == 0) {
    uts_printParams();
  }

  fflush(NULL);

  /* ss_start() yields 1 for workers; everyone else (managers) gets 0 back
   * here once the computation has ended and skips the search. */
  if (ss_start(sizeof(Node), chunkSize)) {

    /* Thread 0 seeds its own stack with the root node. */
    if (ss_get_thread_num() == 0) {
      uts_initRoot(&root, type);
#ifdef TRACE
      ss_markSteal(ss, 0); /* first session is own "parent session" */
#endif
      ss_put_work(ss, &root);
    }

    /* Wall-clock interval around the parallel search. */
    const double began = uts_wctime();
    parTreeSearch(ss);
    const double ended = uts_wctime();
    ss->walltime = ended - began;
#ifdef TRACE
    ss->startTime = began;
    ss->sessionRecords[SS_IDLE][ss->entries[SS_IDLE] - 1].endTime = ended;
#endif
  }

  ss_stop();

  /* Report, then tear down. */
  showStats();
  ss_finalize();

  return 0;
}
Ejemplo n.º 3
0
/*  Main() function for: Sequential, OpenMP, UPC, and Shmem
 *
 *  Zeroes the per-thread bookkeeping arrays and the steal-buffer locks, then
 *  runs the whole benchmark inside the lambda handed to hclib::launch:
 *  parameter parsing, root-node setup, the work/steal retry loop, the
 *  reduction of the node/leaf counters and the statistics output on PE 0.
 *  Always returns 0.
 *
 *  Notes on execution model:
 *     - under openMP, global vars are all shared
 *     - under UPC, global vars are private unless explicitly shared
 *     - UPC is SPMD starting with main, OpenMP goes SPMD after
 *       parsing parameters
 *
 *  NOTE(review): the body below only uses hclib:: and hclib::shmem_* calls;
 *  the OpenMP/UPC notes above look inherited from another variant of this
 *  file -- confirm they still apply.
 */
int main(int argc, char *argv[]) {

  /* Clear the bookkeeping arrays before the runtime is launched. */
#ifdef THREAD_METADATA
  memset(t_metadata, 0x00, MAX_OMP_THREADS * sizeof(thread_metadata));
#endif
  memset(thread_info, 0x00, MAX_OMP_THREADS * sizeof(per_thread_info));
  memset(steal_buffer_locks, 0x00, MAX_SHMEM_THREADS * sizeof(long));

  /* Everything else runs inside this lambda; argc/argv are captured by copy. */
  hclib::launch([argc, argv] {

      /* pe and npes are not declared in this block and are used below
       * without being captured, so they live outside main (file scope or
       * similar). */
      pe = hclib::pe_for_locale(hclib::shmem_my_pe());
      npes = hclib::shmem_n_pes();

      /* determine benchmark parameters (all PEs) */
      uts_parseParams(argc, argv);

#ifdef UTS_STAT
      if (stats) {
        initHist();
      }
#endif  

      double t1, t2, et;

      /* show parameter settings */
      if (pe == 0) {
          uts_printParams();
      }

      /* Every PE initializes a root node, but on the first pass only PE 0
       * expands it (see the `first` check below). */
      Node root;
      initRootNode(&root, type);

      hclib::shmem_barrier_all();

      /* time parallel search */
      t1 = uts_wctime();

      int n_omp_threads;

    /********** SPMD Parallel Region **********/
      /* `first` is 1 only for the initial pass through the retry loop. */
      int first = 1;
      n_omp_threads = hclib::num_workers();
      /* The per-thread arrays zeroed above are sized with MAX_OMP_THREADS. */
      assert(n_omp_threads <= MAX_OMP_THREADS);

      Node child;
      /* Retry loop: each pass expands `root`, then looks for more work,
       * first in the local steal buffer and then via remote_steal(). */
retry:
      initNode(&child);

      /* NOTE(review): presumably hclib::finish returns only after the work
       * started by genChildren has completed -- confirm against hclib. */
      hclib::finish([&first, &root, &child] {

          if (first) {
              /* First pass: only PE 0 expands its root; the other PEs do
               * nothing here and go straight to looking for work. */
              if (pe == 0) {
                  genChildren(&root, &child);
              }
          } else {
              /* Later passes: `root` holds a node taken from the steal
               * buffer or filled in by remote_steal(). */
              genChildren(&root, &child);
          }
      });
      first = 0;

      /* Unlocked peek at the buffered-steal count; it is tested again once
       * the lock for this PE is held (presumably because the count can
       * change in between -- confirm). */
      if (n_buffered_steals > 0) {
          hclib::shmem_set_lock(&steal_buffer_locks[pe]);
          if (n_buffered_steals > 0) {
              /* Take the last buffered entry as the new root and go again. */
              n_buffered_steals--;
              memcpy(&root, &steal_buffer[n_buffered_steals], sizeof(root));
              hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
              goto retry;
          } else {
              hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
          }
      }

      /* Nothing buffered locally: try remote_steal(); a return value of 1
       * sends us back to the top of the retry loop with the updated root. */
      const int got_more_work = remote_steal(&root);
      if (got_more_work == 1) {
          goto retry;
      }

      hclib::shmem_barrier_all();

      t2 = uts_wctime();
      et = t2 - t1;

      /* Fold the per-thread counters into this PE's totals. All
       * MAX_OMP_THREADS slots are summed, not just n_omp_threads; the
       * array was zeroed by the memset at the top of main. */
      int i;
      for (i = 0; i < MAX_OMP_THREADS; i++) {
          n_nodes += thread_info[i].n_nodes;
          n_leaves += thread_info[i].n_leaves;
      }

      hclib::shmem_barrier_all();

      /* Every PE other than 0 adds its totals to n_nodes / n_leaves with a
       * final argument of 0.
       * NOTE(review): presumably that argument is the target PE, so the
       * totals accumulate on PE 0 -- confirm against the shmem API. */
      if (pe != 0) {
          hclib::shmem_int_add(&n_nodes, n_nodes, 0);
          hclib::shmem_int_add(&n_leaves, n_leaves, 0);
      }

      hclib::shmem_barrier_all();

      /* Only PE 0 reports, using the elapsed time measured on this PE. */
      if (pe == 0) {
          showStats(et);
      }
    /********** End Parallel Region **********/
#ifdef THREAD_METADATA
      /* Each PE prints its per-thread task counts when the loop index
       * matches its own PE number; all PEs hit a barrier after every
       * iteration (presumably to keep the output in PE order -- confirm). */
      int p;
      for (p = 0; p < npes; p++) {
          if (p == pe) {
              printf("\n");
              int i;
              for (i = 0; i < n_omp_threads; i++) {
                  printf("PE %d, thread %d: %lu tasks\n", p, i, t_metadata[i].ntasks);
              }
          }
          hclib::shmem_barrier_all();
      }
#endif

  });
  return 0;
}