Esempio n. 1
0
/*
 * Per-thread work loop: keep fetching a node through ss_get_work() and
 * expanding it with genChildren() until ss_get_work() stops reporting
 * STATUS_HAVEWORK.
 *
 * With USING_GTC the two node buffers are GTC task objects and the Node is
 * the task body; otherwise they are plain heap allocations and the buffer
 * pointer and Node pointer refer to the same memory.
 */
void parTreeSearch(StealStack *ss) {
  void *work_buf, *spawn_buf;
  Node *work_node;
  Node *spawn_node;

#ifdef USING_GTC
  work_buf   = (void*) gtc_task_create_ofclass(sizeof(Node), uts_tclass);
  work_node  = (Node*) gtc_task_body((task_t*)work_buf);
  spawn_buf  = (void*) gtc_task_create_ofclass(sizeof(Node), uts_tclass);
  spawn_node = (Node*) gtc_task_body((task_t*)spawn_buf);
#else
  spawn_node = (Node*) malloc(sizeof(Node));
  work_node  = (Node*) malloc(sizeof(Node));
  work_buf   = work_node;
  spawn_buf  = spawn_node;
#endif

  for (;;) {
      /* Stop as soon as the steal stack has nothing more for this thread. */
      if (ss_get_work(ss, work_buf) != STATUS_HAVEWORK)
          break;

      genChildren(work_node, spawn_buf, spawn_node, ss);
#if DEBUG_PROGRESS > 0
      /* Debugging aid: report every DEBUG_PROGRESS-th node count. */
      if (0 == ss->nNodes % DEBUG_PROGRESS)
      	printf("Thread %3d: Progress is %d nodes\n", ss_get_thread_num(), ss->nNodes);
#endif
  }

  /* Release the two buffers with the API matching how they were obtained. */
#ifdef USING_GTC
  gtc_task_destroy(work_buf);
  gtc_task_destroy(spawn_buf);
#else
  free(work_node);
  free(spawn_node);
#endif
}
Esempio n. 2
0
/*
 * Work-stealing parallel search over a UTS tree.
 *
 * Allocates one Node to receive work from the steal stack and one scratch
 * Node handed to genChildren(), then loops until ss_get_work() no longer
 * returns STATUS_HAVEWORK. Both nodes are freed before returning.
 *
 *   Note: tree size is measured by the number of
 *         push operations
 */
void parTreeSearch(StealStack *ss) {
  Node *const current = (Node*) malloc(sizeof(Node));
  Node *const scratch = (Node*) malloc(sizeof(Node));

  for (;;) {
      if (ss_get_work(ss, current) != STATUS_HAVEWORK)
          break;

      genChildren(current, scratch, ss);
      // Debugging: uncomment the two lines below to witness progress...
      //if (ss->nNodes % 10000 == 0)
      //	printf("Thread %d: Progress is %d nodes\n", ss_get_thread_num(), ss->nNodes);
  }

  free(scratch);
  free(current);
}
Esempio n. 3
0
/*  Main() function for: Sequential, OpenMP, UPC, and Shmem
 *
 *  Notes on execution model:
 *     - under openMP, global vars are all shared
 *     - under UPC, global vars are private unless explicitly shared
 *     - UPC is SPMD starting with main, OpenMP goes SPMD after
 *       parsing parameters
 */
int main(int argc, char *argv[]) {

#ifdef THREAD_METADATA
  memset(t_metadata, 0x00, MAX_OMP_THREADS * sizeof(thread_metadata));
#endif
  memset(thread_info, 0x00, MAX_OMP_THREADS * sizeof(per_thread_info));
  memset(steal_buffer_locks, 0x00, MAX_SHMEM_THREADS * sizeof(long));

  hclib::launch([argc, argv] {

      pe = hclib::pe_for_locale(hclib::shmem_my_pe());
      npes = hclib::shmem_n_pes();

      /* determine benchmark parameters (all PEs) */
      uts_parseParams(argc, argv);

#ifdef UTS_STAT
      if (stats) {
        initHist();
      }
#endif  

      double t1, t2, et;

      /* show parameter settings */
      if (pe == 0) {
          uts_printParams();
      }

      Node root;
      initRootNode(&root, type);

      hclib::shmem_barrier_all();

      /* time parallel search */
      t1 = uts_wctime();

      int n_omp_threads;

    /********** SPMD Parallel Region **********/
      int first = 1;
      n_omp_threads = hclib::num_workers();
      assert(n_omp_threads <= MAX_OMP_THREADS);

      Node child;
retry:
      initNode(&child);

      hclib::finish([&first, &root, &child] {

          if (first) {
              if (pe == 0) {
                  genChildren(&root, &child);
              }
          } else {
              genChildren(&root, &child);
          }
      });
      first = 0;

      if (n_buffered_steals > 0) {
          hclib::shmem_set_lock(&steal_buffer_locks[pe]);
          if (n_buffered_steals > 0) {
              n_buffered_steals--;
              memcpy(&root, &steal_buffer[n_buffered_steals], sizeof(root));
              hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
              goto retry;
          } else {
              hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
          }
      }

      const int got_more_work = remote_steal(&root);
      if (got_more_work == 1) {
          goto retry;
      }

      hclib::shmem_barrier_all();

      t2 = uts_wctime();
      et = t2 - t1;

      int i;
      for (i = 0; i < MAX_OMP_THREADS; i++) {
          n_nodes += thread_info[i].n_nodes;
          n_leaves += thread_info[i].n_leaves;
      }

      hclib::shmem_barrier_all();

      if (pe != 0) {
          hclib::shmem_int_add(&n_nodes, n_nodes, 0);
          hclib::shmem_int_add(&n_leaves, n_leaves, 0);
      }

      hclib::shmem_barrier_all();

      if (pe == 0) {
          showStats(et);
      }
    /********** End Parallel Region **********/
#ifdef THREAD_METADATA
      int p;
      for (p = 0; p < npes; p++) {
          if (p == pe) {
              printf("\n");
              int i;
              for (i = 0; i < n_omp_threads; i++) {
                  printf("PE %d, thread %d: %lu tasks\n", p, i, t_metadata[i].ntasks);
              }
          }
          hclib::shmem_barrier_all();
      }
#endif

  });
  return 0;
}
Esempio n. 4
0
/*
 * Generate all children of the parent
 *
 * details depend on tree type, node type and shape function
 *
 * parent: node to expand; its numChildren field is written here.
 * child:  caller-provided scratch node; its type, height and RNG state are
 *         overwritten for each generated child.
 *
 * Each generated child is copied out of `child` and then either
 *   - parked in this PE's steal buffer (only attempted on worker 0, and
 *     only while the buffer has room),
 *   - expanded in a new hclib::async task (height below the cutoff), or
 *   - expanded by a direct recursive call (height at or above the cutoff).
 * A parent with no children is counted as a leaf for the current worker.
 */
void genChildren(Node * parent, Node * child) {
  // Children below this height are expanded in their own async task;
  // deeper ones are expanded inline by recursion.
  constexpr int kAsyncHeightCutoff = 9;

  const int parentHeight = parent->height;

#ifdef THREAD_METADATA
  // Index by hclib worker id, like thread_info below and the per-worker
  // report printed by main(); omp_get_thread_num() does not identify
  // hclib workers.
  t_metadata[hclib::get_current_worker()].ntasks += 1;
#endif

  thread_info[hclib::get_current_worker()].n_nodes++;

  const int numChildren = uts_numChildren(parent);
  const int childType   = uts_childType(parent);

  // record number of children in parent
  parent->numChildren = numChildren;

  if (numChildren <= 0) {
      thread_info[hclib::get_current_worker()].n_leaves++;
      return;
  }

  // Fields shared by every child of this parent.
  child->type = childType;
  child->height = parentHeight + 1;

#ifdef UTS_STAT
  if (stats) {
    // NOTE(review): pp points at the caller's node object, which may be a
    // temporary (e.g. a task-local copy) - confirm its lifetime before
    // dereferencing pp from another task.
    child->pp = parent;  // pointer to parent
  }
#endif

  const unsigned char * parent_state = parent->state.state;
  unsigned char * child_state = child->state.state;

  for (int i = 0; i < numChildren; i++) {
    for (int j = 0; j < computeGranularity; j++) {
      // TBD:  add parent height to spawn
      // computeGranularity controls number of rng_spawn calls per node
      rng_spawn(parent_state, child_state, i);
    }

    // Snapshot the scratch node: `child` is overwritten on the next
    // iteration, and this copy becomes the parent of the next level.
    Node spawned = *child;

    bool made_available_for_stealing = false;
    // The unlocked size test is only a hint; it is repeated under the lock.
    if (hclib::get_current_worker() == 0 && n_buffered_steals < N_BUFFERED_STEALS) {
        hclib::shmem_set_lock(&steal_buffer_locks[pe]);
        if (n_buffered_steals < N_BUFFERED_STEALS) {
            steal_buffer[n_buffered_steals++] = spawned;
            made_available_for_stealing = true;
        }
        hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
    }

    if (made_available_for_stealing) {
        continue;
    }

    if (spawned.height < kAsyncHeightCutoff) {
        hclib::async([spawned] {
            Node grandchild;
            initNode(&grandchild);

            // The by-value capture is const inside the lambda, but
            // genChildren writes to its parent argument, so work on a copy.
            Node subtree_root = spawned;

            genChildren(&subtree_root, &grandchild);
        });
    } else {
        Node grandchild;
        initNode(&grandchild);

        genChildren(&spawned, &grandchild);
    }
  }
}