int main(int argc, char *argv[]) {
    Node root;
    double t1, t2;

    /* parse runtime and benchmark parameters */
    lace_parseParams(&argc, argv);
    uts_parseParams(argc, argv);
    uts_printParams();

    uts_initRoot(&root, type);

    /* start the Lace work-stealing runtime */
    lace_init(_lace_workers, _lace_dqsize);
    lace_startup(32*1024*1024, 0, 0);
    printf("Initialized Lace with %d workers, dqsize=%d\n",
           _lace_workers, _lace_dqsize);

    LACE_ME;

    /* time the parallel search */
    t1 = uts_wctime();
    Result r = CALL(parTreeSearch, 0, &root);
    t2 = uts_wctime();

    maxTreeDepth = r.maxdepth;
    nNodes = r.size;
    nLeaves = r.leaves;

    uts_showStats(GET_NUM_THREADS, 0, t2 - t1, nNodes, nLeaves, maxTreeDepth);
    printf("Time: %f\n", t2 - t1);

    lace_exit();
    return 0;
}
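For reference, the Lace task invoked by CALL above could be declared roughly as follows. This is a sketch, not the benchmark's actual task body: it assumes Lace's TASK_2/SPAWN/SYNC macros, the Result fields (maxdepth, size, leaves) that main() reads, and UTS's uts_numChildren(); the child-initialization step is elided.

/* Sketch only: field order in Result and the child setup are assumptions. */
TASK_2(Result, parTreeSearch, int, depth, Node*, parent)
{
    Result r = { depth, 1, 0 };
    int numChildren = uts_numChildren(parent);
    if (numChildren == 0) {
        r.leaves = 1;            /* no children: count a leaf */
        return r;
    }

    Node child[MAXNUMCHILDREN];  /* assumed: UTS's per-node child cap */
    for (int i = 0; i < numChildren; i++) {
        /* ...initialize child[i] from parent (elided)... */
        SPAWN(parTreeSearch, depth + 1, &child[i]);
    }
    for (int i = 0; i < numChildren; i++) {
        Result c = SYNC(parTreeSearch);
        if (c.maxdepth > r.maxdepth) r.maxdepth = c.maxdepth;
        r.size += c.size;        /* merge subtree counts */
        r.leaves += c.leaves;
    }
    return r;
}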
int main(int argc, char *argv[]) {
    double t1, t2;
    Node root;
    StealStack *ss;

    /* initialize stealstacks and comm. layer */
    ss = ss_init(&argc, &argv);

    /* determine benchmark parameters */
    uts_parseParams(argc, argv);

    /* Initialize trace collection structures */
    ss_initStats(ss);

    /* show parameter settings */
    if (ss_get_thread_num() == 0) {
        uts_printParams();
    }
    fflush(NULL);

    // Workers will return 1 from ss_start(), all others (managers)
    // will return 0 here once the computation ends
    if (ss_start(sizeof(Node), chunkSize)) {
        /* initialize root node and push on thread 0 stack */
        if (ss_get_thread_num() == 0) {
            uts_initRoot(&root, type);
#ifdef TRACE
            ss_markSteal(ss, 0);  // first session is own "parent session"
#endif
            ss_put_work(ss, &root);
        }

        /* time parallel search */
        t1 = uts_wctime();
        parTreeSearch(ss);
        t2 = uts_wctime();
        ss->walltime = t2 - t1;
#ifdef TRACE
        ss->startTime = t1;
        ss->sessionRecords[SS_IDLE][ss->entries[SS_IDLE] - 1].endTime = t2;
#endif
    }

    ss_stop();

    /* display results */
    showStats();
    ss_finalize();

    return 0;
}
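The worker loop behind parTreeSearch(ss) pairs the ss_put_work() call above with a blocking retrieval. The following sketch assumes a get-work counterpart, ss_get_work(), and a STATUS_HAVEWORK status as in the UTS distributed-memory API; the exact signature may differ in the actual distribution, and child generation is elided.

/* Sketch only: the get-work signature/status and child setup are assumptions. */
void parTreeSearch(StealStack *ss) {
    void *work;

    /* ss_get_work() serves local chunks first and steals (or signals
     * termination) when the local stack runs dry */
    while (ss_get_work(ss, &work) == STATUS_HAVEWORK) {
        Node *parent = (Node *) work;
        int numChildren = uts_numChildren(parent);
        for (int i = 0; i < numChildren; i++) {
            Node child;
            /* ...initialize child from parent (elided)... */
            ss_put_work(ss, &child);
        }
    }
}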
/* main() function for: Sequential, OpenMP, UPC, and SHMEM
 *
 * Notes on execution model:
 *   - under OpenMP, global vars are all shared
 *   - under UPC, global vars are private unless explicitly shared
 *   - UPC is SPMD starting with main; OpenMP goes SPMD after
 *     parsing parameters
 */
int main(int argc, char *argv[]) {
#ifdef THREAD_METADATA
    memset(t_metadata, 0x00, MAX_OMP_THREADS * sizeof(thread_metadata));
#endif
    memset(thread_info, 0x00, MAX_OMP_THREADS * sizeof(per_thread_info));
    memset(steal_buffer_locks, 0x00, MAX_SHMEM_THREADS * sizeof(long));

    hclib::launch([argc, argv] {
        pe = hclib::pe_for_locale(hclib::shmem_my_pe());
        npes = hclib::shmem_n_pes();

        /* determine benchmark parameters (all PEs) */
        uts_parseParams(argc, argv);

#ifdef UTS_STAT
        if (stats) {
            initHist();
        }
#endif

        double t1, t2, et;

        /* show parameter settings */
        if (pe == 0) {
            uts_printParams();
        }

        Node root;
        initRootNode(&root, type);

        hclib::shmem_barrier_all();

        /* time parallel search */
        t1 = uts_wctime();

        int n_omp_threads;

        /********** SPMD Parallel Region **********/
        int first = 1;

        n_omp_threads = hclib::num_workers();
        assert(n_omp_threads <= MAX_OMP_THREADS);

        Node child;
retry:
        initNode(&child);

        /* only PE 0 expands the real root; on later iterations every PE
         * works on a node it pulled from its steal buffer or a remote PE */
        hclib::finish([&first, &root, &child] {
            if (first) {
                if (pe == 0) {
                    genChildren(&root, &child);
                }
            } else {
                genChildren(&root, &child);
            }
        });
        first = 0;

        /* drain locally buffered steals before going remote */
        if (n_buffered_steals > 0) {
            hclib::shmem_set_lock(&steal_buffer_locks[pe]);
            if (n_buffered_steals > 0) {
                n_buffered_steals--;
                memcpy(&root, &steal_buffer[n_buffered_steals], sizeof(root));
                hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
                goto retry;
            } else {
                hclib::shmem_clear_lock(&steal_buffer_locks[pe]);
            }
        }

        const int got_more_work = remote_steal(&root);
        if (got_more_work == 1) {
            goto retry;
        }

        hclib::shmem_barrier_all();

        t2 = uts_wctime();
        et = t2 - t1;

        /* fold per-worker counters into this PE's totals */
        int i;
        for (i = 0; i < MAX_OMP_THREADS; i++) {
            n_nodes += thread_info[i].n_nodes;
            n_leaves += thread_info[i].n_leaves;
        }

        hclib::shmem_barrier_all();

        /* accumulate every PE's totals onto PE 0 */
        if (pe != 0) {
            hclib::shmem_int_add(&n_nodes, n_nodes, 0);
            hclib::shmem_int_add(&n_leaves, n_leaves, 0);
        }

        hclib::shmem_barrier_all();

        if (pe == 0) {
            showStats(et);
        }
        /********** End Parallel Region **********/

#ifdef THREAD_METADATA
        int p;
        for (p = 0; p < npes; p++) {
            if (p == pe) {
                printf("\n");
                int i;
                for (i = 0; i < n_omp_threads; i++) {
                    printf("PE %d, thread %d: %lu tasks\n",
                           p, i, t_metadata[i].ntasks);
                }
            }
            hclib::shmem_barrier_all();
        }
#endif
    });

    return 0;
}
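The hclib::finish scope above returns once the dynamically spawned task tree quiesces. A sketch of the recursive decomposition it waits on follows, under stated assumptions: the per-worker counter lookup via hclib_get_current_worker() and the child-node setup are illustrative, and the real genChildren() additionally spills surplus work into steal_buffer so other PEs can steal it.

/* Sketch only: counter indexing and child setup are assumptions; the real
 * version also pushes surplus children into steal_buffer for remote steals. */
void genChildren(Node *parent, Node *child) {
    const int wid = hclib_get_current_worker();
    thread_info[wid].n_nodes++;

    const int numChildren = uts_numChildren(parent);
    if (numChildren == 0) {
        thread_info[wid].n_leaves++;
        return;
    }

    for (int i = 0; i < numChildren; i++) {
        /* ...initialize *child as the i-th child of parent (elided)... */
        Node kid = *child;  /* copy so the spawned task owns its node */
        hclib::async([kid]() mutable {
            Node scratch;
            genChildren(&kid, &scratch);
        });
    }
}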