int main(int argc, char *argv[]) { size_t threads, i; aligned_t *rets; qtimer_t t; unsigned int iter, iterations = 10; double tot = 0.0; assert(qthread_initialize() == 0); t = qtimer_create(); CHECK_VERBOSE(); NUMARG(iterations, "ITERATIONS"); threads = qthread_num_workers(); iprintf("%i shepherds...\n", qthread_num_shepherds()); iprintf("%i threads...\n", (int)threads); initme = calloc(threads, sizeof(aligned_t)); assert(initme); rets = malloc(threads * sizeof(aligned_t)); assert(rets); iprintf("Creating a barrier to block %i threads\n", threads); wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0); // all my spawnees plus me assert(wait_on_me); for (iter = 0; iter < iterations; iter++) { iprintf("%i: forking the threads\n", iter); for (i = 1; i < threads; i++) { void *arg[2] = {wait_on_me, (void*)(intptr_t)i}; qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0); } iprintf("%i: done forking the threads, entering the barrier\n", iter); qtimer_start(t); qt_barrier_enter(wait_on_me, 0); qtimer_stop(t); iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t)); tot += qtimer_secs(t); // reset initme_idx = 1; // check retvals for (i = 1; i < threads; i++) { qthread_readFF(NULL, rets + i); if (initme[i] != iter + 1) { iprintf("initme[%i] = %i (should be %i)\n", (int)i, (int)initme[i], iter + 1); } assert(initme[i] == iter + 1); } } iprintf("Average barrier time = %f\n", tot / iterations); iprintf("Destroying the barrier...\n"); qt_barrier_destroy(wait_on_me); iprintf("Success!\n"); return 0; }
void Task::schedule() { // Is waiting for execution // Increment active task count before spawning. Kokkos::atomic_increment( m_active_count ); // spawn in qthread. must malloc the precondition array and give to qthread. // qthread will eventually free this allocation so memory will not be leaked. // concern with thread safety of malloc, does this need to be guarded? aligned_t ** qprecon = (aligned_t **) malloc( ( m_dep_size + 1 ) * sizeof(aligned_t *) ); qprecon[0] = reinterpret_cast<aligned_t *>( uintptr_t(m_dep_size) ); for ( int i = 0 ; i < m_dep_size ; ++i ) { qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthread precondition flag } if ( m_apply_team && ! m_apply_single ) { // If more than one shepherd spawn on a shepherd other than this shepherd const int num_shepherd = qthread_num_shepherds(); const int num_worker_per_shepherd = qthread_num_workers_local(NO_SHEPHERD); const int this_shepherd = qthread_shep(); int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ; #if 0 fprintf( stdout , "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n" , qthread_shep() , qthread_worker_local(NULL) , reinterpret_cast<unsigned long>(this) , spawn_shepherd , num_worker_per_shepherd - 1 ); fflush(stdout); #endif qthread_spawn_cloneable ( & Task::qthread_func , this , 0 , NULL , m_dep_size , qprecon /* dependences */ , spawn_shepherd , unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY ) , num_worker_per_shepherd - 1 ); } else { qthread_spawn( & Task::qthread_func /* function */ , this /* function argument */ , 0 , NULL , m_dep_size , qprecon /* dependences */ , NO_SHEPHERD , QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ ); } }