bool task_group_context::cancel_group_execution () {
    __TBB_ASSERT ( my_cancellation_requested == 0 || my_cancellation_requested == 1, "Invalid cancellation state");
    if ( my_cancellation_requested || __TBB_CompareAndSwapW(&my_cancellation_requested, 1, 0) ) {
        // This task group has already been canceled
        return false;
    }
#if __TBB_ARENA_PER_MASTER
    governor::local_scheduler()->my_arena->propagate_cancellation( *this );
#else /* !__TBB_ARENA_PER_MASTER */
    governor::local_scheduler()->propagate_cancellation( *this );
#endif /* !__TBB_ARENA_PER_MASTER */
    return true;
}
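// The cancellation flag above follows a "test, then compare-and-swap" idiom so that
// exactly one caller wins the right to propagate cancellation while late callers get
// a cheap early exit. A minimal standalone sketch of the same idiom using std::atomic
// rather than TBB's __TBB_CompareAndSwapW; the names cancellable_group_sketch,
// my_flag, and cancel_once are illustrative assumptions, not part of TBB:
#include <atomic>

struct cancellable_group_sketch {
    std::atomic<int> my_flag{0};   // 0 = not cancelled, 1 = cancelled

    // Returns true only for the single caller that actually flips the flag.
    bool cancel_once() {
        if ( my_flag.load(std::memory_order_relaxed) )
            return false;          // cheap early exit: already cancelled
        int expected = 0;
        // Only one thread can successfully change 0 -> 1; losers observe expected != 0.
        return my_flag.compare_exchange_strong( expected, 1 );
    }
};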
bool arena::is_out_of_work() {
    // TODO: rework it to return at least a hint about where a task was found; better if the task itself.
    for(;;) {
        pool_state_t snapshot = my_pool_state;
        switch( snapshot ) {
            case SNAPSHOT_EMPTY:
                return true;
            case SNAPSHOT_FULL: {
                // Use unique id for "busy" in order to avoid ABA problems.
                const pool_state_t busy = pool_state_t(&busy);
                // Request permission to take snapshot
                if( my_pool_state.compare_and_swap( busy, SNAPSHOT_FULL )==SNAPSHOT_FULL ) {
                    // Got permission. Take the snapshot.
                    // NOTE: This is not a lock, as the state can be set to FULL at
                    // any moment by a thread that spawns/enqueues new task.
                    size_t n = my_limit;
                    // Make local copies of volatile parameters. Their change during
                    // snapshot taking procedure invalidates the attempt, and returns
                    // this thread into the dispatch loop.
#if __TBB_TASK_PRIORITY
                    intptr_t top_priority = my_top_priority;
                    uintptr_t reload_epoch = my_reload_epoch;
                    // Inspect primary task pools first
#endif /* __TBB_TASK_PRIORITY */
                    size_t k;
                    for( k=0; k<n; ++k ) {
                        if( my_slots[k].task_pool != EmptyTaskPool &&
                            __TBB_load_relaxed(my_slots[k].head) < __TBB_load_relaxed(my_slots[k].tail) )
                        {
                            // k-th primary task pool is nonempty and does contain tasks.
                            break;
                        }
                    }
                    __TBB_ASSERT( k <= n, NULL );
                    bool work_absent = k == n;
#if __TBB_TASK_PRIORITY
                    // Variable tasks_present indicates presence of tasks at any priority
                    // level, while work_absent refers only to the current priority.
                    bool tasks_present = !work_absent || my_orphaned_tasks;
                    bool dequeuing_possible = false;
                    if ( work_absent ) {
                        // Check for the possibility that recent priority changes
                        // brought some tasks to the current priority level
                        uintptr_t abandonment_epoch = my_abandonment_epoch;
                        // Master thread's scheduler needs special handling as it
                        // may be destroyed at any moment (workers' schedulers are
                        // guaranteed to be alive while at least one thread is in arena).
                        // Have to exclude concurrency with task group state change propagation too.
                        my_market->my_arenas_list_mutex.lock();
                        generic_scheduler *s = my_slots[0].my_scheduler;
                        if ( s && __TBB_CompareAndSwapW(&my_slots[0].my_scheduler, (intptr_t)LockedMaster, (intptr_t)s) == (intptr_t)s ) {
                            __TBB_ASSERT( my_slots[0].my_scheduler == LockedMaster && s != LockedMaster, NULL );
                            work_absent = !may_have_tasks( s, my_slots[0], tasks_present, dequeuing_possible );
                            __TBB_store_with_release( my_slots[0].my_scheduler, s );
                        }
                        my_market->my_arenas_list_mutex.unlock();
                        // The following loop is subject to data races. While k-th slot's
                        // scheduler is being examined, corresponding worker can either
                        // leave to RML or migrate to another arena.
                        // But the races are not prevented because all of them are benign.
                        // First, the code relies on the fact that worker thread's scheduler
                        // object persists until the whole library is deinitialized.
                        // Second, in the worst case the races can only cause another
                        // round of stealing attempts to be undertaken. Introducing complex
                        // synchronization into this coldest part of the scheduler's control
                        // flow does not seem to make sense because it both is unlikely to
                        // ever have any observable performance effect, and will require
                        // additional synchronization code on the hotter paths.
                        for( k = 1; work_absent && k < n; ++k )
                            work_absent = !may_have_tasks( my_slots[k].my_scheduler, my_slots[k], tasks_present, dequeuing_possible );
                        // Preclude premature switching arena off because of a race in the previous loop.
                        work_absent = work_absent
                                      && !__TBB_load_with_acquire(my_orphaned_tasks)
                                      && abandonment_epoch == my_abandonment_epoch;
                    }
#endif /* __TBB_TASK_PRIORITY */
                    // Test and test-and-set.
                    if( my_pool_state==busy ) {
#if __TBB_TASK_PRIORITY
                        bool no_fifo_tasks = my_task_stream[top_priority].empty();
                        work_absent = work_absent && (!dequeuing_possible || no_fifo_tasks)
                                      && top_priority == my_top_priority && reload_epoch == my_reload_epoch;
#else
                        bool no_fifo_tasks = my_task_stream.empty();
                        work_absent = work_absent && no_fifo_tasks;
#endif /* __TBB_TASK_PRIORITY */
                        if( work_absent ) {
#if __TBB_TASK_PRIORITY
                            if ( top_priority > my_bottom_priority ) {
                                if ( my_market->lower_arena_priority(*this, top_priority - 1, top_priority)
                                     && !my_task_stream[top_priority].empty() )
                                {
                                    atomic_update( my_skipped_fifo_priority, top_priority, std::less<intptr_t>());
                                }
                            }
                            else if ( !tasks_present && !my_orphaned_tasks && no_fifo_tasks ) {
#endif /* __TBB_TASK_PRIORITY */
                                // save current demand value before setting SNAPSHOT_EMPTY,
                                // to avoid race with advertise_new_work.
                                int current_demand = (int)my_max_num_workers;
                                if( my_pool_state.compare_and_swap( SNAPSHOT_EMPTY, busy )==busy ) {
                                    // This thread transitioned pool to empty state, and thus is
                                    // responsible for telling RML that there is no other work to do.
                                    my_market->adjust_demand( *this, -current_demand );
#if __TBB_TASK_PRIORITY
                                    // Check for the presence of enqueued tasks "lost" on some of
                                    // priority levels because updating arena priority and switching
                                    // arena into "populated" (FULL) state happen non-atomically.
                                    // Imposing atomicity would require task::enqueue() to use a lock,
                                    // which is unacceptable.
                                    bool switch_back = false;
                                    for ( int p = 0; p < num_priority_levels; ++p ) {
                                        if ( !my_task_stream[p].empty() ) {
                                            switch_back = true;
                                            if ( p < my_bottom_priority || p > my_top_priority )
                                                my_market->update_arena_priority(*this, p);
                                        }
                                    }
                                    if ( switch_back )
                                        advertise_new_work</*Spawned*/false>();
#endif /* __TBB_TASK_PRIORITY */
                                    return true;
                                }
                                return false;
#if __TBB_TASK_PRIORITY
                            }
#endif /* __TBB_TASK_PRIORITY */
                        }
                        // Undo previous transition SNAPSHOT_FULL-->busy, unless another thread undid it.
                        my_pool_state.compare_and_swap( SNAPSHOT_FULL, busy );
                    }
                }
                return false;
            }
            default:
                // Another thread is taking a snapshot.
                return false;
        }
    }
}
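// The SNAPSHOT_FULL -> busy -> SNAPSHOT_EMPTY protocol above uses the address of a
// local variable as a per-attempt "busy" token: a concurrent FULL -> busy -> FULL
// round trip by another thread can then never be mistaken for this thread's own
// transition (the classic ABA problem). A hedged sketch of just that state machine
// with std::atomic; pool_state, out_of_work_sketch, and has_work are illustrative
// names under assumed semantics, not TBB's actual interfaces:
#include <atomic>
#include <cstdint>

using pool_state_t = std::uintptr_t;
const pool_state_t SNAPSHOT_EMPTY_ = 0;
const pool_state_t SNAPSHOT_FULL_  = pool_state_t(-1);

bool out_of_work_sketch( std::atomic<pool_state_t>& pool_state, bool (*has_work)() ) {
    pool_state_t snapshot = pool_state.load();
    if ( snapshot == SNAPSHOT_EMPTY_ )
        return true;
    if ( snapshot != SNAPSHOT_FULL_ )
        return false;                            // someone else is taking a snapshot
    // Unique per-attempt token: the address of a local is never EMPTY or FULL and
    // differs between concurrent attempts, which defeats ABA.
    const pool_state_t busy = pool_state_t(&busy);
    pool_state_t expected = SNAPSHOT_FULL_;
    if ( !pool_state.compare_exchange_strong( expected, busy ) )
        return false;                            // lost the race for permission
    bool work_absent = !has_work();              // take the snapshot
    if ( work_absent ) {
        expected = busy;
        // Succeeds only if no producer reset the state to FULL in the meantime.
        return pool_state.compare_exchange_strong( expected, SNAPSHOT_EMPTY_ );
    }
    // Undo FULL -> busy, unless a producer already did.
    expected = busy;
    pool_state.compare_exchange_strong( expected, SNAPSHOT_FULL_ );
    return false;
}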
template<typename T>
static inline T CAS(volatile T &addr, T newv, T oldv) {
    // ICC (versions 9.1 and 10.1 were tried) is unable to do the implicit conversion
    // from "volatile T*" to "volatile void*", so an explicit cast is added.
    return T(__TBB_CompareAndSwapW((volatile void *)&addr, (intptr_t)newv, (intptr_t)oldv));
}
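// Note the argument order: this wrapper is CAS(location, new_value, comparand) and,
// like the underlying __TBB_CompareAndSwapW word primitive, returns the value that
// was observed at the location. A hedged modern equivalent on top of std::atomic,
// which uses the opposite (expected, desired) order; cas_sketch is an illustrative
// name, not a TBB API:
#include <atomic>

template<typename T>
T cas_sketch( std::atomic<T>& addr, T newv, T oldv ) {
    T observed = oldv;
    // compare_exchange_strong writes the actual value back into 'observed' on
    // failure, so returning it reproduces the "return previous value" contract
    // of the CAS wrapper above: callers compare the result against oldv.
    addr.compare_exchange_strong( observed, newv );
    return observed;
}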
void arena::process( generic_scheduler& s ) {
    __TBB_ASSERT( is_alive(my_guard), NULL );
    __TBB_ASSERT( governor::is_set(&s), NULL );
    __TBB_ASSERT( !s.innermost_running_task, NULL );
    __TBB_ASSERT( my_num_slots != 1, NULL );
    // Start search for an empty slot from the one we occupied the last time
    unsigned index = s.arena_index < my_num_slots ? s.arena_index : s.random.get() % (my_num_slots - 1) + 1,
             end = index;
    __TBB_ASSERT( index != 0, "A worker cannot occupy slot 0" );
    __TBB_ASSERT( index < my_num_slots, NULL );
    // Find a vacant slot
    for ( ;; ) {
        if ( !slot[index].my_scheduler && __TBB_CompareAndSwapW( &slot[index].my_scheduler, (intptr_t)&s, 0 ) == 0 )
            break;
        if ( ++index == my_num_slots )
            index = 1;
        if ( index == end ) {
            // Likely this arena is already saturated
            if ( --my_num_threads_active == 0 )
                close_arena();
            return;
        }
    }
    ITT_NOTIFY(sync_acquired, &slot[index]);
    s.my_arena = this;
    s.arena_index = index;
    s.attach_mailbox( affinity_id(index+1) );

    slot[index].hint_for_push = index ^ unsigned(&s-(generic_scheduler*)NULL)>>16; // randomizer seed
    slot[index].hint_for_pop  = index; // initial value for round-robin

    unsigned new_limit = index + 1;
    unsigned old_limit = my_limit;
    while ( new_limit > old_limit ) {
        if ( my_limit.compare_and_swap(new_limit, old_limit) == old_limit )
            break;
        old_limit = my_limit;
    }
    for ( ;; ) {
        // Try to steal a task.
        // Passing reference count is technically unnecessary in this context,
        // but omitting it here would add checks inside the function.
        __TBB_ASSERT( is_alive(my_guard), NULL );
        task* t = s.receive_or_steal_task( s.dummy_task->prefix().ref_count, /*return_if_no_work=*/true );
        if (t) {
            // A side effect of receive_or_steal_task is that innermost_running_task can be set.
            // But for the outermost dispatch loop of a worker it has to be NULL.
            s.innermost_running_task = NULL;
            s.local_wait_for_all(*s.dummy_task,t);
        }
        ++my_num_threads_leaving;
        __TBB_ASSERT ( slot[index].head == slot[index].tail, "Worker cannot leave arena while its task pool is not empty" );
        __TBB_ASSERT( slot[index].task_pool == EmptyTaskPool, "Empty task pool is not marked appropriately" );
        // Revalidate quitting condition
        // This check prevents relinquishing more workers than necessary because
        // of the non-atomicity of the decision making procedure
        if ( num_workers_active() >= my_num_workers_allotted || !my_num_workers_requested )
            break;
        --my_num_threads_leaving;
        __TBB_ASSERT( !slot[0].my_scheduler || my_num_threads_active > 0,
                      "Who requested more workers after the last one left the dispatch loop and the master's gone?" );
    }
#if __TBB_STATISTICS
    ++s.my_counters.arena_roundtrips;
    *slot[index].my_counters += s.my_counters;
    s.my_counters.reset();
#endif /* __TBB_STATISTICS */
    __TBB_store_with_release( slot[index].my_scheduler, (generic_scheduler*)NULL );
    s.inbox.detach();
    __TBB_ASSERT( s.inbox.is_idle_state(true), NULL );
    __TBB_ASSERT( !s.innermost_running_task, NULL );
    __TBB_ASSERT( is_alive(my_guard), NULL );
    // Decrementing my_num_threads_active first prevents extra workers from leaving
    // this arena prematurely, but can result in some workers returning back just
    // to repeat the escape attempt. If instead my_num_threads_leaving is decremented
    // first, the result is the opposite: premature leaving is allowed and gratuitous
    // return is prevented. Since such a race is likely only when multiple workers
    // are in the stealing loop, and consequently there is a lack of parallel work
    // in this arena, we'd rather let them go out and try to get employment in
    // other arenas (before returning into this one again).
    --my_num_threads_leaving;
    if ( !--my_num_threads_active )
        close_arena();
}
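// The exit path above maintains two counters: my_num_threads_leaving marks workers
// that intend to quit, while my_num_threads_active gates arena destruction. As the
// block comment explains, decrementing "leaving" before "active" permits slightly
// premature leaving but avoids gratuitous returns. A hedged sketch of just this
// protocol with std::atomic; arena_sketch and leave() are illustrative names, and
// close_arena() is assumed to be defined elsewhere:
#include <atomic>

struct arena_sketch {
    std::atomic<int> my_num_threads_active{0};
    std::atomic<int> my_num_threads_leaving{0};

    void close_arena();   // assumed: final teardown, runs exactly once

    void leave() {
        // "Leaving" drops first: this worker may quit a bit prematurely, but it
        // will not be pulled back into the arena just to repeat the escape attempt.
        --my_num_threads_leaving;
        // The thread that drops the active count to zero closes the arena.
        if ( my_num_threads_active.fetch_sub(1) == 1 )
            close_arena();
    }
};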