void task_group_context::init () {
    __TBB_STATIC_ASSERT ( sizeof(my_version_and_traits) >= 4, "Layout of my_version_and_traits must be reconsidered on this platform" );
    __TBB_STATIC_ASSERT ( sizeof(task_group_context) == 2 * NFS_MaxLineSize, "Context class has wrong size - check padding and members alignment" );
    __TBB_ASSERT ( (uintptr_t(this) & (sizeof(my_cancellation_requested) - 1)) == 0, "Context is improperly aligned" );
    __TBB_ASSERT ( __TBB_load_relaxed(my_kind) == isolated || __TBB_load_relaxed(my_kind) == bound, "Context can be created only as isolated or bound" );
    my_parent = NULL;
    my_cancellation_requested = 0;
    my_exception = NULL;
    my_owner = NULL;
    my_state = 0;
    itt_caller = ITT_CALLER_NULL;
#if __TBB_TASK_PRIORITY
    my_priority = normalized_normal_priority;
#endif /* __TBB_TASK_PRIORITY */
#if __TBB_FP_CONTEXT
    __TBB_STATIC_ASSERT( sizeof(my_cpu_ctl_env) == sizeof(internal::uint64_t), "The reserved space for FPU settings is not equal to sizeof(uint64_t)" );
    __TBB_STATIC_ASSERT( sizeof(cpu_ctl_env) <= sizeof(my_cpu_ctl_env), "FPU settings storage does not fit into uint64_t" );
    suppress_unused_warning( my_cpu_ctl_env.space );

    cpu_ctl_env &ctl = *internal::punned_cast<cpu_ctl_env*>(&my_cpu_ctl_env);
    new ( &ctl ) cpu_ctl_env;
    if ( my_version_and_traits & fp_settings )
        ctl.get_env();
#endif
}
void task_group_context::bind_to ( generic_scheduler *local_sched ) {
    __TBB_ASSERT ( __TBB_load_relaxed(my_kind) == binding_required, "Already bound or isolated?" );
    __TBB_ASSERT ( !my_parent, "Parent is set before initial binding" );
    my_parent = local_sched->my_innermost_running_task->prefix().context;
#if __TBB_FP_CONTEXT
    // Inherit FPU settings only if the context has not captured FPU settings yet.
    if ( !(my_version_and_traits & fp_settings) )
        copy_fp_settings(*my_parent);
#endif

    // The condition below prevents unnecessary thrashing of the parent context's cache line.
    if ( !(my_parent->my_state & may_have_children) )
        my_parent->my_state |= may_have_children; // full fence is below
    if ( my_parent->my_parent ) {
        // Even if this context were made accessible for state change propagation
        // (by placing __TBB_store_with_release(s->my_context_list_head.my_next, &my_node)
        // above), it still could be missed if state propagation from a grand-ancestor
        // was underway concurrently with binding.
        // Speculative propagation from the parent, together with epoch counters
        // detecting the possibility of such a race, makes it possible to avoid
        // taking locks when there is no contention.

        // Acquire fence is necessary to prevent reordering subsequent speculative
        // loads of parent state data out of the scope where the epoch counters
        // comparison can reliably validate them.
        uintptr_t local_count_snapshot = __TBB_load_with_acquire( my_parent->my_owner->my_context_state_propagation_epoch );
        // Speculative propagation of the parent's state. The speculation will be
        // validated by the epoch counters check further on.
        my_cancellation_requested = my_parent->my_cancellation_requested;
#if __TBB_TASK_PRIORITY
        my_priority = my_parent->my_priority;
#endif /* __TBB_TASK_PRIORITY */
        register_with( local_sched ); // Issues full fence

        // If no state propagation was detected by the following condition, the above
        // full fence guarantees that the parent had correct state during speculative
        // propagation before the fence. Otherwise the propagation from the parent is
        // repeated under the lock.
        if ( local_count_snapshot != the_context_state_propagation_epoch ) {
            // Another thread may be propagating a state change right now. So resort to the lock.
            context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
            my_cancellation_requested = my_parent->my_cancellation_requested;
#if __TBB_TASK_PRIORITY
            my_priority = my_parent->my_priority;
#endif /* __TBB_TASK_PRIORITY */
        }
    }
    else {
        register_with( local_sched ); // Issues full fence
        // As we do not have grand-ancestors, concurrent state propagation (if any)
        // may originate only from the parent context, and thus it is safe to directly
        // copy the state from it.
        my_cancellation_requested = my_parent->my_cancellation_requested;
#if __TBB_TASK_PRIORITY
        my_priority = my_parent->my_priority;
#endif /* __TBB_TASK_PRIORITY */
    }
    __TBB_store_relaxed(my_kind, binding_completed);
}
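// The epoch-validated speculative copy used above generalizes to a small pattern:
// read a global epoch counter with acquire semantics, copy the parent's state
// without a lock, publish yourself with a full fence, then re-check the epoch.
// If the epoch moved, a propagator may have run concurrently, so the copy is
// repeated under the lock. A minimal standalone sketch of that pattern follows;
// the names and types are illustrative only and are not part of the TBB sources.
#include <atomic>
#include <mutex>

struct parent_state_example {
    std::atomic<unsigned> cancellation{0};
};

std::atomic<unsigned long> propagation_epoch_example{0}; // bumped by every propagator
std::mutex propagation_mutex_example;                    // held by every propagator

unsigned speculative_copy_example( parent_state_example &parent ) {
    // 1. Snapshot the epoch before reading the parent's state (acquire ordering).
    unsigned long snapshot = propagation_epoch_example.load( std::memory_order_acquire );
    // 2. Speculative, lock-free copy of the parent's state.
    unsigned local = parent.cancellation.load( std::memory_order_relaxed );
    // 3. Publish this context (a full fence stands in for register_with()).
    std::atomic_thread_fence( std::memory_order_seq_cst );
    // 4. Validate: if a propagation happened meanwhile, redo the copy under the lock.
    if ( snapshot != propagation_epoch_example.load( std::memory_order_relaxed ) ) {
        std::lock_guard<std::mutex> lock( propagation_mutex_example );
        local = parent.cancellation.load( std::memory_order_relaxed );
    }
    return local;
}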
//------------------------------------------------------------------------
// Methods of allocate_root_with_context_proxy
//------------------------------------------------------------------------

task& allocate_root_with_context_proxy::allocate( size_t size ) const {
    internal::generic_scheduler* s = governor::local_scheduler();
    __TBB_ASSERT( s, "Scheduler auto-initialization failed?" );
    __TBB_ASSERT( &my_context, "allocate_root(context) argument is a dereferenced NULL pointer" );
    task& t = s->allocate_task( size, NULL, &my_context );
    // The supported usage model prohibits concurrent initial binding. Thus we do not
    // need interlocked operations or fences to manipulate my_context.my_kind.
    if ( __TBB_load_relaxed(my_context.my_kind) == task_group_context::binding_required ) {
        // If we are in the outermost task dispatch loop of a master thread, then
        // there is nothing to bind this context to, and we skip the binding part,
        // treating the context as isolated.
        if ( s->master_outermost_level() )
            __TBB_store_relaxed(my_context.my_kind, task_group_context::isolated);
        else
            my_context.bind_to( s );
    }
#if __TBB_FP_CONTEXT
    if ( __TBB_load_relaxed(my_context.my_kind) == task_group_context::isolated &&
         !(my_context.my_version_and_traits & task_group_context::fp_settings) )
        my_context.copy_fp_settings( *s->my_arena->my_default_ctx );
#endif
    ITT_STACK_CREATE(my_context.itt_caller);
    return t;
}
void concurrent_monitor::notify_one_relaxed() {
    if( waitset_ec.empty() )
        return;
    waitset_node_t* n;
    const waitset_node_t* end = waitset_ec.end();
    {
        tbb::spin_mutex::scoped_lock l( mutex_ec );
        __TBB_store_relaxed( epoch, __TBB_load_relaxed(epoch) + 1 );
        n = waitset_ec.front();
        if( n!=end ) {
            waitset_ec.remove( *n );
            to_thread_context(n)->in_waitset = false;
        }
    }
    if( n!=end )
        to_thread_context(n)->semaphore().V();
}
void concurrent_monitor::prepare_wait( thread_context& thr, uintptr_t ctx ) {
    if( !thr.ready )
        thr.init();
    // This is a good place to absorb a previous spurious wakeup.
    else if( thr.spurious ) {
        thr.spurious = false;
        thr.semaphore().P();
    }
    thr.context = ctx;
    thr.in_waitset = true;
    {
        tbb::spin_mutex::scoped_lock l( mutex_ec );
        __TBB_store_relaxed( thr.epoch, __TBB_load_relaxed(epoch) );
        waitset_ec.add( (waitset_t::node_t*)&thr );
    }
    atomic_fence();
}
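// prepare_wait() is the first half of a two-phase wait: the waiter publishes
// itself in the wait set, then re-checks its wakeup condition, and only then
// blocks (or cancels). Because notifiers bump the epoch and remove waiters under
// the same lock, a notification issued between the re-check and the block cannot
// be lost. A minimal sketch of the waiter side is given below; it assumes the
// monitor also provides commit_wait()/cancel_wait() companions to prepare_wait()
// (as concurrent_monitor does in the TBB sources), and the predicate is purely
// illustrative.
template<typename Monitor, typename ThreadContext, typename Predicate>
void wait_until_example( Monitor &mon, ThreadContext &thr, Predicate ready ) {
    for (;;) {
        if ( ready() )
            return;                  // fast path: nothing to wait for
        mon.prepare_wait( thr, 0 );  // publish ourselves in the wait set
        if ( ready() ) {             // re-check after publication
            mon.cancel_wait( thr );  // the condition became true; do not block
            return;
        }
        mon.commit_wait( thr );      // block until notified (may wake spuriously)
    }
}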
void concurrent_monitor::notify_all_relaxed() {
    if( waitset_ec.empty() )
        return;
    waitset_t temp;
    const waitset_node_t* end;
    {
        tbb::spin_mutex::scoped_lock l( mutex_ec );
        __TBB_store_relaxed( epoch, __TBB_load_relaxed(epoch) + 1 );
        waitset_ec.flush_to( temp );
        end = temp.end();
        for( waitset_node_t* n=temp.front(); n!=end; n=n->next )
            to_thread_context(n)->in_waitset = false;
    }
    waitset_node_t* nxt;
    for( waitset_node_t* n=temp.front(); n!=end; n=nxt ) {
        nxt = n->next;
        to_thread_context(n)->semaphore().V();
    }
#if TBB_USE_ASSERT
    temp.clear();
#endif
}
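// notify_all_relaxed() drains the wait set into a local list while holding the
// spin lock, but performs the actual semaphore V() calls after releasing it, so
// the notifier never holds the lock across potentially expensive kernel calls.
// A standalone sketch of that "drain under lock, signal outside lock" shape,
// using illustrative stand-in types rather than the TBB waitset classes:
#include <mutex>
#include <vector>

struct waiter_example { virtual void wake() = 0; virtual ~waiter_example() {} };

struct simple_monitor_example {
    std::mutex m;
    std::vector<waiter_example*> waitset;

    void notify_all() {
        std::vector<waiter_example*> drained;
        {
            std::lock_guard<std::mutex> l( m );
            drained.swap( waitset );          // drain the wait set under the lock
        }
        for ( waiter_example *w : drained )   // wake everyone outside the lock
            w->wake();
    }
};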
bool arena::is_out_of_work() {
    // TODO: rework it to return at least a hint about where a task was found; better if the task itself.
    for(;;) {
        pool_state_t snapshot = my_pool_state;
        switch( snapshot ) {
            case SNAPSHOT_EMPTY:
                return true;
            case SNAPSHOT_FULL: {
                // Use a unique id for "busy" in order to avoid ABA problems.
                const pool_state_t busy = pool_state_t(&busy);
                // Request permission to take the snapshot.
                if( my_pool_state.compare_and_swap( busy, SNAPSHOT_FULL )==SNAPSHOT_FULL ) {
                    // Got permission. Take the snapshot.
                    // NOTE: This is not a lock, as the state can be set to FULL at
                    //       any moment by a thread that spawns/enqueues a new task.
                    size_t n = my_limit;
                    // Make local copies of volatile parameters. A change in them during
                    // the snapshot-taking procedure invalidates the attempt and returns
                    // this thread to the dispatch loop.
#if __TBB_TASK_PRIORITY
                    intptr_t top_priority = my_top_priority;
                    uintptr_t reload_epoch = my_reload_epoch;
                    // Inspect primary task pools first.
#endif /* __TBB_TASK_PRIORITY */
                    size_t k;
                    for( k=0; k<n; ++k ) {
                        if( my_slots[k].task_pool != EmptyTaskPool &&
                            __TBB_load_relaxed(my_slots[k].head) < __TBB_load_relaxed(my_slots[k].tail) )
                        {
                            // The k-th primary task pool is nonempty and does contain tasks.
                            break;
                        }
                    }
                    __TBB_ASSERT( k <= n, NULL );
                    bool work_absent = k == n;
#if __TBB_TASK_PRIORITY
                    // Variable tasks_present indicates the presence of tasks at any priority
                    // level, while work_absent refers only to the current priority.
                    bool tasks_present = !work_absent || my_orphaned_tasks;
                    bool dequeuing_possible = false;
                    if ( work_absent ) {
                        // Check for the possibility that recent priority changes
                        // brought some tasks to the current priority level.

                        uintptr_t abandonment_epoch = my_abandonment_epoch;
                        // The master thread's scheduler needs special handling as it
                        // may be destroyed at any moment (workers' schedulers are
                        // guaranteed to be alive while at least one thread is in the arena).
                        // Concurrency with task group state change propagation must be excluded too.
                        my_market->my_arenas_list_mutex.lock();
                        generic_scheduler *s = my_slots[0].my_scheduler;
                        if ( s && __TBB_CompareAndSwapW(&my_slots[0].my_scheduler, (intptr_t)LockedMaster, (intptr_t)s) == (intptr_t)s )
                        {
                            __TBB_ASSERT( my_slots[0].my_scheduler == LockedMaster && s != LockedMaster, NULL );
                            work_absent = !may_have_tasks( s, my_slots[0], tasks_present, dequeuing_possible );
                            __TBB_store_with_release( my_slots[0].my_scheduler, s );
                        }
                        my_market->my_arenas_list_mutex.unlock();
                        // The following loop is subject to data races. While the k-th slot's
                        // scheduler is being examined, the corresponding worker can either
                        // leave to RML or migrate to another arena.
                        // The races are not prevented because all of them are benign.
                        // First, the code relies on the fact that a worker thread's scheduler
                        // object persists until the whole library is deinitialized.
                        // Second, in the worst case the races can only cause another
                        // round of stealing attempts to be undertaken. Introducing complex
                        // synchronization into this coldest part of the scheduler's control
                        // flow does not seem to make sense because it is both unlikely to
                        // ever have any observable performance effect, and would require
                        // additional synchronization code on the hotter paths.
                        for( k = 1; work_absent && k < n; ++k )
                            work_absent = !may_have_tasks( my_slots[k].my_scheduler, my_slots[k], tasks_present, dequeuing_possible );
                        // Preclude prematurely switching the arena off because of a race in the previous loop.
                        work_absent = work_absent
                                      && !__TBB_load_with_acquire(my_orphaned_tasks)
                                      && abandonment_epoch == my_abandonment_epoch;
                    }
#endif /* __TBB_TASK_PRIORITY */
                    // Test and test-and-set.
                    if( my_pool_state==busy ) {
#if __TBB_TASK_PRIORITY
                        bool no_fifo_tasks = my_task_stream[top_priority].empty();
                        work_absent = work_absent && (!dequeuing_possible || no_fifo_tasks)
                                      && top_priority == my_top_priority && reload_epoch == my_reload_epoch;
#else
                        bool no_fifo_tasks = my_task_stream.empty();
                        work_absent = work_absent && no_fifo_tasks;
#endif /* __TBB_TASK_PRIORITY */
                        if( work_absent ) {
#if __TBB_TASK_PRIORITY
                            if ( top_priority > my_bottom_priority ) {
                                if ( my_market->lower_arena_priority(*this, top_priority - 1, top_priority)
                                     && !my_task_stream[top_priority].empty() )
                                {
                                    atomic_update( my_skipped_fifo_priority, top_priority, std::less<intptr_t>());
                                }
                            }
                            else if ( !tasks_present && !my_orphaned_tasks && no_fifo_tasks ) {
#endif /* __TBB_TASK_PRIORITY */
                                // Save the current demand value before setting SNAPSHOT_EMPTY
                                // to avoid a race with advertise_new_work.
                                int current_demand = (int)my_max_num_workers;
                                if( my_pool_state.compare_and_swap( SNAPSHOT_EMPTY, busy )==busy ) {
                                    // This thread transitioned the pool to the empty state, and thus is
                                    // responsible for telling RML that there is no other work to do.
                                    my_market->adjust_demand( *this, -current_demand );
#if __TBB_TASK_PRIORITY
                                    // Check for the presence of enqueued tasks "lost" on some of the
                                    // priority levels because updating the arena priority and switching
                                    // the arena into the "populated" (FULL) state happen non-atomically.
                                    // Imposing atomicity would require task::enqueue() to use a lock,
                                    // which is unacceptable.
                                    bool switch_back = false;
                                    for ( int p = 0; p < num_priority_levels; ++p ) {
                                        if ( !my_task_stream[p].empty() ) {
                                            switch_back = true;
                                            if ( p < my_bottom_priority || p > my_top_priority )
                                                my_market->update_arena_priority(*this, p);
                                        }
                                    }
                                    if ( switch_back )
                                        advertise_new_work</*Spawned*/false>();
#endif /* __TBB_TASK_PRIORITY */
                                    return true;
                                }
                                return false;
#if __TBB_TASK_PRIORITY
                            }
#endif /* __TBB_TASK_PRIORITY */
                        }
                        // Undo the previous transition SNAPSHOT_FULL-->busy, unless another thread undid it.
                        my_pool_state.compare_and_swap( SNAPSHOT_FULL, busy );
                    }
                }
                return false;
            }
            default:
                // Another thread is taking a snapshot.
                return false;
        }
    }
}
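// The state machine above is a test and test-and-set over three pool states:
// FULL -> (unique) busy -> EMPTY, or back to FULL. Using the address of a local
// variable as the transient "busy" value gives every snapshot attempt a unique
// token, so a concurrent FULL -> busy -> FULL cycle performed by another thread
// cannot be mistaken for our own transition (the ABA problem). A standalone
// sketch of the same shape; the names and the trivial work_absent parameter are
// illustrative only, not the arena implementation.
#include <atomic>
#include <cstdint>

typedef std::uintptr_t pool_state_example_t;
static const pool_state_example_t EXAMPLE_SNAPSHOT_EMPTY = 0;
static const pool_state_example_t EXAMPLE_SNAPSHOT_FULL  = pool_state_example_t(-1);

bool try_switch_to_empty_example( std::atomic<pool_state_example_t> &pool_state, bool work_absent ) {
    pool_state_example_t expected = EXAMPLE_SNAPSHOT_FULL;
    const pool_state_example_t busy = pool_state_example_t(&expected); // unique per call: avoids ABA
    // Request permission to take the snapshot: FULL -> busy.
    if ( !pool_state.compare_exchange_strong( expected, busy ) )
        return false;                                      // someone else owns the snapshot
    // ... examine task pools here; any task spawn resets the state to FULL ...
    if ( work_absent && pool_state.load() == busy ) {      // test before test-and-set
        expected = busy;
        if ( pool_state.compare_exchange_strong( expected, EXAMPLE_SNAPSHOT_EMPTY ) )
            return true;                                   // we switched the pool off
    }
    // Undo busy -> FULL, unless a spawner already overwrote our token.
    expected = busy;
    pool_state.compare_exchange_strong( expected, EXAMPLE_SNAPSHOT_FULL );
    return false;
}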
task_group_context::~task_group_context () {
    if ( __TBB_load_relaxed(my_kind) == binding_completed ) {
        if ( governor::is_set(my_owner) ) {
            // Local update of the context list
            uintptr_t local_count_snapshot = my_owner->my_context_state_propagation_epoch;
            my_owner->my_local_ctx_list_update.store<relaxed>(1);
            // Prevent the load of the nonlocal update flag from being hoisted before the
            // store to the local update flag.
            atomic_fence();
            if ( my_owner->my_nonlocal_ctx_list_update.load<relaxed>() ) {
                spin_mutex::scoped_lock lock(my_owner->my_context_list_mutex);
                my_node.my_prev->my_next = my_node.my_next;
                my_node.my_next->my_prev = my_node.my_prev;
                my_owner->my_local_ctx_list_update.store<relaxed>(0);
            }
            else {
                my_node.my_prev->my_next = my_node.my_next;
                my_node.my_next->my_prev = my_node.my_prev;
                // A release fence is necessary so that the update of our neighbors in
                // the context list is committed by the time a possible concurrent destroyer
                // proceeds after the local update flag is reset by the following store.
                my_owner->my_local_ctx_list_update.store<release>(0);
                if ( local_count_snapshot != the_context_state_propagation_epoch ) {
                    // Another thread was propagating a cancellation request when we removed
                    // ourselves from the list. We must ensure that it is not accessing us
                    // when this destructor finishes. We will be able to acquire the lock
                    // below only after the other thread finishes with us.
                    spin_mutex::scoped_lock lock(my_owner->my_context_list_mutex);
                }
            }
        }
        else {
            // Nonlocal update of the context list
            // Synchronizes with generic_scheduler::cleanup_local_context_list()
            // TODO: evaluate and perhaps relax, or add some lock instead
            if ( internal::as_atomic(my_kind).fetch_and_store(dying) == detached ) {
                my_node.my_prev->my_next = my_node.my_next;
                my_node.my_next->my_prev = my_node.my_prev;
            }
            else {
                //TODO: evaluate and perhaps relax
                my_owner->my_nonlocal_ctx_list_update.fetch_and_increment<full_fence>();
                //TODO: evaluate and perhaps remove
                spin_wait_until_eq( my_owner->my_local_ctx_list_update, 0u );
                my_owner->my_context_list_mutex.lock();
                my_node.my_prev->my_next = my_node.my_next;
                my_node.my_next->my_prev = my_node.my_prev;
                my_owner->my_context_list_mutex.unlock();
                //TODO: evaluate and perhaps relax
                my_owner->my_nonlocal_ctx_list_update.fetch_and_decrement<full_fence>();
            }
        }
    }
#if __TBB_FP_CONTEXT
    internal::punned_cast<cpu_ctl_env*>(&my_cpu_ctl_env)->~cpu_ctl_env();
#endif
    poison_value(my_version_and_traits);
    if ( my_exception )
        my_exception->destroy();
    ITT_STACK(itt_caller != ITT_CALLER_NULL, caller_destroy, itt_caller);
}
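// The destructor's fast path above is a Dekker-style flag protocol: the owner
// thread raises its "local update" flag, issues a full fence, and then checks
// whether a non-owner is concurrently updating the list; each side falls back to
// the mutex only when it sees the other side's flag or counter raised. A minimal
// two-party sketch of that handshake follows; the names are illustrative and are
// not the TBB data members.
#include <atomic>
#include <mutex>

struct list_guard_example {
    std::atomic<int> local_update{0};     // raised by the owner thread
    std::atomic<int> nonlocal_update{0};  // raised by other threads
    std::mutex list_mutex;

    template<typename Update>
    void owner_update( Update update ) {
        local_update.store( 1, std::memory_order_relaxed );
        std::atomic_thread_fence( std::memory_order_seq_cst ); // order flag store before flag load
        if ( nonlocal_update.load( std::memory_order_relaxed ) ) {
            std::lock_guard<std::mutex> lock( list_mutex );     // contended path
            update();
            local_update.store( 0, std::memory_order_relaxed );
        } else {
            update();                                           // uncontended, lock-free path
            local_update.store( 0, std::memory_order_release );
        }
    }

    template<typename Update>
    void nonowner_update( Update update ) {
        nonlocal_update.fetch_add( 1 );                         // announce intent (full fence)
        while ( local_update.load( std::memory_order_acquire ) )
            ;                                                   // wait for the owner's in-flight lock-free update
        {
            std::lock_guard<std::mutex> lock( list_mutex );
            update();
        }
        nonlocal_update.fetch_sub( 1 );
    }
};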