/* Barrier for the 4 threads of one core. Uses sense reversal: 'mode'
   selects which of the two threadState rows is active, so consecutive
   barrier invocations cannot interfere with each other.
   NOTE(review): the 0x01010101 word-compare assumes exactly 4 one-byte
   entries per threadState row — confirm against the declaration. */
void LockStepTaskScheduler4ThreadsLocalCore::syncThreads(const size_t localThreadID)
{
  const unsigned int m = mode; /* snapshot the current sense before any thread flips it */
  if (localThreadID == 0)
  {
    /* thread 0 acts as the coordinator */
    __memory_barrier();
    threadState[m][localThreadID] = 1; /* publish own arrival */
    __memory_barrier();
    /* read all 4 per-thread byte flags as one 32-bit word; 0x01010101
       means every thread has arrived */
    while( (*(volatile unsigned int*)&threadState[m][0]) != 0x01010101 )
      __pause_cpu(WAIT_CYCLES);
    mode = 1 - mode; /* flip the sense for the next barrier round */
    __memory_barrier();
    /* clearing the whole word releases the 3 waiting threads below */
    *(volatile unsigned int*)&threadState[m][0] = 0;
  }
  else
  {
    __memory_barrier();
    threadState[m][localThreadID] = 1; /* publish own arrival */
    __memory_barrier();
    /* spin until thread 0 resets our flag, which signals release */
    while (threadState[m][localThreadID] == 1)
      __pause_cpu(WAIT_CYCLES);
  }
}
/* Active (spinning) linear barrier. Two alternating phases ('mode')
   each use their own count/flag pair so back-to-back barriers cannot
   race: while phase 0 gathers on count0/flag0, phase 1's state is
   being reset, and vice versa. Thread 0 is the coordinator; all other
   threads just announce arrival and spin on the release flag. */
void LinearBarrierActive::wait (const size_t threadIndex)
{
  if (mode == 0)
  {
    if (threadIndex == 0)
    {
      /* reset the *other* phase's counters while we own the barrier */
      for (size_t i=0; i<threadCount; i++)
        count1[i] = 0;
      /* wait until every other thread has arrived in this phase */
      for (size_t i=1; i<threadCount; i++)
      {
        while (likely(count0[i] == 0))
          __pause_cpu();
      }
      mode  = 1;   /* switch sense for the next barrier invocation */
      flag1 = 0;   /* arm the next phase's release flag */
      __memory_barrier(); /* order the resets before the release below */
      flag0 = 1;   /* release all waiters of this phase */
    }
    else
    {
      count0[threadIndex] = 1; /* announce arrival */
      {
        /* spin until thread 0 raises the phase-0 release flag */
        while (likely(flag0 == 0))
          __pause_cpu();
      }
    }
  }
  else
  {
    /* mirror image of the code above, with the roles of the
       count0/flag0 and count1/flag1 pairs swapped */
    if (threadIndex == 0)
    {
      for (size_t i=0; i<threadCount; i++)
        count0[i] = 0;
      for (size_t i=1; i<threadCount; i++)
      {
        while (likely(count1[i] == 0))
          __pause_cpu();
      }
      mode  = 0;
      flag0 = 0;
      __memory_barrier();
      flag1 = 1;
    }
    else
    {
      count1[threadIndex] = 1; /* announce arrival */
      {
        while (likely(flag1 == 0))
          __pause_cpu();
      }
    }
  }
}
/* Attempt to steal one task for 'thread'. Visits every other worker
   exactly once, starting at the right-hand neighbor and wrapping
   around. Returns true as soon as one steal succeeds, false when a
   full round found nothing. */
bool TaskSchedulerTBB::steal_from_other_threads(Thread& thread)
{
  const size_t self  = thread.threadIndex;
  const size_t total = this->threadCounter;
  for (size_t offset=1; offset<total; offset++)
  {
    __pause_cpu(32); /* brief back-off before each steal attempt */

    /* victim index, wrapped around without a division */
    size_t victim = self + offset;
    if (victim >= total)
      victim -= total;

    Thread* const other = threadLocal[victim];
    if (other == nullptr)
      continue; /* that slot holds no registered thread (yet) */

    if (other->tasks.steal(thread))
      return true;
  }
  return false;
}
/* Actively spins until at least threadCount-1 worker threads have
   registered themselves in 'threadCounter' (the calling thread itself
   accounts for the remaining one).
   Fix: with threadCount == 0 the expression threadCount-1 underflows
   size_t to SIZE_MAX, turning this into a (practically) infinite spin;
   guard that degenerate input explicitly. */
void TaskSchedulerTBB::wait_for_threads(size_t threadCount)
{
  if (threadCount == 0) return; /* avoid size_t underflow in threadCount-1 */
  while (threadCounter < threadCount-1)
    __pause_cpu();
}
/* Attempt to steal one task for 'thread' from some other worker.
   Returns true on a successful steal, false after a fruitless round.
   NOTE(review): this duplicates the plain steal loop above behind the
   SORTED_STEALING switch — presumably only one definition is compiled;
   verify the surrounding preprocessor/file setup. */
bool TaskSchedulerTBB::steal_from_other_threads(Thread& thread)
{
  const size_t threadIndex = thread.threadIndex;
  const size_t threadCount = this->threadCounter;
#if SORTED_STEALING == 1
  size_t workingThreads = 0;
  /* (task_size, thread index) pairs for every live candidate victim */
  std::pair<size_t,size_t> thread_task_size[MAX_THREADS];
  /* find thread with largest estimated size left */
  for (size_t i=1; i<threadCount; i++)
  {
    size_t otherThreadIndex = threadIndex+i;
    if (otherThreadIndex >= threadCount) otherThreadIndex -= threadCount;
    if (!threadLocal[otherThreadIndex]) continue;
    const size_t task_size = threadLocal[otherThreadIndex]->tasks.getTaskSizeAtLeft(); /* we steal from the left side */
    thread_task_size[workingThreads++] = std::pair<size_t,size_t>(task_size,otherThreadIndex);
  }
  /* sort thread/size pairs based on size */
  std::sort(thread_task_size, &thread_task_size[workingThreads],
            [](const std::pair<size_t,size_t> & a, const std::pair<size_t,size_t> & b) -> bool { return a.first > b.first; });
  /* if (threadIndex == 0) for (size_t i=0;i<workingThreads;i++) std::cout << "thread_task_size " << thread_task_size[i].first << " " << thread_task_size[i].second << std::endl; */
  /* try victims in descending order of estimated remaining work */
  for (size_t i=0; i<workingThreads; i++)
  {
    const size_t otherThreadIndex = thread_task_size[i].second;
    Thread* othread = threadLocal[otherThreadIndex];
    if (!othread) continue; /* victim may have deregistered since the scan */
    if (othread->tasks.steal(thread)) return true;
  }
  /* nothing found this time, do another round */
#else
  /* plain round-robin stealing: visit each other thread once, wrapping */
  for (size_t i=1; i<threadCount; i++)
  {
    __pause_cpu(32); /* brief back-off before each steal attempt */
    size_t otherThreadIndex = threadIndex+i;
    if (otherThreadIndex >= threadCount) otherThreadIndex -= threadCount;
    Thread* othread = threadLocal[otherThreadIndex];
    if (!othread) continue; /* slot not (yet) populated */
    if (othread->tasks.steal(thread)) return true;
  }
#endif
  return false;
}
/* Main loop executed by each worker thread: wait for work (either by
   spinning or inside the condition variable, depending on 'spinning'),
   then steal and execute tasks until none remain; repeat until
   'terminate' is set. Uses a function-try-block so any escaping
   exception aborts the process instead of propagating. */
void TaskSchedulerTBB::thread_loop(size_t threadIndex) try
{
#if defined(__MIC__)
  setAffinity(threadIndex); /* pin the thread on Xeon Phi */
#endif

  /* allocate thread structure */
  Thread thread(threadIndex,this);
  threadLocal[threadIndex] = &thread; /* register for stealing by others */
  thread_local_thread = &thread;

  /* main thread loop */
  while (!terminate)
  {
    /* wake condition: either work is available or we are shutting down */
    auto predicate = [&] () { return anyTasksRunning || terminate; };

    /* all threads are either spinning ... */
    if (spinning)
    {
      while (!predicate())
        __pause_cpu(32);
    }
    /* ... or waiting inside some condition variable */
    else
    {
      //std::unique_lock<std::mutex> lock(mutex);
      Lock<MutexSys> lock(mutex);
      condition.wait(mutex, predicate);
    }
    if (terminate) break;

    /* special static load balancing for top level task sets */
#if TASKSCHEDULER_STATIC_LOAD_BALANCING
    if (executeTaskSet(thread))
      continue;
#endif

    /* work on available task: keep stealing while any thread still
       runs tasks; bracket local execution with the anyTasksRunning
       counter so other threads know work may still be spawned */
    steal_loop(thread,
               [&] () { return anyTasksRunning > 0; },
               [&] () {
                 atomic_add(&anyTasksRunning,+1);
                 while (thread.tasks.execute_local(thread,nullptr));
                 atomic_add(&anyTasksRunning,-1);
               });
  }

  /* decrement threadCount again */
  atomic_add(&threadCounter,-1);

  /* wait for all threads to terminate */
  while (threadCounter > 0)
    yield();
  threadLocal[threadIndex] = nullptr; /* deregister so nobody steals from a dead thread */
}
catch (const std::exception& e)
{
  std::cout << "Error: " << e.what() << std::endl; // FIXME: propagate to main thread
  threadLocal[threadIndex] = nullptr;
  exit(1); /* cannot recover from a worker-thread failure */
}