// Set ideal affinity for the current thread
// Parameters:
//  affinity - ideal processor affinity for the thread
// Return:
//  true if it has succeeded, false if it has failed
bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
{
    LIMITED_METHOD_CONTRACT;

    bool success = true;

#if !defined(FEATURE_CORESYSTEM)
    SetThreadIdealProcessor(GetCurrentThread(), (DWORD)affinity->Processor);
#elif !defined(FEATURE_PAL)
    PROCESSOR_NUMBER proc;

    if (affinity->Group != -1)
    {
        proc.Group = (WORD)affinity->Group;
        proc.Number = (BYTE)affinity->Processor;
        proc.Reserved = 0;

        success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
    }
    else
    {
        if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
        {
            proc.Number = (BYTE)affinity->Processor;
            success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
        }
    }
#endif

    return success;
}
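// A hypothetical call-site sketch for the function above (not from the
// original sources). GCThreadAffinity's Group/Processor fields follow the
// usage shown, with Group == -1 meaning "no specific processor group".
static void ExampleSetIdealAffinity()
{
    GCThreadAffinity affinity;
    affinity.Group = -1;      // keep the thread's current processor group
    affinity.Processor = 2;   // hint: prefer logical processor 2
    bool ok = GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity);
    // Note: ok is also true on the legacy (!FEATURE_CORESYSTEM) path,
    // where the SetThreadIdealProcessor return value is not checked.
    (void)ok;
}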
void SpringApp::SetProcessAffinity(int affinity)
{
#ifdef WIN32
	if (affinity > 0) {
		//! Get the available cores
		//! (GetProcessAffinityMask takes DWORD_PTR*, not DWORD*)
		DWORD_PTR curMask = 0;
		DWORD_PTR cores = 0;
		GetProcessAffinityMask(GetCurrentProcess(), &curMask, &cores);

		DWORD_PTR wantedCore = 0xff;
		//! Find a usable core
		while ((wantedCore & cores) == 0) {
			wantedCore >>= 1;
		}
		//! Restrict the mask to cores that are actually available
		wantedCore &= cores;

		//! Set the affinity
		HANDLE thread = GetCurrentThread();
		DWORD_PTR result = 0;
		if (affinity == 1) {
			//! SetThreadIdealProcessor expects a processor index, not a mask;
			//! use the lowest set bit of the wanted mask
			DWORD coreIndex = 0;
			while (coreIndex < 63 && ((wantedCore >> coreIndex) & 1) == 0) {
				++coreIndex;
			}
			result = (SetThreadIdealProcessor(thread, coreIndex) != (DWORD)-1);
		} else if (affinity >= 2) {
			result = SetThreadAffinityMask(thread, wantedCore);
		}

		if (result > 0) {
			LOG("CPU: affinity set (%d)", affinity);
		} else {
			LOG("CPU: affinity failed");
		}
	}
#endif
}
void cpu_thread::set_ideal_processor_core(int core)
{
#ifdef _WIN32
	HANDLE _this_thread = GetCurrentThread();
	SetThreadIdealProcessor(_this_thread, core);
#endif
}
//---------------------------------------------------------------------------
void TVPBeginThreadTask(tjs_int taskNum)
{
	TVPThreadTaskNum = taskNum;
	TVPThreadTaskCount = 0;
	tjs_int extraThreadNum = TVPGetThreadNum() - 1;
	if (TVPProcesserIdList.empty()) {
		// GetProcessAffinityMask takes DWORD_PTR*, not DWORD*
		DWORD_PTR processAffinityMask, systemAffinityMask;
		GetProcessAffinityMask(GetCurrentProcess(),
							   &processAffinityMask,
							   &systemAffinityMask);
		for (tjs_int i = 0; i < MAXIMUM_PROCESSORS; i++) {
			if (processAffinityMask & ((DWORD_PTR)1 << i))
				TVPProcesserIdList.push_back(i);
		}
		if (TVPProcesserIdList.empty())
			TVPProcesserIdList.push_back(MAXIMUM_PROCESSORS);
	}

	while (static_cast<tjs_int>(TVPThreadList.size()) < extraThreadNum) {
		ThreadInfo *threadInfo = new ThreadInfo();
		threadInfo->readyToExit = false;
		threadInfo->thread = CreateThread(NULL, 0, ThreadLoop, threadInfo,
										  CREATE_SUSPENDED, NULL);
		SetThreadIdealProcessor(threadInfo->thread,
								TVPProcesserIdList[TVPThreadList.size() % TVPProcesserIdList.size()]);
		TVPThreadList.push_back(threadInfo);
	}
	while (static_cast<tjs_int>(TVPThreadList.size()) > extraThreadNum) {
		ThreadInfo *threadInfo = TVPThreadList.back();
		threadInfo->readyToExit = true;
		while (ResumeThread(threadInfo->thread) == 0)
			Sleep(0);
		TVPThreadList.pop_back();
	}
}
PixelPipeline::PixelPipeline()
: active_cores(0), local_writer_index(0), local_reader_index(0), local_commands_written(0), cur_block(0)
{
#if defined(WIN32) && defined(PROFILE_PIPELINE)
	SetThreadIdealProcessor(GetCurrentThread(), 0);
	SetThreadAffinityMask(GetCurrentThread(), 1);
	profiler.start_time = __rdtsc();
#endif
	active_cores = System::get_num_cores();
	for (size_t i = 0; i < queue_max; i++)
		command_queue[i] = 0;
	reader_indices.resize(active_cores);
	reader_active.resize(active_cores);

	// Do not change this code to event_more_commands.resize().
	// If you do, the same Event handle ends up in every index, because
	// resize(n) calls resize(n, Event()).
	for (int core = 0; core < active_cores; core++)
		event_more_commands.push_back(Event());

	for (int core = 0; core < active_cores; core++)
	{
		Thread worker_thread;
		worker_thread.start(this, &PixelPipeline::worker_main, core);
		worker_threads.push_back(worker_thread);
	}
}
/**
 * \brief Maps the calling thread to the given CPU.
 *
 * It maps the calling thread to the given core. It works on Linux,
 * Mac OS X, and Windows.
 *
 * \param cpu_id the ID of the CPU to which the thread will be attached.
 * \param priority_level TODO
 *
 * \return An integer value showing the priority level is returned if
 *         successful. Otherwise \p EINVAL is returned.
 */
static inline int ff_mapThreadToCpu(int cpu_id, int priority_level=0) {
    if (cpu_id >= ff_numCores()) return EINVAL; // valid IDs are 0 .. ff_numCores()-1
#if defined(__linux__) && defined(CPU_SET)
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu_id, &mask);
    if (sched_setaffinity(gettid(), sizeof(mask), &mask) != 0)
        return EINVAL;
    return (ff_setPriority(priority_level));
#elif defined(__APPLE__) && MAC_OS_X_HAS_AFFINITY
    // Mac OS X does not implement direct pinning of threads onto cores.
    // Threads can be organised into affinity sets; the requested CPU is used
    // as the tag for the set. Cores under the same L2 cache are not
    // distinguished. Should be called before running the thread.
#define CACHE_LEVELS 3
#define CACHE_L2 2
    size_t len;

    if (sysctlbyname("hw.cacheconfig", NULL, &len, NULL, 0) != 0) {
        perror("sysctl");
    } else {
        int64_t cacheconfig[len];
        if (sysctlbyname("hw.cacheconfig", &cacheconfig[0], &len, NULL, 0) != 0)
            perror("sysctl: unable to get hw.cacheconfig");
        else {
            /*
              for (size_t i=0; i<CACHE_LEVELS; i++)
                  std::cerr << " Cache " << i << " shared by " << cacheconfig[i] << " cores\n";
            */
            struct thread_affinity_policy mypolicy;
            // Define sets taking into account that pinning is performed on L2
            mypolicy.affinity_tag = cpu_id/cacheconfig[CACHE_L2];
            if ( thread_policy_set(mach_thread_self(),
                                   THREAD_AFFINITY_POLICY,
                                   (integer_t*) &mypolicy,
                                   THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS ) {
                std::cerr << "Setting affinity of thread ? ("
                          << mach_thread_self() << ") failed!" << std::endl;
                return EINVAL;
            }
            // else {
            //     std::cerr << "Successfully set affinity of thread ("
            //               << mach_thread_self() << ") to core "
            //               << cpu_id/cacheconfig[CACHE_L2] << "\n";
            // }
        }
    }
    return (ff_setPriority(priority_level));
#elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) && defined(_WIN32)
    if (-1 == SetThreadIdealProcessor(GetCurrentThread(), cpu_id)) {
        perror("ff_mapThreadToCpu:SetThreadIdealProcessor");
        return EINVAL;
    }
    //std::cerr << "Successfully set affinity of thread " << GetCurrentThreadId()
    //          << " to core " << cpu_id << "\n";
#else
#warning "CPU_SET not defined, cannot map thread to specific CPU"
#endif
    return 0;
}
/*! set the affinity of a given thread */
void setAffinity(HANDLE thread, ssize_t affinity)
{
#if _WIN32_WINNT >= _WIN32_WINNT_WIN7
  typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
  typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
  HMODULE hlib = LoadLibrary("Kernel32");
  GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
  GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
  SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
  SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");

  if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
  {
    int groups = pGetActiveProcessorGroupCount();
    int totalProcessors = 0, group = 0, number = 0;
    for (int i = 0; i < groups; i++) {
      int processors = pGetActiveProcessorCount(i);
      if (totalProcessors + processors > affinity) {
        group = i;
        number = (int)affinity - totalProcessors;
        break;
      }
      totalProcessors += processors;
    }

    GROUP_AFFINITY groupAffinity;
    groupAffinity.Group = (WORD)group;
    groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
    groupAffinity.Reserved[0] = 0;
    groupAffinity.Reserved[1] = 0;
    groupAffinity.Reserved[2] = 0;
    if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
      WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning

    PROCESSOR_NUMBER processorNumber;
    processorNumber.Group = group;
    processorNumber.Number = number;
    processorNumber.Reserved = 0;
    if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
      WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
  }
  else
#endif
  {
    if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
      WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
    if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
      WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
  }
}
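// A hypothetical usage sketch for the setAffinity() variant above: spread
// newly created worker threads round-robin over all logical processors.
// The entry point, count, and function name are assumptions, not part of
// the original snippet; GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)
// requires Windows 7 or later.
static void spawnWorkers(size_t count, LPTHREAD_START_ROUTINE entry)
{
  DWORD processors = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
  for (size_t i = 0; i < count; i++) {
    HANDLE thread = CreateThread(NULL, 0, entry, NULL, CREATE_SUSPENDED, NULL);
    setAffinity(thread, (ssize_t)(i % processors)); // group affinity + ideal hint
    ResumeThread(thread);
  }
}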
boost::uint32_t SetAffinity(boost::uint32_t cores_bitmask, bool hard)
{
	if (cores_bitmask == 0) {
		return ~0;
	}

#if defined(__APPLE__) || defined(__FreeBSD__)
	// no-op
	return 0;

#elif defined(WIN32)
	// Create mask
	DWORD_PTR cpusWanted = (cores_bitmask & cpusSystem);

	// Set the affinity
	HANDLE thread = GetCurrentThread();
	DWORD_PTR result = 0;
	if (hard) {
		result = SetThreadAffinityMask(thread, cpusWanted);
	} else {
		// SetThreadIdealProcessor expects a processor index, not a mask;
		// use the lowest set bit of the wanted mask
		DWORD coreIndex = 0;
		while (coreIndex < 63 && ((cpusWanted >> coreIndex) & 1) == 0) {
			++coreIndex;
		}
		result = (SetThreadIdealProcessor(thread, coreIndex) != (DWORD)-1);
	}

	// Return final mask
	return (result > 0) ? (boost::uint32_t)cpusWanted : 0;

#else
	// Create mask
	cpu_set_t cpusWanted;
	CPU_ZERO(&cpusWanted);
	int numCpus = std::min(CPU_COUNT(&cpusSystem), 32); // w/o the min(.., 32) `(1 << n)` could overflow!
	for (int n = numCpus - 1; n >= 0; --n) {
		if ((cores_bitmask & (1 << n)) != 0) {
			CPU_SET(n, &cpusWanted);
		}
	}
	CPU_AND(&cpusWanted, &cpusWanted, &cpusSystem);

	// Set the affinity
	int result = sched_setaffinity(0, sizeof(cpu_set_t), &cpusWanted);

	// Return final mask
	uint32_t finalMask = 0;
	for (int n = numCpus - 1; n >= 0; --n) {
		if (CPU_ISSET(n, &cpusWanted)) {
			finalMask |= (1 << n);
		}
	}
	return (result == 0) ? finalMask : 0;
#endif
}
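// A minimal sketch of the mask-to-index conversion used above, written with
// the MSVC <intrin.h> bit-scan intrinsics instead of a shift loop. The
// helper name is hypothetical; the result is undefined if mask == 0.
#include <intrin.h>

static DWORD LowestSetBitIndex(DWORD_PTR mask)
{
	unsigned long index = 0;
#ifdef _WIN64
	_BitScanForward64(&index, mask);
#else
	_BitScanForward(&index, (unsigned long)mask);
#endif
	return index;
}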
extern void HK_CALL DemoPlatformInit(hkDemoFrameworkOptions*)
{
#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
	// Flush all denormal/subnormal numbers (2^-1074 to 2^-1022) to zero.
	// Typically operations on denormals are very slow, up to 100 times slower than normal numbers.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

	HANDLE thread = GetCurrentThread();
	SetThreadIdealProcessor(thread, 0); // try to keep on single core
	// can use XP/Vista/Server2003 etc SetThreadAffinityMask too if we want to be more forceful

	// XAudio requires COM init
	// If the following line does not compile, it means that windows.h was included with
	// a lot of #defines to cut down linkage. A full windows.h include is required, with winnt ver >= 4.
	// In the demos this is done in the PCH, demos.h, before any Havok base windows includes.
	CoInitializeEx(0, COINIT_MULTITHREADED);
}
/*! set the affinity of a given thread */
void setAffinity(HANDLE thread, ssize_t affinity)
{
#if (_WIN32_WINNT >= 0x0601)
  // FIXME: use GetProcAddress to activate this feature only if supported by Windows
  int groups = GetActiveProcessorGroupCount();
  int totalProcessors = 0, group = 0, number = 0;
  for (int i = 0; i < groups; i++) {
    int processors = GetActiveProcessorCount(i);
    if (totalProcessors + processors > affinity) {
      group = i;
      number = (int)affinity - totalProcessors;
      break;
    }
    totalProcessors += processors;
  }

  GROUP_AFFINITY groupAffinity;
  groupAffinity.Group = (WORD)group;
  groupAffinity.Mask = (KAFFINITY)(uint64(1) << number);
  groupAffinity.Reserved[0] = 0;
  groupAffinity.Reserved[1] = 0;
  groupAffinity.Reserved[2] = 0;
  if (!SetThreadGroupAffinity(thread, &groupAffinity, NULL))
    THROW_RUNTIME_ERROR("cannot set thread group affinity");

  PROCESSOR_NUMBER processorNumber;
  processorNumber.Group = group;
  processorNumber.Number = number;
  processorNumber.Reserved = 0;
  if (!SetThreadIdealProcessorEx(thread, &processorNumber, NULL))
    THROW_RUNTIME_ERROR("cannot set ideal processor");
#else
  if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64(1) << affinity)))
    THROW_RUNTIME_ERROR("cannot set thread affinity mask");
  if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
    THROW_RUNTIME_ERROR("cannot set ideal processor");
#endif
}
/*! set the affinity of a given thread */
void setAffinity(HANDLE thread, ssize_t affinity)
{
#if (_WIN32_WINNT >= 0x0601)
  int groups = GetActiveProcessorGroupCount();
  int totalProcessors = 0, group = 0, number = 0;
  for (int i = 0; i < groups; i++) {
    int processors = GetActiveProcessorCount(i);
    if (totalProcessors + processors > affinity) {
      group = i;
      number = (int)affinity - totalProcessors;
      break;
    }
    totalProcessors += processors;
  }

  GROUP_AFFINITY groupAffinity;
  groupAffinity.Group = (WORD)group;
  groupAffinity.Mask = (KAFFINITY)(uint64(1) << number);
  groupAffinity.Reserved[0] = 0;
  groupAffinity.Reserved[1] = 0;
  groupAffinity.Reserved[2] = 0;
  if (!SetThreadGroupAffinity(thread, &groupAffinity, NULL))
    throw std::runtime_error("cannot set thread group affinity");

  PROCESSOR_NUMBER processorNumber;
  processorNumber.Group = group;
  processorNumber.Number = number;
  processorNumber.Reserved = 0;
  if (!SetThreadIdealProcessorEx(thread, &processorNumber, NULL))
    throw std::runtime_error("cannot set thread ideal processor");
#else
  if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64(1) << affinity)))
    throw std::runtime_error("cannot set thread affinity mask");
  if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
    throw std::runtime_error("cannot set thread ideal processor");
#endif
}
void PixelPipeline::worker_main(int core)
{
#if defined(WIN32) && defined(PROFILE_PIPELINE)
	SetThreadIdealProcessor(GetCurrentThread(), core);
	SetThreadAffinityMask(GetCurrentThread(), 1 << core);
	unsigned __int64 ticks_waiting = 0;
	unsigned __int64 ticks_working = 0;
#endif

	PixelThreadContext context(core, active_cores);
	while (true)
	{
#if defined(WIN32) && defined(PROFILE_PIPELINE)
		unsigned __int64 wait_start_time = __rdtsc();
#endif
		int wakeup_reason = Event::wait(event_more_commands[core], event_stop);
		if (wakeup_reason != 0)
			break;
		event_more_commands[core].reset();
#if defined(WIN32) && defined(PROFILE_PIPELINE)
		unsigned __int64 wait_end_time = __rdtsc();
		ticks_waiting += wait_end_time - wait_start_time;
#endif

		process_commands(&context);
#if defined(WIN32) && defined(PROFILE_PIPELINE)
		unsigned __int64 commands_end_time = __rdtsc();
		ticks_working += commands_end_time - wait_end_time;
#endif
	}

#if defined(WIN32) && defined(PROFILE_PIPELINE)
	MessageBoxA(
		0,
		cl_format("Pipeline core %1 spent %2 percent of its time waiting for commands",
				  core,
				  (int)(ticks_waiting * 100 / (ticks_working + ticks_waiting))).c_str(),
		"DEBUG",
		MB_OK);
#endif
}
static void* HK_CALL hkWorkerThreadFunc(void *v)
{
	vHavokCpuJobThreadPool::WorkerThreadData& data = *static_cast<vHavokCpuJobThreadPool::WorkerThreadData*>(v);
	vHavokCpuJobThreadPool::SharedThreadData& sharedThreadData = *data.m_sharedThreadData;
	HK_THREAD_LOCAL_SET( hkThreadNumber, data.m_threadId );

#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
	// Flush all denormal/subnormal numbers (2^-1074 to 2^-1022) to zero.
	// Typically operations on denormals are very slow, up to 100 times slower than normal numbers.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

#ifdef HK_PLATFORM_XBOX360
	XSetThreadProcessor(GetCurrentThread(), data.m_hardwareThreadId);
#elif defined(HK_PLATFORM_WIN32) && !defined(_VISION_WINRT)
	SetThreadIdealProcessor(GetCurrentThread(), data.m_hardwareThreadId); // Can use SetThreadAffinityMask to be more forceful.
#endif

	hkMemoryRouter memoryRouter;
	hkMemorySystem::getInstance().threadInit( memoryRouter, "vHavokCpuJobThreadPool" );
	hkBaseSystem::initThread( &memoryRouter );
	//hkUint32 allLockVal = 0;
	//hkReferencedObject::initThread(&allLockVal);

	if (sharedThreadData.m_timerBufferAllocation > 0)
	{
		// Allocate a monitor stream for this thread - this enables timers.
		hkMonitorStream::getInstance().resize(sharedThreadData.m_timerBufferAllocation);
	}
	data.m_monitorStreamBegin = hkMonitorStream::getInstance().getStart();
	data.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();

	hkCheckDeterminismUtil::initThread();

	// VISION specific: Call any per thread callback
	if (sharedThreadData.m_OnWorkerThreadCreatedPtr)
	{
		sharedThreadData.m_CallbackProtect.enter();
		sharedThreadData.m_OnWorkerThreadCreatedPtr->TriggerCallbacks( /* data needed? */ );
		sharedThreadData.m_CallbackProtect.leave();
	}
	// END VISION specific

	// Wait for the main thread to release the worker thread
	data.m_semaphore.acquire();

	// The thread "main loop"
	while (data.m_killThread == false)
	{
		if (data.m_clearTimers)
		{
			hkMonitorStream::getInstance().reset();
			data.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();
			data.m_clearTimers = false;
		}

		const bool isNotPrimary = false;
		hkCheckDeterminismUtil::workerThreadStartFrame(isNotPrimary);

		// Enable timers for critical sections just during the step call
		hkCriticalSection::setTimersEnabled();

		sharedThreadData.m_jobQueue->processAllJobs();

		// Disable timers for critical sections just during the step call
		hkCriticalSection::setTimersDisabled();

		// Note collected timer data
		hkMonitorStream& stream = hkMonitorStream::getInstance();
		data.m_monitorStreamEnd = stream.getEnd();

		hkCheckDeterminismUtil::workerThreadFinishFrame();

		if( sharedThreadData.m_gcThreadMemoryOnCompletion )
		{
			hkMemorySystem::getInstance().garbageCollectThread( memoryRouter );
		}

		// Release any thread (usually the main thread) which may be waiting for all worker threads to finish.
		sharedThreadData.m_workerThreadFinished.release();

		// Immediately wait until the main thread releases the thread again
		data.m_semaphore.acquire();
	}

	// Perform cleanup operations

	// VISION specific: Call any per thread callback
	if (sharedThreadData.m_OnWorkerThreadFinishedPtr)
	{
		sharedThreadData.m_CallbackProtect.enter();
		sharedThreadData.m_OnWorkerThreadFinishedPtr->TriggerCallbacks( /* data needed? */ );
		sharedThreadData.m_CallbackProtect.leave();
	}
	// END VISION specific

	hkCheckDeterminismUtil::quitThread();

	hkBaseSystem::quitThread();
	hkMemorySystem::getInstance().threadQuit( memoryRouter );

	sharedThreadData.m_workerThreadFinished.release();
	return 0;
}
void YabThreadSetCurrentThreadAffinityMask(int mask)
{
   // SetThreadIdealProcessor takes a processor index, not an affinity mask;
   // derive the index of the lowest set bit from the given mask.
   DWORD processor = 0;
   while (mask > 1 && (mask & 1) == 0)
   {
      mask >>= 1;
      processor++;
   }
   SetThreadIdealProcessor(GetCurrentThread(), processor);
}
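// A closing sketch contrasting the two Win32 calls that appear throughout
// these snippets: SetThreadIdealProcessor is only a scheduling hint, while
// SetThreadAffinityMask is a hard constraint. The function name is
// hypothetical.
#include <windows.h>

static void PinCurrentThread(DWORD core, BOOL hard)
{
   HANDLE thread = GetCurrentThread();
   if (hard) {
      // Hard bind: the thread may run only on `core`.
      SetThreadAffinityMask(thread, (DWORD_PTR)1 << core);
   } else {
      // Hint: the scheduler prefers `core` but may run the thread elsewhere.
      SetThreadIdealProcessor(thread, core);
   }
}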