예제 #1
0
// Set ideal affinity for the current thread
// Parameters:
//  affinity - ideal processor affinity for the thread. A Group of -1 means
//             "keep the processor group the thread currently prefers" and
//             only the processor number within that group is replaced.
// Return:
//  true if it has succeeded, false if it has failed
// Notes:
//  When both FEATURE_CORESYSTEM and FEATURE_PAL are defined nothing is done
//  and true is returned.
bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity)
{
    LIMITED_METHOD_CONTRACT;

    bool success = true;

#if !defined(FEATURE_CORESYSTEM)
    // SetThreadIdealProcessor returns the previous ideal processor on success
    // and (DWORD)-1 on failure, so the result has to be compared explicitly.
    success = (SetThreadIdealProcessor(GetCurrentThread(), (DWORD)affinity->Processor) != (DWORD)-1);
#elif !defined(FEATURE_PAL)
    PROCESSOR_NUMBER proc;

    if (affinity->Group != -1)
    {
        proc.Group = (WORD)affinity->Group;
        proc.Number = (BYTE)affinity->Processor;
        proc.Reserved = 0;

        success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
    }
    else
    {
        // No group requested: fetch the current ideal processor to learn the
        // group, then overwrite only the processor number.
        if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
        {
            proc.Number = (BYTE)affinity->Processor;
            // The previous ideal processor is not needed, so do not alias the
            // input structure as the output buffer.
            success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
        }
        else
        {
            // The ideal processor could not be queried, hence nothing was set.
            success = false;
        }
    }
#endif

    return success;
}
예제 #2
0
// Applies a CPU placement preference to the calling thread (WIN32 builds only).
//   affinity <= 0 : nothing is done
//   affinity == 1 : soft hint through SetThreadIdealProcessor
//   affinity >= 2 : hard binding through SetThreadAffinityMask
// The outcome is only logged; nothing is returned to the caller.
// NOTE(review): this definition is cut off in the excerpt - the matching
// #endif and the closing brace of the function are not visible here.
void SpringApp::SetProcessAffinity(int affinity)
{
#ifdef WIN32
	if (affinity > 0) {
		//! Get the available cores (curMask: process mask, cores: system mask)
		// NOTE(review): GetProcessAffinityMask takes PDWORD_PTR arguments; DWORD
		// only has the same width on 32-bit targets - confirm the build target.
		DWORD curMask;
		DWORD cores = 0;
		GetProcessAffinityMask(GetCurrentProcess(), &curMask, &cores);

		// Starts as a mask of the lowest eight CPUs, not as a single core.
		DWORD_PTR wantedCore = 0xff;

		//! Find a usable core
		// NOTE(review): if none of the low eight bits of `cores` is set,
		// wantedCore is shifted down to 0 and this condition stays true forever.
		while ((wantedCore & cores) == 0 ) {
			wantedCore >>= 1;
		}

		//! Set the affinity
		HANDLE thread = GetCurrentThread();
		DWORD_PTR result = 0;
		if (affinity == 1) {
			// NOTE(review): SetThreadIdealProcessor expects a processor number,
			// while wantedCore holds a bit mask - verify the intended value.
			result = SetThreadIdealProcessor(thread, (DWORD)wantedCore);
		} else if (affinity >= 2) {
			result = SetThreadAffinityMask(thread, wantedCore);
		}

		// NOTE(review): SetThreadIdealProcessor returns the previous ideal
		// processor (possibly 0) and (DWORD)-1 on failure, so "result > 0" looks
		// unreliable for the affinity == 1 path - confirm.
		if (result > 0) {
			LOG("CPU: affinity set (%d)", affinity);
		} else {
			LOG("CPU: affinity failed");
		}
	}
예제 #3
0
// Asks the scheduler to prefer the given core for the calling thread.
// Only _WIN32 builds act on the request; elsewhere the body is empty.
void cpu_thread::set_ideal_processor_core(int core)
{
#ifdef _WIN32
	// The pseudo handle returned by GetCurrentThread() refers to the caller.
	SetThreadIdealProcessor(GetCurrentThread(), core);
#endif
}
예제 #4
0
//---------------------------------------------------------------------------
// Prepares the worker pool for a batch of `taskNum` tasks.
//
// Resets the task counters, lazily builds the list of processors the process
// may run on, and then grows or shrinks TVPThreadList until it holds
// TVPGetThreadNum() - 1 extra threads. New threads are created suspended and
// receive an ideal processor picked round-robin from TVPProcesserIdList.
void TVPBeginThreadTask(tjs_int taskNum)
{
  TVPThreadTaskNum = taskNum;
  TVPThreadTaskCount = 0;
  tjs_int extraThreadNum = TVPGetThreadNum() - 1;
  if (TVPProcesserIdList.empty()) {
    // GetProcessAffinityMask writes pointer-sized (DWORD_PTR) masks, so DWORD
    // is only correct on 32-bit targets. Zero-initialise so a failed query is
    // treated as "no processor known" instead of reading garbage.
    DWORD_PTR processAffinityMask = 0, systemAffinityMask = 0;
    if (!GetProcessAffinityMask(GetCurrentProcess(),
                                &processAffinityMask,
                                &systemAffinityMask))
      processAffinityMask = 0;
    for (tjs_int i = 0; i < MAXIMUM_PROCESSORS; i++) {
      // Shift in DWORD_PTR width: an int shift overflows for i >= 31.
      if (processAffinityMask & (static_cast<DWORD_PTR>(1) << i))
        TVPProcesserIdList.push_back(i);
    }
    // MAXIMUM_PROCESSORS tells SetThreadIdealProcessor that the thread has no
    // preferred processor; it is the fallback when no usable bit was found.
    if (TVPProcesserIdList.empty())
      TVPProcesserIdList.push_back(MAXIMUM_PROCESSORS);
  }
  // Grow the pool.
  while ( static_cast<tjs_int>(TVPThreadList.size()) < extraThreadNum) {
    ThreadInfo *threadInfo = new ThreadInfo();
    threadInfo->readyToExit = false;
    threadInfo->thread = CreateThread(NULL, 0, ThreadLoop, threadInfo, CREATE_SUSPENDED, NULL);
    SetThreadIdealProcessor(threadInfo->thread, TVPProcesserIdList[TVPThreadList.size() % TVPProcesserIdList.size()]);
    TVPThreadList.push_back(threadInfo);
  }
  // Shrink the pool: flag the newest thread for exit and resume it.
  // ResumeThread returns the previous suspend count, so 0 means the thread was
  // not suspended yet; keep yielding until a resume actually took effect.
  // NOTE(review): threadInfo is neither deleted nor its handle closed here -
  // presumably ThreadLoop does that once readyToExit is set; confirm.
  while ( static_cast<tjs_int>(TVPThreadList.size()) > extraThreadNum) {
    ThreadInfo *threadInfo = TVPThreadList.back();
    threadInfo->readyToExit = true;
    while (ResumeThread(threadInfo->thread) == 0)
      Sleep(0);
    TVPThreadList.pop_back();
  }
}
예제 #5
0
// Builds the pipeline: clears the command queue, sizes the per-core reader
// bookkeeping and starts one worker thread per core reported by
// System::get_num_cores().
PixelPipeline::PixelPipeline()
: active_cores(0), local_writer_index(0), local_reader_index(0), local_commands_written(0), cur_block(0)
{
#if defined(WIN32) && defined(PROFILE_PIPELINE)
	// Profiling builds keep the constructing thread on CPU 0 and record the
	// time stamp counter as the profiler's start time.
	SetThreadIdealProcessor(GetCurrentThread(), 0);
	SetThreadAffinityMask(GetCurrentThread(), 1);
	profiler.start_time = __rdtsc();
#endif

	active_cores = System::get_num_cores();

	for (size_t slot = 0; slot < queue_max; ++slot)
	{
		command_queue[slot] = 0;
	}

	reader_indices.resize(active_cores);
	reader_active.resize(active_cores);

	// Keep the explicit push_back loop below. Per the original author's note,
	// event_more_commands.resize(n) forwards to resize(n, Event()) and every
	// index would then end up with the same Event handle.
	for (int event_index = 0; event_index < active_cores; ++event_index)
	{
		event_more_commands.push_back(Event());
	}

	// All events exist before the first worker is started.
	for (int worker_index = 0; worker_index < active_cores; ++worker_index)
	{
		Thread worker_thread;
		worker_thread.start(this, &PixelPipeline::worker_main, worker_index);
		worker_threads.push_back(worker_thread);
	}
}
예제 #6
0
/** 
 *  \brief Maps the calling thread to the given CPU.
 *
 *  Platform behaviour:
 *   - Linux (with CPU_SET): the thread is pinned with sched_setaffinity().
 *   - Apple (when MAC_OS_X_HAS_AFFINITY): an affinity tag derived from
 *     \p cpu_id and the hw.cacheconfig sysctl is set for the thread.
 *   - Windows (MSVC / Intel compiler): SetThreadIdealProcessor() is used.
 *   - Anything else: no mapping is done and a compile-time warning is emitted.
 *
 *  \param cpu_id the ID of the CPU to which the thread will be attached;
 *  must lie in the range [0, ff_numCores()).
 *  \param priority_level forwarded to ff_setPriority() on Linux and Apple;
 *  not used on the other platforms.
 *
 *  \return \p EINVAL if \p cpu_id is out of range or the mapping fails.
 *  Otherwise the value returned by ff_setPriority() (Linux, Apple) or 0
 *  (all other platforms).
 */
static inline int ff_mapThreadToCpu(int cpu_id, int priority_level=0) {
    // CPU ids are zero-based, so ff_numCores() itself is already out of range;
    // negative ids are never valid either.
    if (cpu_id < 0 || cpu_id >= ff_numCores()) return EINVAL;
#if defined(__linux__) && defined(CPU_SET)
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu_id, &mask);
    if (sched_setaffinity(gettid(), sizeof(mask), &mask) != 0) 
        return EINVAL;
    return (ff_setPriority(priority_level));
#elif defined(__APPLE__) && MAC_OS_X_HAS_AFFINITY
    // Mac OS does not implement direct pinning of threads onto cores.
    // Threads can be organised in affinity set. Using requested CPU
    // tag for the set. Cores under the same L2 cache are not distinguished. 
    // Should be called before running the thread.
#define CACHE_LEVELS 3
    #define CACHE_L2 2
    size_t len;

    // First call only queries the size of the hw.cacheconfig value.
    if (sysctlbyname("hw.cacheconfig",NULL, &len, NULL, 0) != 0) {
        perror("sysctl");
    } else {
        int64_t cacheconfig[len];
        if (sysctlbyname("hw.cacheconfig", &cacheconfig[0], &len, NULL, 0) != 0)
            perror("sysctl: unable to get hw.cacheconfig");
        else {
            /*
            for (size_t i=0;i<CACHE_LEVELS;i++)
            std::cerr << " Cache " << i << " shared by " <<  cacheconfig[i] << " cores\n";
            */
            struct thread_affinity_policy mypolicy;
            // Define sets taking in account pinning is performed on L2
            mypolicy.affinity_tag = cpu_id/cacheconfig[CACHE_L2];
            if ( thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY, (integer_t*) &mypolicy, THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS ) {
                std::cerr << "Setting affinity of thread ? (" << mach_thread_self() << ") failed!" << std::endl;
                return EINVAL;
            } // else {
            //   std::cerr << "Sucessfully set affinity of thread (" << 
            //   mach_thread_self() << ") to core " << cpu_id/cacheconfig[CACHE_L2] << "\n";
            // }
        }
    }

    // A failed sysctl query is only reported; the priority is still applied.
    return(ff_setPriority(priority_level));
#elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) && defined(_WIN32)
    // SetThreadIdealProcessor reports failure as (DWORD)-1.
    if (-1==SetThreadIdealProcessor(GetCurrentThread(),cpu_id)) {
        perror("ff_mapThreadToCpu:SetThreadIdealProcessor");
        return EINVAL;
    }
    //std::cerr << "Successfully set affinity of thread " << GetCurrentThreadId() << " to core " << cpu_id << "\n";
#else 
#warning "CPU_SET not defined, cannot map thread to specific CPU"
#endif
    return 0;
}
예제 #7
0
  /*! set the affinity of a given thread
   *
   *  \param thread   handle of the thread to bind
   *  \param affinity zero-based processor index, counted across all
   *                  processor groups
   *
   *  When built for Windows 7 or later and the processor-group functions can
   *  be resolved from kernel32 at run time, the thread is bound through
   *  SetThreadGroupAffinity / SetThreadIdealProcessorEx. Otherwise the
   *  single-group SetThreadAffinityMask / SetThreadIdealProcessor calls are
   *  used. Failures only produce a WARNING. */
  void setAffinity(HANDLE thread, ssize_t affinity)
  {
#if _WIN32_WINNT >= _WIN32_WINNT_WIN7
    typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
    typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
    typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
    typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);

    // GetModuleHandle does not add a module reference, unlike the previous
    // LoadLibrary call that was repeated on every invocation and never
    // balanced by FreeLibrary.
    HMODULE hlib = GetModuleHandleA("kernel32.dll");

    GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = nullptr;
    GetActiveProcessorCountFunc pGetActiveProcessorCount = nullptr;
    SetThreadGroupAffinityFunc pSetThreadGroupAffinity = nullptr;
    SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = nullptr;
    if (hlib)
    {
      pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
      pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
      pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
      pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
    }

    if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx) 
    {
      // Translate the flat index into (group, number within group). If the
      // index lies beyond the last processor, group 0 / number 0 is kept.
      int groups = pGetActiveProcessorGroupCount();
      int totalProcessors = 0, group = 0, number = 0;
      for (int i = 0; i<groups; i++) {
        int processors = pGetActiveProcessorCount(i);
        if (totalProcessors + processors > affinity) {
          group = i;
          number = (int)affinity - totalProcessors;
          break;
        }
        totalProcessors += processors;
      }
  
      GROUP_AFFINITY groupAffinity;
      groupAffinity.Group = (WORD)group;
      groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
      groupAffinity.Reserved[0] = 0;
      groupAffinity.Reserved[1] = 0;
      groupAffinity.Reserved[2] = 0;
      if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
        WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
  
      PROCESSOR_NUMBER processorNumber;
      processorNumber.Group = group;
      processorNumber.Number = number;
      processorNumber.Reserved = 0;
      if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
        WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
    } 
    else 
#endif
    {
      // Fallback without processor-group support.
      if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
        WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
      if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
        WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
    }
  }
예제 #8
0
파일: Threading.cpp 프로젝트: 9heart/spring
	/**
	 * Binds (hard) or hints (soft) the calling thread to a set of CPU cores.
	 *
	 * @param cores_bitmask bit n set means core n is wanted
	 * @param hard          WIN32 only: true restricts the thread with
	 *                      SetThreadAffinityMask, false merely sets the lowest
	 *                      wanted core as the ideal processor. The other
	 *                      platforms ignore this flag.
	 * @return ~0 if cores_bitmask is 0 (nothing requested); 0 on Apple/FreeBSD,
	 *         on failure, or when no wanted core exists on this system;
	 *         otherwise the wanted mask restricted to the system's cores.
	 */
	boost::uint32_t SetAffinity(boost::uint32_t cores_bitmask, bool hard)
	{
		if (cores_bitmask == 0) {
			return ~0;
		}

	#if defined(__APPLE__) || defined(__FreeBSD__)
		// no-op
		return 0;

	#elif defined(WIN32)
		// Create mask
		DWORD_PTR cpusWanted = (cores_bitmask & cpusSystem);

		// None of the requested cores exists on this system
		if (cpusWanted == 0) {
			return 0;
		}

		// Set the affinity
		HANDLE thread = GetCurrentThread();
		bool success = false;
		if (hard) {
			// Returns the previous mask, 0 on failure
			success = (SetThreadAffinityMask(thread, cpusWanted) != 0);
		} else {
			// SetThreadIdealProcessor expects a processor *number*, not a
			// mask: use the lowest wanted core (cpusWanted != 0 here).
			DWORD idealCore = 0;
			while (((cpusWanted >> idealCore) & 1) == 0) {
				++idealCore;
			}
			// It returns the previous ideal processor, which may well be 0;
			// only (DWORD)-1 signals failure.
			success = (SetThreadIdealProcessor(thread, idealCore) != (DWORD)-1);
		}

		// Return final mask
		return success ? (boost::uint32_t)cpusWanted : 0;
	#else
		// Create mask
		cpu_set_t cpusWanted; CPU_ZERO(&cpusWanted);
		int numCpus = std::min(CPU_COUNT(&cpusSystem), 32); // the result mask only has 32 bits
		for (int n = numCpus - 1; n >= 0; --n) {
			// unsigned shift: `1 << 31` would overflow a signed int
			if ((cores_bitmask & (boost::uint32_t(1) << n)) != 0) {
				CPU_SET(n, &cpusWanted);
			}
		}
		CPU_AND(&cpusWanted, &cpusWanted, &cpusSystem);

		// Set the affinity
		int result = sched_setaffinity(0, sizeof(cpu_set_t), &cpusWanted);

		// Return final mask
		boost::uint32_t finalMask = 0;
		for (int n = numCpus - 1; n >= 0; --n) {
			if (CPU_ISSET(n, &cpusWanted)) {
				finalMask |= (boost::uint32_t(1) << n);
			}
		}
		return (result == 0) ? finalMask : 0;
	#endif
	}
예제 #9
0
// One-time platform setup for the demo framework. The options argument is
// accepted but not used.
extern void HK_CALL DemoPlatformInit(hkDemoFrameworkOptions*)
{
#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
	// Turn on flush-to-zero so denormal/subnormal results (2^-1074 to 2^-1022)
	// become zero; arithmetic on denormals is typically very slow, up to 100
	// times slower than on normal numbers.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

	// Soft hint to stay on a single core. SetThreadAffinityMask
	// (XP/Vista/Server2003 etc) could be used instead to be more forceful.
	SetThreadIdealProcessor(GetCurrentThread(), 0);

	// XAudio requires COM init.
	// If the call below does not compile, windows.h was included with a lot of
	// #defines that cut it down. A full windows.h include is required, with
	// winnt ver >= 4. In the demos this is done in the PCH, demos.h, before any
	// Havok base windows includes.
	CoInitializeEx(0, COINIT_MULTITHREADED);
}
예제 #10
0
  /*! Binds a thread to one processor, given as a zero-based index counted
   *  across all processor groups. Throws through THROW_RUNTIME_ERROR when a
   *  Windows call reports failure. */
  void setAffinity(HANDLE thread, ssize_t affinity)
  {
#if (_WIN32_WINNT >= 0x0601) // FIXME: use getProcAddress to activate this feature only if supported by Windows
    // Walk the groups until the one containing the requested index is found.
    // Group 0 / processor 0 remains selected if the index is past the end.
    const int groupCount = GetActiveProcessorGroupCount();
    int targetGroup = 0;
    int targetNumber = 0;
    int firstInGroup = 0;
    for (int g = 0; g < groupCount; ++g)
    {
      const int groupSize = GetActiveProcessorCount(g);
      const int groupEnd = firstInGroup + groupSize;
      if (groupEnd > affinity)
      {
        targetGroup = g;
        targetNumber = (int)affinity - firstInGroup;
        break;
      }
      firstInGroup = groupEnd;
    }

    // Hard binding: a one-bit mask inside the selected group.
    GROUP_AFFINITY binding;
    binding.Mask = (KAFFINITY)(uint64(1) << targetNumber);
    binding.Group = (WORD)targetGroup;
    for (int r = 0; r < 3; ++r)
      binding.Reserved[r] = 0;
    if (!SetThreadGroupAffinity(thread, &binding, NULL))
      THROW_RUNTIME_ERROR("cannot set thread group affinity");

    // Scheduler hint for the very same processor.
    PROCESSOR_NUMBER ideal;
    ideal.Group = targetGroup;
    ideal.Number = targetNumber;
    ideal.Reserved = 0;
    if (!SetThreadIdealProcessorEx(thread, &ideal, NULL))
      THROW_RUNTIME_ERROR("cannot set ideal processor");
#else
    // Without processor-group support: a single-bit mask plus the ideal
    // processor hint.
    const DWORD_PTR singleBit = DWORD_PTR(uint64(1) << affinity);
    if (!SetThreadAffinityMask(thread, singleBit))
      THROW_RUNTIME_ERROR("cannot set thread affinity mask");
    if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
      THROW_RUNTIME_ERROR("cannot set ideal processor");
#endif
  }
예제 #11
0
파일: thread.cpp 프로젝트: Qirix/embree
  /*! Pins a thread to a single processor. \p affinity is a zero-based index
   *  that runs across all processor groups. A std::runtime_error is thrown
   *  when one of the Windows calls fails. */
  void setAffinity(HANDLE thread, ssize_t affinity)
  {
#if (_WIN32_WINNT >= 0x0601)
    // Resolve the flat index to a group and a processor number inside it;
    // both stay 0 when the index is not covered by any group.
    int group = 0, number = 0;
    {
      int skipped = 0; // processors belonging to the groups already passed
      const int numGroups = GetActiveProcessorGroupCount();
      int g = 0;
      while (g < numGroups) {
        const int count = GetActiveProcessorCount(g);
        if (skipped + count > affinity) {
          group = g;
          number = (int)affinity - skipped;
          break;
        }
        skipped += count;
        g++;
      }
    }

    // Restrict the thread to exactly that processor ...
    GROUP_AFFINITY mask;
    mask.Reserved[0] = 0;
    mask.Reserved[1] = 0;
    mask.Reserved[2] = 0;
    mask.Group = (WORD)group;
    mask.Mask = (KAFFINITY)(uint64(1) << number);
    const BOOL boundOk = SetThreadGroupAffinity(thread, &mask, NULL);
    if (!boundOk)
      throw std::runtime_error("cannot set thread group affinity");

    // ... and name it as the ideal processor as well.
    PROCESSOR_NUMBER preferred;
    preferred.Group = group;
    preferred.Number = number;
    preferred.Reserved = 0;
    const BOOL idealOk = SetThreadIdealProcessorEx(thread, &preferred, NULL);
    if (!idealOk)
      throw std::runtime_error("cannot set thread ideal processor");
#else
    // Older Windows targets: no processor groups are taken into account.
    if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64(1) << affinity)))
      throw std::runtime_error("cannot set thread affinity mask");

    const DWORD previousIdeal = SetThreadIdealProcessor(thread, (DWORD)affinity);
    if (previousIdeal == (DWORD)-1)
      throw std::runtime_error("cannot set thread ideal processor");
#endif
  }
예제 #12
0
// Worker thread body for pipeline core `core`.
//
// Waits on this core's "more commands" event or the stop event, processes the
// queued commands after each wake-up, and leaves the loop as soon as
// Event::wait returns a non-zero wake-up reason. Builds with WIN32 and
// PROFILE_PIPELINE additionally pin the thread to its core, measure waiting
// and working time with __rdtsc and show the result in a message box on exit.
void PixelPipeline::worker_main(int core)
{
#if defined(WIN32) && defined(PROFILE_PIPELINE)
	SetThreadIdealProcessor(GetCurrentThread(), core);
	// Build the mask in DWORD_PTR width; an int shift overflows for core >= 31.
	SetThreadAffinityMask(GetCurrentThread(), static_cast<DWORD_PTR>(1) << core);
	unsigned __int64 ticks_waiting = 0;
	unsigned __int64 ticks_working = 0;
#endif
	PixelThreadContext context(core, active_cores);
	while (true)
	{
#if defined(WIN32) && defined(PROFILE_PIPELINE)
		unsigned __int64 wait_start_time = __rdtsc();
#endif
		int wakeup_reason = Event::wait(event_more_commands[core], event_stop);
		if (wakeup_reason != 0)
			break;
		event_more_commands[core].reset();
#if defined(WIN32) && defined(PROFILE_PIPELINE)
		unsigned __int64 wait_end_time = __rdtsc();
		ticks_waiting += wait_end_time-wait_start_time;
#endif
		process_commands(&context);
#if defined(WIN32) && defined(PROFILE_PIPELINE)
		unsigned __int64 commands_end_time = __rdtsc();
		ticks_working += commands_end_time-wait_end_time;
#endif
	}
#if defined(WIN32) && defined(PROFILE_PIPELINE)
	// Both counters are still zero when the loop ended before any batch was
	// timed; report 0 percent instead of dividing by zero.
	unsigned __int64 ticks_total = ticks_working+ticks_waiting;
	int percent_waiting = (ticks_total != 0) ? (int)(ticks_waiting*100/ticks_total) : 0;
	MessageBoxA(
		0,
		cl_format("Pipeline core %1 spent %2 percent of its time waiting for commands",
		core,
		percent_waiting).c_str(),
		"DEBUG", MB_OK);
#endif
}
// Entry point of a vHavokCpuJobThreadPool worker thread.
//
// `v` points to the WorkerThreadData of this thread. The function performs the
// per-thread setup, waits on the thread's semaphore, then processes all jobs
// of the shared job queue once per release of that semaphore until
// m_killThread is set, and finally runs the per-thread teardown.
// Always returns 0.
static void* HK_CALL hkWorkerThreadFunc(void *v)
{
    vHavokCpuJobThreadPool::WorkerThreadData& data = *static_cast<vHavokCpuJobThreadPool::WorkerThreadData*>(v);
    vHavokCpuJobThreadPool::SharedThreadData& sharedThreadData = *data.m_sharedThreadData;

    // Publish this thread's id in the thread-local hkThreadNumber.
    HK_THREAD_LOCAL_SET( hkThreadNumber, data.m_threadId);

#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
    // Flush all denormal/subnormal numbers (2^-1074 to 2^-1022) to zero.
    // Typically operations on denormals are very slow, up to 100 times slower than normal numbers.
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

    // Place the thread on its assigned hardware thread.
#ifdef HK_PLATFORM_XBOX360
    XSetThreadProcessor(GetCurrentThread(), data.m_hardwareThreadId );
#elif defined(HK_PLATFORM_WIN32) && !defined(_VISION_WINRT)
    SetThreadIdealProcessor(GetCurrentThread(), data.m_hardwareThreadId);
    // SetThreadAffinityMask could be used to be more forceful.
#endif

    // Per-thread memory and base system initialisation.
    hkMemoryRouter memoryRouter;
    hkMemorySystem::getInstance().threadInit( memoryRouter, "vHavokCpuJobThreadPool" );
    hkBaseSystem::initThread( &memoryRouter );
    //hkUint32 allLockVal = 0;
    //hkReferencedObject::initThread(&allLockVal);

    if (sharedThreadData.m_timerBufferAllocation > 0)
    {
        // Allocate a monitor stream for this thread - this enables timers.
        hkMonitorStream::getInstance().resize(sharedThreadData.m_timerBufferAllocation);
    }
    // Record the current bounds of this thread's monitor stream.
    data.m_monitorStreamBegin = hkMonitorStream::getInstance().getStart();
    data.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();

    hkCheckDeterminismUtil::initThread();


    // VISION specific: Call any per thread callback
    // (the callbacks are triggered between m_CallbackProtect enter/leave)
    if (sharedThreadData.m_OnWorkerThreadCreatedPtr)
    {
        sharedThreadData.m_CallbackProtect.enter();
        sharedThreadData.m_OnWorkerThreadCreatedPtr->TriggerCallbacks( /* data needed? */);
        sharedThreadData.m_CallbackProtect.leave();
    }
    // END  VISION specific

    // Wait for the main thread to release the worker thread
    data.m_semaphore.acquire();

    // The thread "main loop"
    while (data.m_killThread == false)
    {
        // A timer reset was requested: restart the monitor stream.
        if (data.m_clearTimers)
        {
            hkMonitorStream::getInstance().reset();
            data.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();
            data.m_clearTimers = false;
        }

        // NOTE(review): the name "isNotPrimary" paired with the value false
        // reads inverted for a worker thread - confirm against the parameter
        // of hkCheckDeterminismUtil::workerThreadStartFrame.
        const bool isNotPrimary = false;
        hkCheckDeterminismUtil::workerThreadStartFrame(isNotPrimary);

        // Enable timers for critical sections just during the step call
        hkCriticalSection::setTimersEnabled();

        sharedThreadData.m_jobQueue->processAllJobs();

        // Disable the critical section timers again now that the jobs are done
        hkCriticalSection::setTimersDisabled();

        // Note collected timer data
        hkMonitorStream& stream = hkMonitorStream::getInstance();
        data.m_monitorStreamEnd = stream.getEnd();

        hkCheckDeterminismUtil::workerThreadFinishFrame();

        if( sharedThreadData.m_gcThreadMemoryOnCompletion )
        {
            hkMemorySystem::getInstance().garbageCollectThread( memoryRouter );
        }

        // Release any thread (usually the main thread) which may be waiting for all worker threads to finish.
        sharedThreadData.m_workerThreadFinished.release();

        // Immediately wait until the main thread releases the thread again
        data.m_semaphore.acquire();
    }


    // Perform cleanup operations

    // VISION specific: Call any per thread callback
    if (sharedThreadData.m_OnWorkerThreadFinishedPtr)
    {
        sharedThreadData.m_CallbackProtect.enter();
        sharedThreadData.m_OnWorkerThreadFinishedPtr->TriggerCallbacks( /* data needed? */);
        sharedThreadData.m_CallbackProtect.leave();
    }
    // END  VISION specific

    // Tear down in the reverse order of the initialisation above.
    hkCheckDeterminismUtil::quitThread();

    hkBaseSystem::quitThread();
    hkMemorySystem::getInstance().threadQuit( memoryRouter );

    // Signal once more that this worker has finished.
    sharedThreadData.m_workerThreadFinished.release();

    return 0;
}
예제 #14
0
void YabThreadSetCurrentThreadAffinityMask(int mask)
{
	SetThreadIdealProcessor(GetCurrentThread(), mask);
}