void main(int argc, char **argv) { /* get number of threads to use */ size_t numThreads = 0; if (argc >= 2) numThreads = atoi(argv[1]); if (argc >= 3) g_verbose = atoi(argv[2]); #if defined(__MIC__) if (numThreads == 0) numThreads = getNumberOfLogicalThreads()-4; #else if (numThreads == 0) numThreads = getNumberOfLogicalThreads(); #endif /* enable wait to attach with debugger */ #if 0 std::cout << "waiting " << std::flush; for (int i=0; i<20; i++) { sleep(1); std::cout << "." << std::flush; } std::cout << " [DONE]" << std::endl; #endif /*! create device */ g_device = create("",numThreads,g_verbose); }
void TaskScheduler::createThreads(size_t numThreads_in, bool set_affinity) { numThreads = numThreads_in; defaultNumThreads = false; #if defined(__MIC__) if (numThreads == 0) { numThreads = getNumberOfLogicalThreads()-4; defaultNumThreads = true; } #else if (numThreads == 0) { numThreads = getNumberOfLogicalThreads(); defaultNumThreads = true; } #endif numEnabledThreads = numThreads; /* generate all threads */ for (size_t t=0; t<numThreads; t++) { threads.push_back(createThread((thread_func)threadFunction,new Thread(t,numThreads,this),4*1024*1024,set_affinity ? t : -1)); } //TaskLogger::init(numThreads); //taskBarrier.init(numThreads); }
/*! Default constructor: zeroes the counters and sizes the per-thread table
 *  to twice the number of logical threads, with every entry set to nullptr. */
TaskSchedulerTBB::TaskSchedulerTBB()
  : threadCounter(0), anyTasksRunning(0), hasRootTask(false)
{
  /* FIXME: this has to be 2x as in the join mode the worker threads also join */
  const size_t numLogical = getNumberOfLogicalThreads();
  threadLocal.resize(2*numLogical);

  /* clear all slots */
  const size_t numSlots = threadLocal.size();
  for (size_t slot=0; slot!=numSlots; ++slot) {
    threadLocal[slot] = nullptr;
  }
}
/*! Changes the number of threads of the pool.
 *
 *  \param newNumThreads  desired thread count; 0 is replaced by
 *                        getNumberOfLogicalThreads()
 *  \param startThreads   when false and the pool is not yet running, only
 *                        the requested count is recorded and no thread is
 *                        started or stopped
 *
 *  Index 0 never gets a thread of its own here (it is skipped in both
 *  loops); presumably the calling thread plays that role -- confirm. */
void TaskSchedulerTBB::ThreadPool::setNumThreads(size_t newNumThreads, bool startThreads)
{
  /* the global g_mutex is taken for this call (presumably released when
     `lock` goes out of scope -- confirm against Lock<MutexSys>) */
  Lock<MutexSys> lock(g_mutex);

  /* 0 selects the number of logical threads */
  if (newNumThreads == 0)
    newNumThreads = getNumberOfLogicalThreads();

  numThreads = newNumThreads;

  /* nothing is running and we were not asked to start: just remember the count */
  if (!startThreads && !running) return;
  running = true;

  /* snapshot the previous running count before publishing the new one */
  size_t numThreadsActive = numThreadsRunning;

  /* publish the new count under the pool mutex and wake all waiters on the condition */
  mutex.lock();
  numThreadsRunning = newNumThreads;
  mutex.unlock();
  condition.notify_all();

  /* start new threads */
  for (size_t t=numThreadsActive; t<numThreads; t++)
  {
    if (t == 0) continue;

    /* `pair` is a stack local whose address is handed to the new thread; the
       barrier wait right after creation presumably keeps it alive until the
       thread has read it -- do not reorder these statements */
    auto pair = std::make_pair(this,t);
    threads.push_back(createThread((thread_func)threadPoolFunction,&pair,4*1024*1024,set_affinity ? t : -1));
    g_barrier.wait();
  }

  /* stop some threads if we reduce the number of threads: joins and removes
     entries from the back of `threads`, one per index above the new count */
  for (ssize_t t=numThreadsActive-1; t>=ssize_t(numThreadsRunning); t--)
  {
    if (t == 0) continue;
    embree::join(threads.back());
    threads.pop_back();
  }
}
void launch_renderTile (int numTiles, int* pixels, const int width, const int height, const float time, const Vec3fa& vx, const Vec3fa& vy, const Vec3fa& vz, const Vec3fa& p, const int numTilesX, const int numTilesY) { #if 0 atomic_t tileID = 0; parallel_for(size_t(0),size_t(getNumberOfLogicalThreads()),[&] (const range<size_t>& r) { for (size_t tid=r.begin(); tid<r.end(); tid++) { while (true) { size_t i = atomic_add(&tileID,1); if (i >= numTiles) break; renderTile(i,pixels,width,height,time,vx,vy,vz,p,numTilesX,numTilesY); } } }); #else parallel_for(size_t(0),size_t(numTiles),[&] (const range<size_t>& r) { //if (inrender) PING; //inrender = true; for (size_t i=r.begin(); i<r.end(); i++) renderTile(i,pixels,width,height,time,vx,vy,vz,p,numTilesX,numTilesY); //inrender = false; }); #endif }
/*! Default constructor: zeroes the counters and sizes the per-thread table
 *  to twice the number of logical threads, storing nullptr in every entry. */
TaskScheduler::TaskScheduler()
  : threadCounter(0), anyTasksRunning(0), hasRootTask(false)
{
  /* FIXME: this has to be 2x as in the compatibility join mode with
     rtcCommit the worker threads also join. When disallowing rtcCommit to
     join a build we can remove the 2x. */
  const size_t numLogical = getNumberOfLogicalThreads();
  threadLocal.resize(2*numLogical);

  /* reset all slots */
  const size_t numSlots = threadLocal.size();
  for (size_t slot=0; slot!=numSlots; ++slot) {
    threadLocal[slot].store(nullptr);
  }
}
bool run () { threadID.store(0); numFailed.store(0); size_t numThreads = getNumberOfLogicalThreads(); threadResults.resize(numThreads); barrier.init(numThreads+1); /* create threads */ std::vector<thread_t> threads; for (size_t i=0; i<numThreads; i++) threads.push_back(createThread((thread_func)thread_alloc,this)); /* run test */ for (size_t i=0; i<1000; i++) { for (size_t i=0; i<numThreads; i++) threadResults[i] = 0; barrier.wait(); barrier.wait(); for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i; } /* destroy threads */ for (size_t i=0; i<numThreads; i++) join(threads[i]); return numFailed == 0; }
/*! Constructs the scheduler.
 *
 *  \param numThreads  number of threads; 0 selects the number of logical
 *                     threads (minus 4 in the __MIC__ build); -1 (i.e. the
 *                     largest size_t value) sets the thread counter to 1 and
 *                     clears the createThreads flag
 *  \param spinning    stored in the `spinning` member */
TaskSchedulerTBB::TaskSchedulerTBB(size_t numThreads, bool spinning)
  : threadCounter(numThreads), createThreads(true), terminate(false), anyTasksRunning(0), active(false),
    spinning(spinning), task_set_function(nullptr), masterThread(0,this)
{
  /* start with an empty per-thread table */
  for (size_t slot=0; slot<MAX_THREADS; slot++) {
    threadLocal[slot] = nullptr;
  }

  /* -1 converted to size_t: sentinel for "do not create threads" */
  const size_t noThreadCreation = size_t(-1);

  if (numThreads == noThreadCreation)
  {
    threadCounter = 1;
    createThreads = false;
  }
  else if (numThreads == 0)
  {
    /* default thread count */
#if defined(__MIC__)
    threadCounter = getNumberOfLogicalThreads()-4;
#else
    threadCounter = getNumberOfLogicalThreads();
#endif
  }

  /* the barrier is sized by the final thread counter */
  task_set_barrier.init(threadCounter);
}
void TaskScheduler::createThreads(size_t numThreads_in) { numThreads = numThreads_in; #if defined(__MIC__) if (numThreads == 0) numThreads = getNumberOfLogicalThreads()-4; #else if (numThreads == 0) numThreads = getNumberOfLogicalThreads(); #endif /* this mapping is only required as ISPC does not propagate task groups */ thread2event = (ThreadEvent*) alignedMalloc(numThreads*sizeof(ThreadEvent)); memset(thread2event,0,numThreads*sizeof(ThreadEvent)); /* generate all threads */ for (size_t t=0; t<numThreads; t++) { threads.push_back(createThread((thread_func)threadFunction,new Thread(t,numThreads,this),4*1024*1024,t)); } //setAffinity(0); TaskLogger::init(numThreads); }
/*! Constructs a thread pool that is neither running nor terminating.
 *
 *  \param numThreads    requested number of threads; 0 is replaced by
 *                       getNumberOfLogicalThreads()
 *  \param set_affinity  stored in the `set_affinity` member */
TaskSchedulerTBB::ThreadPool::ThreadPool(size_t numThreads, bool set_affinity)
  : numThreads(numThreads),
    set_affinity(set_affinity),
    running(false),
    terminate(false)
{
  /* an explicit thread count is kept as is */
  if (this->numThreads != 0)
    return;

  /* otherwise default to the number of logical threads */
  this->numThreads = getNumberOfLogicalThreads();
}
void Device::print() { const int cpu_features = getCPUFeatures(); std::cout << std::endl; std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl; std::cout << " Compiler : " << getCompilerName() << std::endl; std::cout << " Build : "; #if defined(DEBUG) std::cout << "Debug " << std::endl; #else std::cout << "Release " << std::endl; #endif std::cout << " Platform : " << getPlatformName() << std::endl; std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl; std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl; std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl; std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl; const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON; const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON; std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl; std::cout << " Config" << std::endl; std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl; std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl; std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl; std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl; std::cout << " Features: " << getEmbreeFeatures() << std::endl; std::cout << " Tasking : "; #if defined(TASKING_TBB) std::cout << "TBB" << TBB_VERSION_MAJOR << "." 
<< TBB_VERSION_MINOR << " "; std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " "; #endif #if defined(TASKING_INTERNAL) std::cout << "internal_tasking_system "; #endif #if defined(TASKING_PPL) std::cout << "PPL "; #endif std::cout << std::endl; /* check of FTZ and DAZ flags are set in CSR */ if (!hasFTZ || !hasDAZ) { #if !defined(_DEBUG) if (State::verbosity(1)) #endif { std::cout << std::endl; std::cout << "================================================================================" << std::endl; std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl << " in the MXCSR control and status register. This can have a severe " << std::endl << " performance impact. Please enable these modes for each application " << std::endl << " thread the following way:" << std::endl << std::endl << " #include \"xmmintrin.h\"" << std::endl << " #include \"pmmintrin.h\"" << std::endl << std::endl << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl; std::cout << "================================================================================" << std::endl; std::cout << std::endl; } } std::cout << std::endl; }
/*! Constructs the barrier with cleared state and initializes it for N
 *  threads.
 *
 *  \param N  number of threads; 0 is replaced by
 *            getNumberOfLogicalThreads() */
LinearBarrierActive::LinearBarrierActive (size_t N)
  : count0(nullptr), count1(nullptr),
    mode(0),
    flag0(0), flag1(0),
    threadCount(0)
{
  /* fall back to the logical thread count when no size was given */
  const size_t barrierSize = (N != 0) ? N : getNumberOfLogicalThreads();
  init(barrierSize);
}