/*! Blocks the calling thread until `event` is signaled.
 *
 *  The caller passes in `mutex_in`; it is unlocked before blocking and
 *  locked again after the wait returns successfully.
 *
 *  \param mutex_in mutex that is released for the duration of the wait.
 *
 *  NOTE(review): if WaitForSingleObject fails, the error is raised while
 *  `mutex_in` is still unlocked and `count` is still incremented (assuming
 *  THROW_RUNTIME_ERROR throws) - confirm that callers tolerate this. */
__forceinline void wait(MutexSys& mutex_in)
{
  /* atomically register this thread as a waiter, then release the mutex */
  atomic_add(&count,+1);
  mutex_in.unlock();

  /* block until the event gets signaled */
  if (WaitForSingleObject(event, INFINITE) != WAIT_OBJECT_0)
    THROW_RUNTIME_ERROR("WaitForSingleObject failed");

  /* re-acquire the mutex and atomically unregister this thread */
  mutex_in.lock();
  const ssize_t waiters = atomic_add(&count,-1);

  /* the last thread to leave resets the event again
     (assumes atomic_add returns the value before the update - TODO confirm) */
  if (waiters == 1) {
    if (ResetEvent(event) == 0)
      THROW_RUNTIME_ERROR("ResetEvent failed");
  }
}
RTCORE_API void rtcInit(const char* cfg) { cout << "in rtcInit " << endl; Lock<MutexSys> lock(g_mutex); TRACE(rtcInit); CATCH_BEGIN; if (g_initialized) { g_mutex.unlock(); process_error(RTC_INVALID_OPERATION,"already initialized"); g_mutex.lock(); return; } g_initialized = true; /* reset global state */ initSettings(); if (cfg != NULL) { size_t pos = 0; do { std::string tok = parseIdentifier (cfg,pos); if (tok == "threads" && parseSymbol(cfg,'=',pos)) { g_numThreads = parseInt(cfg,pos); #if defined(__MIC__) if (!(g_numThreads == 1 || (g_numThreads % 4) == 0)) { g_mutex.unlock(); process_error(RTC_INVALID_OPERATION,"Xeon Phi supports only number of threads % 4 == 0, or threads == 1"); g_mutex.lock(); return; } #endif } else if (tok == "isa" && parseSymbol (cfg,'=',pos)) { std::string isa = parseIdentifier (cfg,pos); if (isa == "sse" ) cpu_features = SSE; else if (isa == "sse2") cpu_features = SSE2; else if (isa == "sse3") cpu_features = SSE3; else if (isa == "ssse3") cpu_features = SSSE3; else if (isa == "sse41") cpu_features = SSE41; else if (isa == "sse42") cpu_features = SSE42; else if (isa == "avx") cpu_features = AVX; else if (isa == "avxi") cpu_features = AVXI; else if (isa == "avx2") cpu_features = AVX2; } else if ((tok == "tri_accel" || tok == "accel") && parseSymbol (cfg,'=',pos)) g_tri_accel = parseIdentifier (cfg,pos); else if ((tok == "tri_builder" || tok == "builder") && parseSymbol (cfg,'=',pos)) g_tri_builder = parseIdentifier (cfg,pos); else if ((tok == "tri_traverser" || tok == "traverser") && parseSymbol (cfg,'=',pos)) g_tri_traverser = parseIdentifier (cfg,pos); else if ((tok == "tri_accel_mb" || tok == "accel_mb") && parseSymbol (cfg,'=',pos)) g_tri_accel = parseIdentifier (cfg,pos); else if ((tok == "tri_builder_mb" || tok == "builder_mb") && parseSymbol (cfg,'=',pos)) g_tri_builder = parseIdentifier (cfg,pos); else if ((tok == "tri_traverser_mb" || tok == "traverser_mb") && parseSymbol (cfg,'=',pos)) g_tri_traverser = parseIdentifier (cfg,pos); 
else if (tok == "hair_accel" && parseSymbol (cfg,'=',pos)) g_hair_accel = parseIdentifier (cfg,pos); else if (tok == "hair_builder" && parseSymbol (cfg,'=',pos)) g_hair_builder = parseIdentifier (cfg,pos); else if (tok == "hair_traverser" && parseSymbol (cfg,'=',pos)) g_hair_traverser = parseIdentifier (cfg,pos); else if (tok == "hair_builder_replication_factor" && parseSymbol (cfg,'=',pos)) g_hair_builder_replication_factor = parseInt (cfg,pos); else if (tok == "verbose" && parseSymbol (cfg,'=',pos)) g_verbose = parseInt (cfg,pos); else if (tok == "benchmark" && parseSymbol (cfg,'=',pos)) g_benchmark = parseInt (cfg,pos); else if (tok == "flags") { g_scene_flags = 0; if (parseSymbol (cfg,'=',pos)) { do { std::string flag = parseIdentifier (cfg,pos); if (flag == "static" ) g_scene_flags |= RTC_SCENE_STATIC; else if (flag == "dynamic") g_scene_flags |= RTC_SCENE_DYNAMIC; else if (flag == "compact") g_scene_flags |= RTC_SCENE_COMPACT; else if (flag == "coherent") g_scene_flags |= RTC_SCENE_COHERENT; else if (flag == "incoherent") g_scene_flags |= RTC_SCENE_INCOHERENT; else if (flag == "high_quality") g_scene_flags |= RTC_SCENE_HIGH_QUALITY; else if (flag == "robust") g_scene_flags |= RTC_SCENE_ROBUST; } while (parseSymbol (cfg,',',pos)); } } } while (findNext (cfg,',',pos)); } if (g_verbose >= 1) { std::cout << "Embree Ray Tracing Kernels " << __EMBREE_VERSION__ << " (" << __DATE__ << ")" << std::endl; std::cout << " Compiler : " << getCompilerName() << std::endl; std::cout << " Platform : " << getPlatformName() << std::endl; std::cout << " CPU : " << stringOfCPUFeatures(getCPUFeatures()) << std::endl; std::cout << " Features : "; #if defined(__USE_RAY_MASK__) std::cout << "raymasks "; #endif #if defined (__BACKFACE_CULLING__) std::cout << "backfaceculling "; #endif #if defined(__INTERSECTION_FILTER__) std::cout << "intersection_filter "; #endif #if defined(__BUFFER_STRIDE__) std::cout << "bufferstride "; #endif std::cout << std::endl; #if defined (__MIC__) #if 
defined(__BUFFER_STRIDE__) std::cout << " WARNING: enabled 'bufferstride' support will lower BVH build performance" << std::endl; #endif #endif } /* CPU has to support at least SSE2 */ #if !defined (__MIC__) if (!has_feature(SSE2)) { g_mutex.unlock(); process_error(RTC_UNSUPPORTED_CPU,"CPU does not support SSE2"); g_mutex.lock(); return; } #endif g_error = createTls(); g_error_function = NULL; init_globals(); cout << "in rtcInit(), BVH4Register() " << endl; #if !defined(__MIC__) cout << "BVH4Register()" << endl; BVH4Register(); #else cout << "BVH4iRegister() " << endl; BVH4iRegister(); #endif cout << "BVH4MBRegister() " << endl; BVH4MBRegister(); BVH4HairRegister(); #if defined(__TARGET_AVX__) cout << "BVH8Register() " << endl; if (has_feature(AVX)) { BVH8Register(); } #endif InstanceIntersectorsRegister(); //if (g_verbose >= 2) printSettings(); TaskScheduler::create(g_numThreads); cout << " end rtcInit " << endl; CATCH_END; }