// Parallel Definitions void ParallelFor(std::function<void(int64_t)> func, int64_t count, int chunkSize) { CHECK(threads.size() > 0 || MaxThreadIndex() == 1); // Run iterations immediately if not using threads or if _count_ is small if (threads.empty() || count < chunkSize) { for (int64_t i = 0; i < count; ++i) func(i); return; } // Create and enqueue _ParallelForLoop_ for this loop ParallelForLoop loop(std::move(func), count, chunkSize, CurrentProfilerState()); workListMutex.lock(); loop.next = workList; workList = &loop; workListMutex.unlock(); // Notify worker threads of work to be done std::unique_lock<std::mutex> lock(workListMutex); workListCondition.notify_all(); // Help out with parallel loop iterations in the current thread while (!loop.Finished()) { // Run a chunk of loop iterations for _loop_ // Find the set of loop iterations to run next int64_t indexStart = loop.nextIndex; int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex); // Update _loop_ to reflect iterations this thread will run loop.nextIndex = indexEnd; if (loop.nextIndex == loop.maxIndex) workList = loop.next; loop.activeWorkers++; // Run loop indices in _[indexStart, indexEnd)_ lock.unlock(); for (int64_t index = indexStart; index < indexEnd; ++index) { uint64_t oldState = ProfilerState; ProfilerState = loop.profilerState; if (loop.func1D) { loop.func1D(index); } // Handle other types of loops else { CHECK(loop.func2D); loop.func2D(Point2i(index % loop.nX, index / loop.nX)); } ProfilerState = oldState; } lock.lock(); // Update _loop_ to reflect completion of iterations loop.activeWorkers--; } }
// Run func(Point2i(x, y)) for every (x, y) with 0 <= x < count.x and
// 0 <= y < count.y, distributing work across the worker threads. The calling
// thread also helps execute chunks and does not return until the loop has
// finished.
void ParallelFor2D(std::function<void(Point2i)> func, const Point2i &count) {
    CHECK(threads.size() > 0 || MaxThreadIndex() == 1);
    // Run all iterations serially if there is no thread pool
    if (threads.empty()) {
        for (int y = 0; y < count.y; ++y)
            for (int x = 0; x < count.x; ++x) func(Point2i(x, y));
        return;
    }
    ParallelForLoop loop(std::move(func), count, CurrentProfilerState());
    {
        // Publish the loop at the head of the shared work list
        std::lock_guard<std::mutex> lock(workListMutex);
        loop.next = workList;
        workList = &loop;
    }
    // Re-acquire the mutex and wake the worker threads; from here on the
    // lock is held whenever the shared loop bookkeeping below is accessed.
    std::unique_lock<std::mutex> lock(workListMutex);
    workListCondition.notify_all();

    // Help out with parallel loop iterations in the current thread
    while (!loop.Finished()) {
        // Run a chunk of loop iterations for _loop_

        // Find the set of loop iterations to run next
        int64_t indexStart = loop.nextIndex;
        int64_t indexEnd = std::min(indexStart + loop.chunkSize, loop.maxIndex);

        // Update _loop_ to reflect iterations this thread will run
        loop.nextIndex = indexEnd;
        // Last chunk claimed: unlink the loop so other threads stop taking it
        if (loop.nextIndex == loop.maxIndex) workList = loop.next;
        loop.activeWorkers++;

        // Run loop indices in _[indexStart, indexEnd)_ with the lock dropped
        // so other threads can claim chunks concurrently
        lock.unlock();
        for (int64_t index = indexStart; index < indexEnd; ++index) {
            uint64_t oldState = ProfilerState;
            ProfilerState = loop.profilerState;
            if (loop.func1D) {
                loop.func1D(index);
            }
            // Handle other types of loops
            else {
                CHECK(loop.func2D);
                // Map the linear index back to 2D coordinates
                loop.func2D(Point2i(index % loop.nX, index / loop.nX));
            }
            ProfilerState = oldState;
        }
        lock.lock();

        // Update _loop_ to reflect completion of iterations
        loop.activeWorkers--;
    }
}
void ParallelInit() { CHECK_EQ(threads.size(), 0); int nThreads = MaxThreadIndex(); ThreadIndex = 0; // Create a barrier so that we can be sure all worker threads get past // their call to ProfilerWorkerThreadInit() before we return from this // function. In turn, we can be sure that the profiling system isn't // started until after all worker threads have done that. std::shared_ptr<Barrier> barrier = std::make_shared<Barrier>(nThreads); // Launch one fewer worker thread than the total number we want doing // work, since the main thread helps out, too. for (int i = 0; i < nThreads - 1; ++i) threads.push_back(std::thread(workerThreadFunc, i + 1, barrier)); barrier->Wait(); }
// Constructs the spatial light sampling distribution for _scene_. The voxel
// grid resolution is chosen so that the widest axis of the scene bounds gets
// maxVoxels voxels and the other axes are scaled proportionally, yielding
// roughly cube-shaped voxels.
SpatialLightDistribution::SpatialLightDistribution(const Scene &scene,
                                                   int maxVoxels)
    : scene(scene) {
    // Compute the number of voxels so that the widest scene bounding box
    // dimension has maxVoxels voxels and the other dimensions have a number
    // of voxels so that voxels are roughly cube shaped.
    const Bounds3f bounds = scene.WorldBound();
    const Vector3f extent = bounds.Diagonal();
    const Float maxExtent = extent[bounds.MaximumExtent()];
    for (int axis = 0; axis < 3; ++axis) {
        const int n = int(std::round(extent[axis] / maxExtent * maxVoxels));
        nVoxels[axis] = std::max(1, n);  // never fewer than one voxel per axis
    }
    LOG(INFO) << "SpatialLightDistribution: scene bounds " << bounds
              << ", voxel res (" << nVoxels[0] << ", " << nVoxels[1] << ", "
              << nVoxels[2] << ")";

    // It's important to pre-size the localVoxelDistributions vector, to
    // avoid race conditions with one thread resizing the vector while
    // another is reading from it.
    localVoxelDistributions.resize(MaxThreadIndex());
}
void MLTIntegrator::Render(const Scene &scene) { ProfilePhase p(Prof::IntegratorRender); std::unique_ptr<Distribution1D> lightDistr = ComputeLightPowerDistribution(scene); // Generate bootstrap samples and compute normalization constant $b$ int nBootstrapSamples = nBootstrap * (maxDepth + 1); std::vector<Float> bootstrapWeights(nBootstrapSamples, 0); if (scene.lights.size() > 0) { ProgressReporter progress(nBootstrap / 256, "Generating bootstrap paths"); std::vector<MemoryArena> bootstrapThreadArenas(MaxThreadIndex()); int chunkSize = Clamp(nBootstrap / 128, 1, 8192); ParallelFor([&](int i) { // Generate _i_th bootstrap sample MemoryArena &arena = bootstrapThreadArenas[threadIndex]; for (int depth = 0; depth <= maxDepth; ++depth) { int rngIndex = i * (maxDepth + 1) + depth; MLTSampler sampler(mutationsPerPixel, rngIndex, sigma, largeStepProbability, nSampleStreams); Point2f pRaster; bootstrapWeights[rngIndex] = L(scene, arena, lightDistr, sampler, depth, &pRaster).y(); arena.Reset(); } if ((i + 1 % 256) == 0) progress.Update(); }, nBootstrap, chunkSize); progress.Done(); } Distribution1D bootstrap(&bootstrapWeights[0], nBootstrapSamples); Float b = bootstrap.funcInt * (maxDepth + 1); // Run _nChains_ Markov chains in parallel Film &film = *camera->film; int64_t nTotalMutations = (int64_t)mutationsPerPixel * (int64_t)film.GetSampleBounds().Area(); if (scene.lights.size() > 0) { StatTimer timer(&renderingTime); const int progressFrequency = 32768; ProgressReporter progress(nTotalMutations / progressFrequency, "Rendering"); ParallelFor([&](int i) { int64_t nChainMutations = std::min((i + 1) * nTotalMutations / nChains, nTotalMutations) - i * nTotalMutations / nChains; // Follow {i}th Markov chain for _nChainMutations_ MemoryArena arena; // Select initial state from the set of bootstrap samples RNG rng(i); int bootstrapIndex = bootstrap.SampleDiscrete(rng.UniformFloat()); int depth = bootstrapIndex % (maxDepth + 1); // Initialize local variables for selected state 
MLTSampler sampler(mutationsPerPixel, bootstrapIndex, sigma, largeStepProbability, nSampleStreams); Point2f pCurrent; Spectrum LCurrent = L(scene, arena, lightDistr, sampler, depth, &pCurrent); // Run the Markov chain for _nChainMutations_ steps for (int64_t j = 0; j < nChainMutations; ++j) { sampler.StartIteration(); Point2f pProposed; Spectrum LProposed = L(scene, arena, lightDistr, sampler, depth, &pProposed); // Compute acceptance probability for proposed sample Float accept = std::min((Float)1, LProposed.y() / LCurrent.y()); // Splat both current and proposed samples to _film_ if (accept > 0) film.AddSplat(pProposed, LProposed * accept / LProposed.y()); film.AddSplat(pCurrent, LCurrent * (1 - accept) / LCurrent.y()); // Accept or reject the proposal if (rng.UniformFloat() < accept) { pCurrent = pProposed; LCurrent = LProposed; sampler.Accept(); ++acceptedMutations; } else sampler.Reject(); ++totalMutations; if ((i * nTotalMutations / nChains + j) % progressFrequency == 0) progress.Update(); arena.Reset(); } }, nChains); progress.Done(); } // Store final image computed with MLT camera->film->WriteImage(b / mutationsPerPixel); }