int EnterTaskManager ()
{
  if (task_manager)
    {
      // a task manager is already running; don't start another one
      return 0;
    }

  task_manager = new TaskManager();

  cout << IM(3) << "task-based parallelization (C++11 threads) using "
       << task_manager->GetNumThreads() << " threads" << endl;

#ifdef USE_NUMA
  numa_run_on_node (0);
#endif

#ifndef WIN32
  // master has maximal priority !
  int policy;
  struct sched_param param;
  pthread_getschedparam(pthread_self(), &policy, &param);
  param.sched_priority = sched_get_priority_max(policy);
  pthread_setschedparam(pthread_self(), policy, &param);
#endif // WIN32

  task_manager->StartWorkers();

  ParallelFor (Range(100), [&] (int i) { ; });    // startup
  return task_manager->GetNumThreads();
}
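// Minimal usage sketch (not from the source above): it assumes a matching
// ExitTaskManager(num_threads) call in the same task-manager API, and the
// vector plus square-root workload are made up for illustration.
void TaskManagerUsageSketch ()
{
  int nthreads = EnterTaskManager();            // start the worker pool (returns 0 if already running)

  std::vector<double> data(1000);
  ParallelFor (Range(data.size()), [&] (size_t i)
    {
      data[i] = std::sqrt(double(i));           // independent per-element work
    });

  ExitTaskManager(nthreads);                    // tear the pool down again
}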
void ComputeBeamDiffusionBSSRDF(Float g, Float eta, BSSRDFTable *t) {
    // Choose radius values of the diffusion profile discretization
    t->radiusSamples[0] = 0;
    t->radiusSamples[1] = 2.5e-3f;
    for (int i = 2; i < t->nRadiusSamples; ++i)
        t->radiusSamples[i] = t->radiusSamples[i - 1] * 1.2f;

    // Choose albedo values of the diffusion profile discretization
    for (int i = 0; i < t->nRhoSamples; ++i)
        t->rhoSamples[i] =
            (1 - std::exp(-8 * i / (Float)(t->nRhoSamples - 1))) /
            (1 - std::exp(-8));
    ParallelFor([&](int i) {
        // Compute the diffusion profile for the _i_th albedo sample

        // Compute scattering profile for chosen albedo $\rho$
        for (int j = 0; j < t->nRadiusSamples; ++j) {
            Float rho = t->rhoSamples[i], r = t->radiusSamples[j];
            t->profile[i * t->nRadiusSamples + j] =
                2 * Pi * r * (BeamDiffusionSS(rho, 1 - rho, g, eta, r) +
                              BeamDiffusionMS(rho, 1 - rho, g, eta, r));
        }

        // Compute effective albedo $\rho_{\roman{eff}}$ and CDF for importance
        // sampling
        t->rhoEff[i] =
            IntegrateCatmullRom(t->nRadiusSamples, t->radiusSamples.get(),
                                &t->profile[i * t->nRadiusSamples],
                                &t->profileCDF[i * t->nRadiusSamples]);
    }, t->nRhoSamples);
}
// RealisticCamera Method Definitions
RealisticCamera::RealisticCamera(const AnimatedTransform &CameraToWorld,
                                 Float shutterOpen, Float shutterClose,
                                 Float apertureDiameter, Float focusDistance,
                                 bool simpleWeighting, const char *lensFile,
                                 Film *film, const Medium *medium)
    : Camera(CameraToWorld, shutterOpen, shutterClose, film, medium),
      simpleWeighting(simpleWeighting) {
    // Load element data from lens description file
    std::vector<Float> lensData;
    if (ReadFloatFile(lensFile, &lensData) == false) {
        Error("Error reading lens specification file \"%s\".", lensFile);
        return;
    }
    if ((lensData.size() % 4) != 0) {
        Error(
            "Excess values in lens specification file \"%s\"; "
            "must be multiple-of-four values, read %d.",
            lensFile, (int)lensData.size());
        return;
    }
    for (int i = 0; i < (int)lensData.size(); i += 4) {
        if (lensData[i] == 0) {
            if (apertureDiameter > lensData[i + 3]) {
                Warning(
                    "Specified aperture diameter %f is greater than maximum "
                    "possible %f. Clamping it.",
                    apertureDiameter, lensData[i + 3]);
            } else {
                lensData[i + 3] = apertureDiameter;
            }
        }
        elementInterfaces.push_back(LensElementInterface{
            lensData[i] * (Float).001, lensData[i + 1] * (Float).001,
            lensData[i + 2], lensData[i + 3] * Float(.001) / Float(2.)});
    }

    // Compute lens--film distance for given focus distance
    Float fb = FocusBinarySearch(focusDistance);
    Info("Binary search focus: %f -> %f\n", fb, FocusDistance(fb));
    elementInterfaces.back().thickness = FocusThickLens(focusDistance);
    Info("Thick lens focus: %f -> %f\n", elementInterfaces.back().thickness,
         FocusDistance(elementInterfaces.back().thickness));

    // Compute exit pupil bounds at sampled points on the film
    int nSamples = 64;
    exitPupilBounds.resize(nSamples);
    ParallelFor([&](int i) {
        Float r0 = (Float)i / nSamples * film->diagonal / 2;
        Float r1 = (Float)(i + 1) / nSamples * film->diagonal / 2;
        exitPupilBounds[i] = BoundExitPupil(r0, r1);
    }, nSamples);
}
// InfiniteAreaLight Method Definitions
InfiniteAreaLight::InfiniteAreaLight(const Transform &LightToWorld,
                                     const Spectrum &L, int nSamples,
                                     const std::string &texmap)
    : Light((int)LightFlags::Infinite, LightToWorld, MediumInterface(),
            nSamples) {
    // Read texel data from _texmap_ and initialize _Lmap_
    Point2i resolution;
    std::unique_ptr<RGBSpectrum[]> texels(nullptr);
    if (texmap != "") {
        texels = ReadImage(texmap, &resolution);
        if (texels)
            for (int i = 0; i < resolution.x * resolution.y; ++i)
                texels[i] *= L.ToRGBSpectrum();
    }
    if (!texels) {
        resolution.x = resolution.y = 1;
        texels = std::unique_ptr<RGBSpectrum[]>(new RGBSpectrum[1]);
        texels[0] = L.ToRGBSpectrum();
    }
    Lmap.reset(new MIPMap<RGBSpectrum>(resolution, texels.get()));

    // Initialize sampling PDFs for infinite area light

    // Compute scalar-valued image _img_ from environment map
    int width = 2 * Lmap->Width(), height = 2 * Lmap->Height();
    std::unique_ptr<Float[]> img(new Float[width * height]);
    float fwidth = 0.5f / std::min(width, height);
    ParallelFor(
        [&](int64_t v) {
            Float vp = (v + .5f) / (Float)height;
            Float sinTheta = std::sin(Pi * (v + .5f) / height);
            for (int u = 0; u < width; ++u) {
                Float up = (u + .5f) / (Float)width;
                img[u + v * width] = Lmap->Lookup(Point2f(up, vp), fwidth).y();
                img[u + v * width] *= sinTheta;
            }
        },
        height, 32);

    // Compute sampling distributions for rows and columns of image
    distribution.reset(new Distribution2D(img.get(), width, height));
}
void FMeshElementCollector::ProcessTasks()
{
	check(IsInRenderingThread());
	check(!ParallelTasks.Num() || bUseAsyncTasks);

	if (ParallelTasks.Num())
	{
		QUICK_SCOPE_CYCLE_COUNTER(STAT_FMeshElementCollector_ProcessTasks);
		TArray<TFunction<void()>*, SceneRenderingAllocator>& LocalParallelTasks(ParallelTasks);

		ParallelFor(ParallelTasks.Num(),
			[&LocalParallelTasks](int32 Index)
			{
				TFunction<void()>* Func = LocalParallelTasks[Index];
				(*Func)();
				Func->~TFunction<void()>();
			}
		);

		ParallelTasks.Empty();
	}
}
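// Minimal usage sketch (not part of the collector code above): the engine's ParallelFor
// overload assumed here takes an element count and a per-index callable; the array and the
// FMath::Sqrt workload are made up for illustration.
void ParallelForUsageExample()
{
	TArray<float> Results;
	Results.SetNumZeroed(1024);

	ParallelFor(Results.Num(), [&Results](int32 Index)
	{
		// Each iteration writes only its own slot, so no synchronization is needed.
		Results[Index] = FMath::Sqrt((float)Index);
	});
}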
void MLTIntegrator::Render(const Scene &scene) {
    ProfilePhase p(Prof::IntegratorRender);
    std::unique_ptr<Distribution1D> lightDistr =
        ComputeLightPowerDistribution(scene);

    // Generate bootstrap samples and compute normalization constant $b$
    int nBootstrapSamples = nBootstrap * (maxDepth + 1);
    std::vector<Float> bootstrapWeights(nBootstrapSamples, 0);
    if (scene.lights.size() > 0) {
        ProgressReporter progress(nBootstrap / 256,
                                  "Generating bootstrap paths");
        std::vector<MemoryArena> bootstrapThreadArenas(MaxThreadIndex());
        int chunkSize = Clamp(nBootstrap / 128, 1, 8192);
        ParallelFor([&](int i) {
            // Generate _i_th bootstrap sample
            MemoryArena &arena = bootstrapThreadArenas[threadIndex];
            for (int depth = 0; depth <= maxDepth; ++depth) {
                int rngIndex = i * (maxDepth + 1) + depth;
                MLTSampler sampler(mutationsPerPixel, rngIndex, sigma,
                                   largeStepProbability, nSampleStreams);
                Point2f pRaster;
                bootstrapWeights[rngIndex] =
                    L(scene, arena, lightDistr, sampler, depth, &pRaster).y();
                arena.Reset();
            }
            if ((i + 1) % 256 == 0) progress.Update();
        }, nBootstrap, chunkSize);
        progress.Done();
    }
    Distribution1D bootstrap(&bootstrapWeights[0], nBootstrapSamples);
    Float b = bootstrap.funcInt * (maxDepth + 1);

    // Run _nChains_ Markov chains in parallel
    Film &film = *camera->film;
    int64_t nTotalMutations =
        (int64_t)mutationsPerPixel * (int64_t)film.GetSampleBounds().Area();
    if (scene.lights.size() > 0) {
        StatTimer timer(&renderingTime);
        const int progressFrequency = 32768;
        ProgressReporter progress(nTotalMutations / progressFrequency,
                                  "Rendering");
        ParallelFor([&](int i) {
            int64_t nChainMutations =
                std::min((i + 1) * nTotalMutations / nChains,
                         nTotalMutations) -
                i * nTotalMutations / nChains;
            // Follow {i}th Markov chain for _nChainMutations_
            MemoryArena arena;

            // Select initial state from the set of bootstrap samples
            RNG rng(i);
            int bootstrapIndex = bootstrap.SampleDiscrete(rng.UniformFloat());
            int depth = bootstrapIndex % (maxDepth + 1);

            // Initialize local variables for selected state
            MLTSampler sampler(mutationsPerPixel, bootstrapIndex, sigma,
                               largeStepProbability, nSampleStreams);
            Point2f pCurrent;
            Spectrum LCurrent =
                L(scene, arena, lightDistr, sampler, depth, &pCurrent);

            // Run the Markov chain for _nChainMutations_ steps
            for (int64_t j = 0; j < nChainMutations; ++j) {
                sampler.StartIteration();
                Point2f pProposed;
                Spectrum LProposed =
                    L(scene, arena, lightDistr, sampler, depth, &pProposed);

                // Compute acceptance probability for proposed sample
                Float accept =
                    std::min((Float)1, LProposed.y() / LCurrent.y());

                // Splat both current and proposed samples to _film_
                if (accept > 0)
                    film.AddSplat(pProposed,
                                  LProposed * accept / LProposed.y());
                film.AddSplat(pCurrent,
                              LCurrent * (1 - accept) / LCurrent.y());

                // Accept or reject the proposal
                if (rng.UniformFloat() < accept) {
                    pCurrent = pProposed;
                    LCurrent = LProposed;
                    sampler.Accept();
                    ++acceptedMutations;
                } else
                    sampler.Reject();
                ++totalMutations;
                if ((i * nTotalMutations / nChains + j) % progressFrequency ==
                    0)
                    progress.Update();
                arena.Reset();
            }
        }, nChains);
        progress.Done();
    }

    // Store final image computed with MLT
    camera->film->WriteImage(b / mutationsPerPixel);
}
BVHBuildNode *BVHAccel::HLBVHBuild(
    MemoryArena &arena, const std::vector<BVHPrimitiveInfo> &primitiveInfo,
    int *totalNodes,
    std::vector<std::shared_ptr<Primitive>> &orderedPrims) const {
    // Compute bounding box of all primitive centroids
    Bounds3f bounds;
    for (const BVHPrimitiveInfo &pi : primitiveInfo)
        bounds = Union(bounds, pi.centroid);

    // Compute Morton indices of primitives
    std::vector<MortonPrimitive> mortonPrims(primitiveInfo.size());
    ParallelFor([&](int i) {
        // Initialize _mortonPrims[i]_ for _i_th primitive
        constexpr int mortonBits = 10;
        constexpr int mortonScale = 1 << mortonBits;
        mortonPrims[i].primitiveIndex = primitiveInfo[i].primitiveNumber;
        Vector3f centroidOffset = bounds.Offset(primitiveInfo[i].centroid);
        mortonPrims[i].mortonCode = EncodeMorton3(centroidOffset * mortonScale);
    }, primitiveInfo.size(), 512);

    // Radix sort primitive Morton indices
    RadixSort(&mortonPrims);

    // Create LBVH treelets at bottom of BVH

    // Find intervals of primitives for each treelet
    std::vector<LBVHTreelet> treeletsToBuild;
    for (int start = 0, end = 1; end <= (int)mortonPrims.size(); ++end) {
        uint32_t mask = 0b00111111111111000000000000000000;
        if (end == (int)mortonPrims.size() ||
            ((mortonPrims[start].mortonCode & mask) !=
             (mortonPrims[end].mortonCode & mask))) {
            // Add entry to _treeletsToBuild_ for this treelet
            int nPrimitives = end - start;
            int maxBVHNodes = 2 * nPrimitives;
            BVHBuildNode *nodes = arena.Alloc<BVHBuildNode>(maxBVHNodes, false);
            treeletsToBuild.push_back({start, nPrimitives, nodes});
            start = end;
        }
    }

    // Create LBVHs for treelets in parallel
    std::atomic<int> atomicTotal(0), orderedPrimsOffset(0);
    orderedPrims.resize(primitives.size());
    ParallelFor([&](int i) {
        // Generate _i_th LBVH treelet
        int nodesCreated = 0;
        const int firstBitIndex = 29 - 12;
        LBVHTreelet &tr = treeletsToBuild[i];
        tr.buildNodes =
            emitLBVH(tr.buildNodes, primitiveInfo, &mortonPrims[tr.startIndex],
                     tr.nPrimitives, &nodesCreated, orderedPrims,
                     &orderedPrimsOffset, firstBitIndex);
        atomicTotal += nodesCreated;
    }, treeletsToBuild.size());
    *totalNodes = atomicTotal;

    // Create and return SAH BVH from LBVH treelets
    std::vector<BVHBuildNode *> finishedTreelets;
    finishedTreelets.reserve(treeletsToBuild.size());
    for (LBVHTreelet &treelet : treeletsToBuild)
        finishedTreelets.push_back(treelet.buildNodes);
    return buildUpperSAH(arena, finishedTreelets, 0, finishedTreelets.size(),
                         totalNodes);
}
void BDPTIntegrator::Render(const Scene &scene) {
    ProfilePhase p(Prof::IntegratorRender);
    // Compute _lightDistr_ for sampling lights proportional to power
    std::unique_ptr<Distribution1D> lightDistr =
        ComputeLightPowerDistribution(scene);

    // Partition the image into tiles
    Film *film = camera->film;
    const Bounds2i sampleBounds = film->GetSampleBounds();
    const Vector2i sampleExtent = sampleBounds.Diagonal();
    const int tileSize = 16;
    const int nXTiles = (sampleExtent.x + tileSize - 1) / tileSize;
    const int nYTiles = (sampleExtent.y + tileSize - 1) / tileSize;
    ProgressReporter reporter(nXTiles * nYTiles, "Rendering");

    // Allocate buffers for debug visualization
    const int bufferCount = (1 + maxDepth) * (6 + maxDepth) / 2;
    std::vector<std::unique_ptr<Film>> weightFilms(bufferCount);
    if (visualizeStrategies || visualizeWeights) {
        for (int depth = 0; depth <= maxDepth; ++depth) {
            for (int s = 0; s <= depth + 2; ++s) {
                int t = depth + 2 - s;
                if (t == 0 || (s == 1 && t == 1)) continue;

                char filename[32];
                snprintf(filename, sizeof(filename),
                         "bdpt_d%02i_s%02i_t%02i.exr", depth, s, t);

                weightFilms[BufferIndex(s, t)] = std::unique_ptr<Film>(new Film(
                    film->fullResolution,
                    Bounds2f(Point2f(0, 0), Point2f(1, 1)),
                    std::unique_ptr<Filter>(CreateBoxFilter(ParamSet())),
                    film->diagonal * 1000, filename, 1.f));
            }
        }
    }

    // Render and write the output image to disk
    if (scene.lights.size() > 0) {
        StatTimer timer(&renderingTime);
        ParallelFor([&](const Point2i tile) {
            // Render a single tile using BDPT
            MemoryArena arena;
            int seed = tile.y * nXTiles + tile.x;
            std::unique_ptr<Sampler> tileSampler = sampler->Clone(seed);
            int x0 = sampleBounds.pMin.x + tile.x * tileSize;
            int x1 = std::min(x0 + tileSize, sampleBounds.pMax.x);
            int y0 = sampleBounds.pMin.y + tile.y * tileSize;
            int y1 = std::min(y0 + tileSize, sampleBounds.pMax.y);
            Bounds2i tileBounds(Point2i(x0, y0), Point2i(x1, y1));
            std::unique_ptr<FilmTile> filmTile =
                camera->film->GetFilmTile(tileBounds);
            for (Point2i pPixel : tileBounds) {
                tileSampler->StartPixel(pPixel);
                do {
                    // Generate a single sample using BDPT
                    Point2f pFilm = (Point2f)pPixel + tileSampler->Get2D();

                    // Trace the camera and light subpaths
                    Vertex *cameraVertices = arena.Alloc<Vertex>(maxDepth + 2);
                    Vertex *lightVertices = arena.Alloc<Vertex>(maxDepth + 1);
                    int nCamera = GenerateCameraSubpath(
                        scene, *tileSampler, arena, maxDepth + 2, *camera,
                        pFilm, cameraVertices);
                    int nLight = GenerateLightSubpath(
                        scene, *tileSampler, arena, maxDepth + 1,
                        cameraVertices[0].time(), *lightDistr, lightVertices);

                    // Execute all BDPT connection strategies
                    Spectrum L(0.f);
                    for (int t = 1; t <= nCamera; ++t) {
                        for (int s = 0; s <= nLight; ++s) {
                            int depth = t + s - 2;
                            if ((s == 1 && t == 1) || depth < 0 ||
                                depth > maxDepth)
                                continue;
                            // Execute the $(s, t)$ connection strategy and
                            // update _L_
                            Point2f pFilmNew = pFilm;
                            Float misWeight = 0.f;
                            Spectrum Lpath = ConnectBDPT(
                                scene, lightVertices, cameraVertices, s, t,
                                *lightDistr, *camera, *tileSampler, &pFilmNew,
                                &misWeight);
                            if (visualizeStrategies || visualizeWeights) {
                                Spectrum value;
                                if (visualizeStrategies)
                                    value =
                                        misWeight == 0 ? 0 : Lpath / misWeight;
                                if (visualizeWeights) value = Lpath;
                                weightFilms[BufferIndex(s, t)]->AddSplat(
                                    pFilmNew, value);
                            }
                            if (t != 1)
                                L += Lpath;
                            else
                                film->AddSplat(pFilmNew, Lpath);
                        }
                    }
                    filmTile->AddSample(pFilm, L);
                    arena.Reset();
                } while (tileSampler->StartNextSample());
            }
            film->MergeFilmTile(std::move(filmTile));
            reporter.Update();
        }, Point2i(nXTiles, nYTiles));
        reporter.Done();
    }
    film->WriteImage(1.0f / sampler->samplesPerPixel);

    // Write buffers for debug visualization
    if (visualizeStrategies || visualizeWeights) {
        const Float invSampleCount = 1.0f / sampler->samplesPerPixel;
        for (size_t i = 0; i < weightFilms.size(); ++i)
            if (weightFilms[i]) weightFilms[i]->WriteImage(invSampleCount);
    }
}
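// For reference, the pbrt-style excerpts above rely on two ParallelFor call forms: a 1D loop
// taking a per-index callable, an iteration count, and an optional chunk size, and a 2D form
// taking a Point2i tile extent. The sketch below mirrors those call sites; it is reconstructed
// from usage rather than copied from the library's header, and the work inside the lambdas is
// made up for illustration.
void ParallelForUsageSketch() {
    std::vector<Float> row(1024);
    ParallelFor([&](int64_t i) {
        // Independent per-index work; a chunk size of 64 amortizes scheduling overhead.
        row[i] = std::sqrt((Float)i);
    }, row.size(), 64);

    ParallelFor([&](Point2i tile) {
        // One invocation per (tile.x, tile.y) cell of an 8x8 tile grid.
    }, Point2i(8, 8));
}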
void MLTIntegrator::Render(const Scene &scene) {
    lightDistr =
        std::unique_ptr<Distribution1D>(ComputeLightSamplingCDF(scene));
    Film &film = *camera->film;

    // Generate bootstrap samples and compute $b$
    int bootstrapSamples = nBootstrap * (maxDepth + 1);
    std::unique_ptr<Float[]> bootstrapWeights(new Float[bootstrapSamples]);
    {
        ProgressReporter progress(nBootstrap, "Generating bootstrap paths");
        ParallelFor([&](int k) {
            // Generate a single bootstrap sample
            MemoryArena arena;
            for (int depth = 0; depth <= maxDepth; ++depth) {
                uint32_t uIndex = k * (maxDepth + 1) + depth;
                MLTSampler sampler(mutationsPerPixel, uIndex, sigma,
                                   largeStepProb);
                Point2f samplePos;
                bootstrapWeights[uIndex] =
                    L(scene, arena, sampler, depth, &samplePos).y();
            }
            progress.Update();
        }, nBootstrap);
        progress.Done();
    }
    Distribution1D bootstrap(bootstrapWeights.get(), bootstrapSamples);
    Float b = bootstrap.funcInt * (maxDepth + 1);

    // Run _nChains_ Markov Chains in parallel
    int64_t nTotalMutations =
        mutationsPerPixel * (int64_t)film.GetSampleBounds().Area();
    {
        StatTimer timer(&renderingTime);
        ProgressReporter progress(nTotalMutations / 100, "Rendering");
        ParallelFor([&](int k) {
            int64_t nChainMutations =
                std::min((k + 1) * nTotalMutations / nChains,
                         nTotalMutations) -
                k * nTotalMutations / nChains;
            MemoryArena arena;
            std::unique_ptr<FilmTile> filmTile = film.GetFilmTile(Bounds2i(
                film.croppedPixelBounds.pMin, film.croppedPixelBounds.pMin));

            // Select initial state from the set of bootstrap samples
            RNG rng(PCG32_DEFAULT_STATE, k);
            int bootstrapIndex = bootstrap.SampleDiscrete(rng.UniformFloat());
            int depth = bootstrapIndex % (maxDepth + 1);

            // Initialize local variables for selected state
            MLTSampler sampler(mutationsPerPixel, bootstrapIndex, sigma,
                               largeStepProb);
            Point2f currentPos, proposalPos;
            Spectrum currentL, proposalL;
            currentL = L(scene, arena, sampler, depth, &currentPos);

            // Run the Markov Chain for _nChainMutations_ steps
            for (int64_t i = 0; i != nChainMutations; ++i) {
                sampler.Begin();
                proposalL = L(scene, arena, sampler, depth, &proposalPos);

                // Compute the acceptance rate
                Float accept =
                    std::min((Float)1, proposalL.y() / currentL.y());

                // Splat both current and proposed samples to _FilmTile_
                if (accept > 0)
                    filmTile->AddSplat(proposalPos,
                                       proposalL * accept / proposalL.y());
                filmTile->AddSplat(currentPos,
                                   currentL * (1 - accept) / currentL.y());

                // Accept or reject the proposal
                if (rng.UniformFloat() < accept) {
                    currentPos = proposalPos;
                    currentL = proposalL;
                    sampler.Accept();
                    ++acceptedMutations;
                } else {
                    sampler.Reject();
                }
                ++totalMutations;
                if (i % 100 == 0) progress.Update();
            }
            film.MergeFilmTile(std::move(filmTile));
        }, nChains);
        progress.Done();
    }
    film.WriteImage(b / mutationsPerPixel);
}