/// Rebuilds the BVH and the GPU buffers used by the short-stack intersector.
///
/// The rebuild only happens when no BVH exists yet, or when world.has_changed()
/// or world.GetStateChange() reports a modification; otherwise the call returns
/// without touching any state.
///
/// Meshes are taken in world space directly, instances are flattened by applying
/// the instance transform to the geometry of their base shape.
///
/// @param world  Scene providing the shapes and the "bvh.*" build options.
/// @throws ExceptionImpl if the device's max_alloc_size does not exceed the
///         traversal stack size, or if m_bvh->GetHeight() is not below
///         kMaxStackSize.
void IntersectorShortStack::Process(World const& world)
{
    // Nothing to do unless this is the first build or something has changed
    if (m_bvh && !world.has_changed() && world.GetStateChange() == ShapeImpl::kStateChangeNone)
    {
        return;
    }

    // Check if we can allocate enough stack memory. This is done before any
    // buffer is released so that a failure leaves the previous state intact.
    Calc::DeviceSpec spec;
    m_device->GetSpec(spec);

    if (spec.max_alloc_size <= kMaxBatchSize * kMaxStackSize * sizeof(int))
    {
        throw ExceptionImpl("fatbvh accelerator can't allocate enough stack memory, try using bvh instead");
    }

    // Release the buffers created by the previous build. The stack buffer is
    // re-created below on every rebuild, so it has to be released here as well.
    if (m_bvh)
    {
        m_device->DeleteBuffer(m_gpudata->bvh);
        m_device->DeleteBuffer(m_gpudata->vertices);
        m_device->DeleteBuffer(m_gpudata->stack);
    }

    int const numshapes = static_cast<int>(world.shapes_.size());
    int numvertices = 0;
    int numfaces = 0;

    // These buffers track the first vertex / face of every shape in the
    // flattened arrays, as face indices stored in a mesh are relative to 0
    std::vector<int> mesh_vertices_start_idx(numshapes);
    std::vector<int> mesh_faces_start_idx(numshapes);

    // Builder options, each falls back to a default when the option is absent
    auto builder = world.options_.GetOption("bvh.builder");
    auto splits = world.options_.GetOption("bvh.sah.use_splits");
    auto maxdepth = world.options_.GetOption("bvh.sah.max_split_depth");
    auto overlap = world.options_.GetOption("bvh.sah.min_overlap");
    auto tcost = world.options_.GetOption("bvh.sah.traversal_cost");
    auto node_budget = world.options_.GetOption("bvh.sah.extra_node_budget");
    auto nbins = world.options_.GetOption("bvh.sah.num_bins");

    bool const use_sah = builder && builder->AsString() == "sah";
    bool const use_splits = splits && splits->AsFloat() > 0.f;
    int const max_split_depth = maxdepth ? static_cast<int>(maxdepth->AsFloat()) : 10;
    int const num_bins = nbins ? static_cast<int>(nbins->AsFloat()) : 64;
    float const min_overlap = overlap ? overlap->AsFloat() : 0.05f;
    float const traversal_cost = tcost ? tcost->AsFloat() : 10.f;
    float const extra_node_budget = node_budget ? node_budget->AsFloat() : 0.5f;

    m_bvh.reset(use_splits ?
        new SplitBvh(traversal_cost, num_bins, max_split_depth, min_overlap, extra_node_budget) :
        new Bvh(traversal_cost, num_bins, use_sah));

    // Partition the array into meshes (first) and instances (last)
    std::vector<Shape const*> shapes(world.shapes_);

    auto firstinst = std::partition(shapes.begin(), shapes.end(),
        [&](Shape const* shape)
        {
            return !static_cast<ShapeImpl const*>(shape)->is_instance();
        });

    // Count the number of meshes
    int const nummeshes = static_cast<int>(std::distance(shapes.begin(), firstinst));
    // Count the number of instances
    int const numinstances = static_cast<int>(std::distance(firstinst, shapes.end()));

    // Returns the mesh holding the geometry of shapes[idx]: the shape itself
    // for meshes, the base shape for instances
    auto const geometry_of = [&](int idx) -> Mesh const*
    {
        if (idx < nummeshes)
        {
            return static_cast<Mesh const*>(shapes[idx]);
        }

        return static_cast<Mesh const*>(static_cast<Instance const*>(shapes[idx])->GetBaseShape());
    };

    // Compute the start offsets and the total amount of geometry
    for (int i = 0; i < nummeshes + numinstances; ++i)
    {
        Mesh const* mesh = geometry_of(i);

        mesh_faces_start_idx[i] = numfaces;
        mesh_vertices_start_idx[i] = numvertices;

        numfaces += mesh->num_faces();
        numvertices += mesh->num_vertices();
    }

    // We can't avoid allocating it here, since bounds aren't stored anywhere
    std::vector<bbox> bounds(numfaces);

    // We handle meshes first collecting their world space bounds
    #pragma omp parallel for
    for (int i = 0; i < nummeshes; ++i)
    {
        Mesh const* mesh = static_cast<Mesh const*>(shapes[i]);

        for (int j = 0; j < mesh->num_faces(); ++j)
        {
            // Here we directly get world space bounds
            mesh->GetFaceBounds(j, false, bounds[mesh_faces_start_idx[i] + j]);
        }
    }

    // Then we handle instances. Need to flatten them into actual geometry.
    #pragma omp parallel for
    for (int i = nummeshes; i < nummeshes + numinstances; ++i)
    {
        Instance const* instance = static_cast<Instance const*>(shapes[i]);
        Mesh const* mesh = static_cast<Mesh const*>(instance->GetBaseShape());

        // Instance is using its own transform for base shape geometry
        // so we need to get object space bounds and transform them manually
        matrix m, minv;
        instance->GetTransform(m, minv);

        for (int j = 0; j < mesh->num_faces(); ++j)
        {
            bbox tmp;
            mesh->GetFaceBounds(j, true, tmp);
            bounds[mesh_faces_start_idx[i] + j] = transform_bbox(tmp, m);
        }
    }

    // data() stays well defined when the world contains no faces at all
    m_bvh->Build(bounds.data(), numfaces);

#ifdef RR_PROFILE
    m_bvh->PrintStatistics(std::cout);
#endif

    // Check if the tree height is reasonable
    if (m_bvh->GetHeight() >= kMaxStackSize)
    {
        // Drop the BVH so that the next call does not release buffers again
        m_bvh.reset(nullptr);
        throw ExceptionImpl("fatbvh accelerator can cause stack overflow for this scene, try using bvh instead");
    }

    FatNodeBvhTranslator translator;
    translator.Process(*m_bvh);

    // Update GPU data
    // Create vertex buffer
    {
        m_gpudata->vertices = m_device->CreateBuffer(numvertices * sizeof(float3), Calc::BufferType::kRead);

        // Get the pointer to mapped data
        float3* vertexdata = nullptr;
        Calc::Event* e = nullptr;

        m_device->MapBuffer(m_gpudata->vertices, 0, 0, numvertices * sizeof(float3), Calc::MapType::kMapWrite,
            reinterpret_cast<void**>(&vertexdata), &e);

        e->Wait();
        m_device->DeleteEvent(e);

        // Here we need to put data in world space rather than object space,
        // so we need to get the transform from the shape and multiply each vertex
        #pragma omp parallel for
        for (int i = 0; i < nummeshes; ++i)
        {
            // Get the mesh
            Mesh const* mesh = static_cast<Mesh const*>(shapes[i]);
            // Get vertex buffer of the current mesh
            float3 const* myvertexdata = mesh->GetVertexData();

            // Get mesh transform. The matrices are declared inside the loop body
            // so that every OpenMP thread works on its own copy.
            matrix m, minv;
            mesh->GetTransform(m, minv);

            // Iterate through vertices, transform and write them to the GPU buffer
            for (int j = 0; j < mesh->num_vertices(); ++j)
            {
                vertexdata[mesh_vertices_start_idx[i] + j] = transform_point(myvertexdata[j], m);
            }
        }

        #pragma omp parallel for
        for (int i = nummeshes; i < nummeshes + numinstances; ++i)
        {
            Instance const* instance = static_cast<Instance const*>(shapes[i]);
            // Get the mesh
            Mesh const* mesh = static_cast<Mesh const*>(instance->GetBaseShape());
            // Get vertex buffer of the current mesh
            float3 const* myvertexdata = mesh->GetVertexData();

            // Get instance transform (thread-local, see above)
            matrix m, minv;
            instance->GetTransform(m, minv);

            // Iterate through vertices, transform and write them to the GPU buffer
            for (int j = 0; j < mesh->num_vertices(); ++j)
            {
                vertexdata[mesh_vertices_start_idx[i] + j] = transform_point(myvertexdata[j], m);
            }
        }

        m_device->UnmapBuffer(m_gpudata->vertices, 0, vertexdata, &e);

        e->Wait();
        m_device->DeleteEvent(e);
    }

    // Create face buffer
    {
        // This number is different from the number of faces for some BVHs
        auto numindices = m_bvh->GetNumIndices();
        std::vector<FatNodeBvhTranslator::Face> facedata(numindices);

        // Here the point is to add the mesh starting index to the actual index
        // contained within the mesh, getting an absolute index in the buffer.
        // Besides that we need to permute the faces according to the BVH
        // reordering returned by GetIndices().
        int const* reordering = m_bvh->GetIndices();

        for (int i = 0; i < numindices; ++i)
        {
            int const indextolook4 = reordering[i];

            // We need to find a shape corresponding to current face
            auto iter = std::upper_bound(mesh_faces_start_idx.cbegin(), mesh_faces_start_idx.cend(), indextolook4);

            // Find the index of the shape
            int const shapeidx = static_cast<int>(std::distance(mesh_faces_start_idx.cbegin(), iter) - 1);

            // Get the mesh directly or out of instance
            Mesh const* mesh = geometry_of(shapeidx);

            // Get face buffer of the current mesh
            Mesh::Face const* myfacedata = mesh->GetFaceData();
            // Find face idx
            int const faceidx = indextolook4 - mesh_faces_start_idx[shapeidx];
            // Find mesh start idx
            int const mystartidx = mesh_vertices_start_idx[shapeidx];

            // Copy face data to GPU buffer
            facedata[i].idx[0] = myfacedata[faceidx].idx[0] + mystartidx;
            facedata[i].idx[1] = myfacedata[faceidx].idx[1] + mystartidx;
            facedata[i].idx[2] = myfacedata[faceidx].idx[2] + mystartidx;

            facedata[i].shapeidx = shapes[shapeidx]->GetId();
            facedata[i].shape_mask = shapes[shapeidx]->GetMask();
            facedata[i].id = faceidx;
        }

        translator.InjectIndices(facedata.data());
    }

    // Copy translated nodes first
    m_gpudata->bvh = m_device->CreateBuffer(translator.nodes_.size() * sizeof(FatNodeBvhTranslator::Node),
        Calc::BufferType::kRead, &translator.nodes_[0]);

    // Stack. The capability check at the top budgets sizeof(int) per stack
    // entry, so the buffer is allocated with the very same size.
    m_gpudata->stack = m_device->CreateBuffer(kMaxBatchSize * kMaxStackSize * sizeof(int), Calc::BufferType::kWrite);

    // Make sure everything is committed
    m_device->Finish(0);
}
/// Rebuilds the BVH and uploads nodes, vertices, faces and shape records to the
/// device.
///
/// The rebuild only happens when no BVH exists yet, or when world.has_changed()
/// or world.GetStateChange() reports a modification; otherwise the call returns
/// without touching any state.
///
/// Meshes are taken in world space directly, instances are flattened by applying
/// the instance transform to the geometry of their base shape.
///
/// @param world  Scene providing the shapes and the "bvh.builder" option.
void BvhStrategy::Preprocess(World const& world)
{
    // Nothing to do unless this is the first build or something has changed
    if (m_bvh && !world.has_changed() && world.GetStateChange() == ShapeImpl::kStateChangeNone)
    {
        return;
    }

    // Every buffer below is re-created on a rebuild, so release the ones
    // left over from the previous build first
    if (m_bvh)
    {
        m_device->DeleteBuffer(m_gpudata->bvh);
        m_device->DeleteBuffer(m_gpudata->vertices);
        m_device->DeleteBuffer(m_gpudata->faces);
        m_device->DeleteBuffer(m_gpudata->shapes);
        m_device->DeleteBuffer(m_gpudata->raycnt);
    }

    int const numshapes = static_cast<int>(world.shapes_.size());
    int numvertices = 0;
    int numfaces = 0;

    // These buffers track the first vertex / face of every shape in the
    // flattened arrays, as face indices stored in a mesh are relative to 0
    std::vector<int> mesh_vertices_start_idx(numshapes);
    std::vector<int> mesh_faces_start_idx(numshapes);

    // Recreate the BVH; first check if we need to use SAH
    auto builder = world.options_.GetOption("bvh.builder");
    bool const enablesah = builder && builder->AsString() == "sah";

    m_bvh.reset(new Bvh(enablesah));

    // Partition the array into meshes (first) and instances (last)
    std::vector<Shape const*> shapes(world.shapes_);

    auto firstinst = std::partition(shapes.begin(), shapes.end(),
        [&](Shape const* shape)
        {
            return !static_cast<ShapeImpl const*>(shape)->is_instance();
        });

    // Count the number of meshes
    int const nummeshes = static_cast<int>(std::distance(shapes.begin(), firstinst));
    // Count the number of instances
    int const numinstances = static_cast<int>(std::distance(firstinst, shapes.end()));

    // Returns the mesh holding the geometry of shapes[idx]: the shape itself
    // for meshes, the base shape for instances
    auto const geometry_of = [&](int idx) -> Mesh const*
    {
        if (idx < nummeshes)
        {
            return static_cast<Mesh const*>(shapes[idx]);
        }

        return static_cast<Mesh const*>(static_cast<Instance const*>(shapes[idx])->GetBaseShape());
    };

    // Compute the start offsets and the total amount of geometry
    for (int i = 0; i < nummeshes + numinstances; ++i)
    {
        Mesh const* mesh = geometry_of(i);

        mesh_faces_start_idx[i] = numfaces;
        mesh_vertices_start_idx[i] = numvertices;

        numfaces += mesh->num_faces();
        numvertices += mesh->num_vertices();
    }

    // We can't avoid allocating it here, since bounds aren't stored anywhere
    std::vector<bbox> bounds(numfaces);
    std::vector<ShapeData> shapedata(numshapes);

    // We handle meshes first collecting their world space bounds
    #pragma omp parallel for
    for (int i = 0; i < nummeshes; ++i)
    {
        Mesh const* mesh = static_cast<Mesh const*>(shapes[i]);

        for (int j = 0; j < mesh->num_faces(); ++j)
        {
            // Here we directly get world space bounds
            mesh->GetFaceBounds(j, false, bounds[mesh_faces_start_idx[i] + j]);
        }

        shapedata[i].id = mesh->GetId();
        shapedata[i].mask = mesh->GetMask();
    }

    // Then we handle instances. Need to flatten them into actual geometry.
    #pragma omp parallel for
    for (int i = nummeshes; i < nummeshes + numinstances; ++i)
    {
        Instance const* instance = static_cast<Instance const*>(shapes[i]);
        Mesh const* mesh = static_cast<Mesh const*>(instance->GetBaseShape());

        // Instance is using its own transform for base shape geometry
        // so we need to get object space bounds and transform them manually
        matrix m, minv;
        instance->GetTransform(m, minv);

        for (int j = 0; j < mesh->num_faces(); ++j)
        {
            bbox tmp;
            mesh->GetFaceBounds(j, true, tmp);
            bounds[mesh_faces_start_idx[i] + j] = transform_bbox(tmp, m);
        }

        shapedata[i].id = instance->GetId();
        shapedata[i].mask = instance->GetMask();
    }

    // data() stays well defined when the world contains no faces at all
    m_bvh->Build(bounds.data(), numfaces);

    PlainBvhTranslator translator;
    translator.Process(*m_bvh);

    // Update GPU data
    // Copy translated nodes first
    m_gpudata->bvh = m_device->CreateBuffer(translator.nodes_.size() * sizeof(PlainBvhTranslator::Node),
        Calc::BufferType::kRead, &translator.nodes_[0]);

    // Create vertex buffer
    {
        m_gpudata->vertices = m_device->CreateBuffer(numvertices * sizeof(float3), Calc::BufferType::kRead);

        // Get the pointer to mapped data
        float3* vertexdata = nullptr;
        Calc::Event* e = nullptr;

        m_device->MapBuffer(m_gpudata->vertices, 0, 0, numvertices * sizeof(float3), Calc::MapType::kMapWrite,
            reinterpret_cast<void**>(&vertexdata), &e);

        e->Wait();
        m_device->DeleteEvent(e);

        // Here we need to put data in world space rather than object space,
        // so we need to get the transform from the shape and multiply each vertex
        #pragma omp parallel for
        for (int i = 0; i < nummeshes; ++i)
        {
            // Get the mesh
            Mesh const* mesh = static_cast<Mesh const*>(shapes[i]);
            // Get vertex buffer of the current mesh
            float3 const* myvertexdata = mesh->GetVertexData();

            // Get mesh transform. The matrices are declared inside the loop body
            // so that every OpenMP thread works on its own copy.
            matrix m, minv;
            mesh->GetTransform(m, minv);

            // Iterate through vertices, transform and write them to the GPU buffer
            for (int j = 0; j < mesh->num_vertices(); ++j)
            {
                vertexdata[mesh_vertices_start_idx[i] + j] = transform_point(myvertexdata[j], m);
            }
        }

        #pragma omp parallel for
        for (int i = nummeshes; i < nummeshes + numinstances; ++i)
        {
            Instance const* instance = static_cast<Instance const*>(shapes[i]);
            // Get the mesh
            Mesh const* mesh = static_cast<Mesh const*>(instance->GetBaseShape());
            // Get vertex buffer of the current mesh
            float3 const* myvertexdata = mesh->GetVertexData();

            // Get instance transform (thread-local, see above)
            matrix m, minv;
            instance->GetTransform(m, minv);

            // Iterate through vertices, transform and write them to the GPU buffer
            for (int j = 0; j < mesh->num_vertices(); ++j)
            {
                vertexdata[mesh_vertices_start_idx[i] + j] = transform_point(myvertexdata[j], m);
            }
        }

        m_device->UnmapBuffer(m_gpudata->vertices, 0, vertexdata, &e);

        e->Wait();
        m_device->DeleteEvent(e);
    }

    // Create face buffer
    {
        // Layout of a single record written to the face buffer
        struct Face
        {
            // Up to 3 indices
            int idx[3];
            // Shape index
            int shapeidx;
            // Primitive ID within the mesh
            int id;
            // Idx count
            int cnt;
        };

        m_gpudata->faces = m_device->CreateBuffer(numfaces * sizeof(Face), Calc::BufferType::kRead);

        // Get the pointer to mapped data
        Face* facedata = nullptr;
        Calc::Event* e = nullptr;

        // Mapping takes a map type, the same one the vertex buffer is mapped with
        m_device->MapBuffer(m_gpudata->faces, 0, 0, numfaces * sizeof(Face), Calc::MapType::kMapWrite,
            reinterpret_cast<void**>(&facedata), &e);

        e->Wait();
        m_device->DeleteEvent(e);

        // Here the point is to add the mesh starting index to the actual index
        // contained within the mesh, getting an absolute index in the buffer.
        // Besides that we need to permute the faces according to the BVH
        // reordering returned by GetIndices().
        int const* reordering = m_bvh->GetIndices();

        for (int i = 0; i < numfaces; ++i)
        {
            int const indextolook4 = reordering[i];

            // We need to find a shape corresponding to current face
            auto iter = std::upper_bound(mesh_faces_start_idx.cbegin(), mesh_faces_start_idx.cend(), indextolook4);

            // Find the index of the shape
            int const shapeidx = static_cast<int>(std::distance(mesh_faces_start_idx.cbegin(), iter) - 1);

            // Get the mesh directly or out of instance
            Mesh const* mesh = geometry_of(shapeidx);

            // Get face buffer of the current mesh
            Mesh::Face const* myfacedata = mesh->GetFaceData();
            // Find face idx
            int const faceidx = indextolook4 - mesh_faces_start_idx[shapeidx];
            // Find mesh start idx
            int const mystartidx = mesh_vertices_start_idx[shapeidx];

            // Copy face data to GPU buffer
            facedata[i].idx[0] = myfacedata[faceidx].idx[0] + mystartidx;
            facedata[i].idx[1] = myfacedata[faceidx].idx[1] + mystartidx;
            facedata[i].idx[2] = myfacedata[faceidx].idx[2] + mystartidx;

            facedata[i].shapeidx = shapeidx;
            facedata[i].cnt = 0;
            facedata[i].id = faceidx;
        }

        m_device->UnmapBuffer(m_gpudata->faces, 0, facedata, &e);

        e->Wait();
        m_device->DeleteEvent(e);
    }

    // Create shapes buffer
    m_gpudata->shapes = m_device->CreateBuffer(numshapes * sizeof(ShapeData), Calc::BufferType::kRead, shapedata.data());

    // Create helper raycounter buffer
    m_gpudata->raycnt = m_device->CreateBuffer(sizeof(int), Calc::BufferType::kWrite);

    // Make sure everything is committed
    m_device->Finish(0);
}