/**
 * Merges the given meshes into a single triangle mesh and initializes the
 * QBVH from the merged result.
 *
 * @param meshes             meshes to merge.
 * @param totalVertexCount   vertex count passed to TriangleMesh::Merge; the
 *                           merged mesh is asserted to report the same count.
 * @param totalTriangleCount triangle count passed to TriangleMesh::Merge; the
 *                           merged mesh is asserted to report the same count.
 *
 * The accelerator must not have been initialized yet (asserted).
 */
void QBVHAccel::Init(const std::deque<Mesh *> &meshes,
		const unsigned int totalVertexCount,
		const unsigned int totalTriangleCount) {
	assert (!initialized);

	// Merge everything into one mesh; the mesh/triangle ID tables are filled
	// in by Merge() through the two output pointers.
	preprocessedMesh = TriangleMesh::Merge(totalVertexCount, totalTriangleCount,
			meshes, &meshIDs, &meshTriangleIDs);
	assert (preprocessedMesh->GetTotalVertexCount() == totalVertexCount);
	assert (preprocessedMesh->GetTotalTriangleCount() == totalTriangleCount);

	// Report the footprint of the merged geometry.
	const size_t verticesKBytes = totalVertexCount * sizeof(Point) / 1024;
	LR_LOG("Total vertices memory usage: " << verticesKBytes << "Kbytes");

	const size_t trianglesKBytes = totalTriangleCount * sizeof(Triangle) / 1024;
	LR_LOG("Total triangles memory usage: " << trianglesKBytes << "Kbytes");

	// Hand over to the single-mesh initialization.
	Init(preprocessedMesh);
}
/**
 * Compiles the BVH intersection program for the given OpenCL device and
 * creates the "Intersect" kernel.
 *
 * The program source is the concatenation of the shared OpenCL type
 * definitions and KernelSource_BVH. If the build fails, the build log is
 * written to the device log and the original cl::Error is rethrown.
 *
 * After the kernel is created, workGroupSize is set to the value reported for
 * CL_KERNEL_WORK_GROUP_SIZE, unless the device forces a work group size
 * (GetForceWorkGroupSize() > 0), in which case the forced value is used.
 *
 * @param dev device the kernel is built for.
 */
OpenCLBVHKernel(OpenCLIntersectionDevice *dev) :
		OpenCLKernel(dev), vertsBuff(NULL), trisBuff(NULL), bvhBuff(NULL) {
	const Context *deviceContext = device->GetContext();
	cl::Context &oclContext = device->GetOpenCLContext();
	cl::Device &oclDevice = device->GetOpenCLDevice();
	const std::string &deviceName(device->GetName());

	// Compile sources
	std::string code(
		_LUXRAYS_POINT_OCLDEFINE
		_LUXRAYS_VECTOR_OCLDEFINE
		_LUXRAYS_RAY_OCLDEFINE
		_LUXRAYS_RAYHIT_OCLDEFINE
		_LUXRAYS_TRIANGLE_OCLDEFINE
		_LUXRAYS_BBOX_OCLDEFINE);
	code += KernelSource_BVH;
	cl::Program::Sources source(1, std::make_pair(code.c_str(), code.length()));
	cl::Program program = cl::Program(oclContext, source);
	try {
		VECTOR_CLASS<cl::Device> buildDevice;
		buildDevice.push_back(oclDevice);
		program.build(buildDevice);
	} catch (const cl::Error &) {
		// Catch by reference and rethrow with a bare `throw;` so the original
		// exception object propagates unchanged (no copy, no slicing).
		cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
		LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] BVH compilation error:\n" << strError.c_str());

		throw;
	}

	// Replace whatever kernel object is currently held with the freshly
	// built one.
	delete kernel;
	kernel = new cl::Kernel(program, "Intersect");

	// Both log lines report the same CL_KERNEL_WORK_GROUP_SIZE value, so it
	// is queried only once.
	kernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &workGroupSize);
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] BVH kernel work group size: " << workGroupSize);
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Suggested work group size: " << workGroupSize);

	// A user-forced work group size overrides the queried one.
	if (device->GetForceWorkGroupSize() > 0) {
		workGroupSize = device->GetForceWorkGroupSize();
		LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Forced work group size: " << workGroupSize);
	}
}
/**
 * Creates an OpenCL BVH kernel for the given device and uploads the
 * accelerator data to it.
 *
 * Three read-only device buffers are created, each initialized by copying
 * from host memory: the vertices and the triangles of the preprocessed mesh,
 * and the BVH node array. The size of every buffer is logged and registered
 * with the device description through AllocMemory().
 *
 * @param dev                 device the kernel is created for.
 * @param stackSize           not used by this implementation.
 * @param disableImageStorage not used by this implementation.
 * @return the newly allocated kernel with its buffers set.
 */
OpenCLKernel *BVHAccel::NewOpenCLKernel(OpenCLIntersectionDevice *dev,
		unsigned int stackSize, bool disableImageStorage) const {
	OpenCLBVHKernel *bvhKernel = new OpenCLBVHKernel(dev);

	const Context *deviceContext = dev->GetContext();
	cl::Context &oclContext = dev->GetOpenCLContext();
	const std::string &deviceName(dev->GetName());
	OpenCLDeviceDescription *deviceDesc = dev->GetDeviceDesc();

	// Vertices
	const size_t verticesBytes = sizeof(Point) * preprocessedMesh->GetTotalVertexCount();
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] Vertices buffer size: " << (verticesBytes / 1024) << "Kbytes");
	cl::Buffer *verticesBuffer = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			verticesBytes,
			preprocessedMesh->GetVertices());
	deviceDesc->AllocMemory(verticesBuffer->getInfo<CL_MEM_SIZE>());

	// Triangle indices
	const size_t trianglesBytes = sizeof(Triangle) * preprocessedMesh->GetTotalTriangleCount();
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] Triangle indices buffer size: " << (trianglesBytes / 1024) << "Kbytes");
	cl::Buffer *trianglesBuffer = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			trianglesBytes,
			preprocessedMesh->GetTriangles());
	deviceDesc->AllocMemory(trianglesBuffer->getInfo<CL_MEM_SIZE>());

	// BVH node array
	const size_t nodesBytes = sizeof(BVHAccelArrayNode) * nNodes;
	LR_LOG(deviceContext, "[OpenCL device::" << deviceName <<
			"] BVH buffer size: " << (nodesBytes / 1024) << "Kbytes");
	cl::Buffer *nodesBuffer = new cl::Buffer(oclContext,
			CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			nodesBytes,
			(void*)bvhTree);
	deviceDesc->AllocMemory(nodesBuffer->getInfo<CL_MEM_SIZE>());

	// Hand the buffers over to the kernel.
	bvhKernel->SetBuffers(verticesBuffer, preprocessedMesh->GetTotalTriangleCount(),
			trianglesBuffer, nNodes, nodesBuffer);

	return bvhKernel;
}
void BVHAccel::Init(const std::deque<Mesh *> &meshes, const unsigned int totalVertexCount, const unsigned int totalTriangleCount) { assert (!initialized); preprocessedMesh = TriangleMesh::Merge(totalVertexCount, totalTriangleCount, meshes, &preprocessedMeshIDs, &preprocessedMeshTriangleIDs); assert (preprocessedMesh->GetTotalVertexCount() == totalVertexCount); assert (preprocessedMesh->GetTotalTriangleCount() == totalTriangleCount); LR_LOG(ctx, "Total vertices memory usage: " << totalVertexCount * sizeof(Point) / 1024 << "Kbytes"); LR_LOG(ctx, "Total triangles memory usage: " << totalTriangleCount * sizeof(Triangle) / 1024 << "Kbytes"); const Point *v = preprocessedMesh->GetVertices(); const Triangle *p = preprocessedMesh->GetTriangles(); std::vector<BVHAccelTreeNode *> bvList; for (unsigned int i = 0; i < totalTriangleCount; ++i) { BVHAccelTreeNode *ptr = new BVHAccelTreeNode(); ptr->bbox = p[i].WorldBound(v); // NOTE - Ratow - Expand bbox a little to make sure rays collide ptr->bbox.Expand(MachineEpsilon::E(ptr->bbox)); ptr->primitive = i; ptr->leftChild = NULL; ptr->rightSibling = NULL; bvList.push_back(ptr); } LR_LOG(ctx, "Building Bounding Volume Hierarchy, primitives: " << totalTriangleCount); nNodes = 0; BVHAccelTreeNode *rootNode = BuildHierarchy(bvList, 0, bvList.size(), 2); LR_LOG(ctx, "Pre-processing Bounding Volume Hierarchy, total nodes: " << nNodes); bvhTree = new BVHAccelArrayNode[nNodes]; BuildArray(rootNode, 0); FreeHierarchy(rootNode); LR_LOG(ctx, "Total BVH memory usage: " << nNodes * sizeof(BVHAccelArrayNode) / 1024 << "Kbytes"); LR_LOG(ctx, "Finished building Bounding Volume Hierarchy array"); initialized = true; }
void QBVHAccel::Init(const Mesh *m) { assert (!initialized); mesh = m; const unsigned int totalTriangleCount = mesh->GetTotalTriangleCount(); // Temporary data for building u_int *primsIndexes = new u_int[totalTriangleCount + 3]; // For the case where // the last quad would begin at the last primitive // (or the second or third last primitive) // The number of nodes depends on the number of primitives, // and is bounded by 2 * nPrims - 1. // Even if there will normally have at least 4 primitives per leaf, // it is not always the case => continue to use the normal bounds. nNodes = 0; maxNodes = 1; for (u_int layer = ((totalTriangleCount + maxPrimsPerLeaf - 1) / maxPrimsPerLeaf + 3) / 4; layer != 1; layer = (layer + 3) / 4) maxNodes += layer; nodes = AllocAligned<QBVHNode> (maxNodes); for (u_int i = 0; i < maxNodes; ++i) nodes[i] = QBVHNode(); // The arrays that will contain // - the bounding boxes for all triangles // - the centroids for all triangles BBox *primsBboxes = new BBox[totalTriangleCount]; Point *primsCentroids = new Point[totalTriangleCount]; // The bouding volume of all the centroids BBox centroidsBbox; const Point *verts = mesh->GetVertices(); const Triangle *triangles = mesh->GetTriangles(); // Fill each base array for (u_int i = 0; i < totalTriangleCount; ++i) { // This array will be reorganized during construction. 
primsIndexes[i] = i; // Compute the bounding box for the triangle primsBboxes[i] = triangles[i].WorldBound(verts); primsBboxes[i].Expand(RAY_EPSILON); primsCentroids[i] = (primsBboxes[i].pMin + primsBboxes[i].pMax) * .5f; // Update the global bounding boxes worldBound = Union(worldBound, primsBboxes[i]); centroidsBbox = Union(centroidsBbox, primsCentroids[i]); } // Arbitrarily take the last primitive for the last 3 primsIndexes[totalTriangleCount] = totalTriangleCount - 1; primsIndexes[totalTriangleCount + 1] = totalTriangleCount - 1; primsIndexes[totalTriangleCount + 2] = totalTriangleCount - 1; // Recursively build the tree LR_LOG( "Building QBVH, primitives: " << totalTriangleCount << ", initial nodes: " << maxNodes); nQuads = 0; BuildTree(0, totalTriangleCount, primsIndexes, primsBboxes, primsCentroids, worldBound, centroidsBbox, -1, 0, 0); prims = AllocAligned<QuadTriangle> (nQuads); nQuads = 0; PreSwizzle(0, primsIndexes); LR_LOG( "QBVH completed with " << nNodes << "/" << maxNodes << " nodes"); LR_LOG( "Total QBVH memory usage: " << nNodes * sizeof(QBVHNode) / 1024 << "Kbytes"); LR_LOG( "Total QBVH QuadTriangle count: " << nQuads); LR_LOG( "Max. QBVH Depth: " << maxDepth); // Release temporary memory delete[] primsBboxes; delete[] primsCentroids; delete[] primsIndexes; initialized = true; }
void QBVHAccel::BuildTree(u_int start, u_int end, u_int *primsIndexes, BBox *primsBboxes, Point *primsCentroids, const BBox &nodeBbox, const BBox ¢roidsBbox, int32_t parentIndex, int32_t childIndex, int depth) { maxDepth = (depth >= maxDepth) ? depth : maxDepth; // Set depth so we know how much stack we need later. // Create a leaf ? //******** if (depth > 64 || end - start <= maxPrimsPerLeaf) { if (depth > 64) { LR_LOG( "Maximum recursion depth reached while constructing QBVH, forcing a leaf node"); if (end - start > 64) { LR_LOG( "QBVH unable to handle geometry, too many primitives in leaf"); } } CreateTempLeaf(parentIndex, childIndex, start, end, nodeBbox); return; } int32_t currentNode = parentIndex; int32_t leftChildIndex = childIndex; int32_t rightChildIndex = childIndex + 1; // Number of primitives in each bin int bins[NB_BINS]; // Bbox of the primitives in the bin BBox binsBbox[NB_BINS]; //-------------- // Fill in the bins, considering all the primitives when a given // threshold is reached, else considering only a portion of the // primitives for the binned-SAH process. Also compute the bins bboxes // for the primitives. for (u_int i = 0; i < NB_BINS; ++i) bins[i] = 0; u_int step = (end - start < fullSweepThreshold) ? 1 : skipFactor; // Choose the split axis, taking the axis of maximum extent for the // centroids (else weird cases can occur, where the maximum extent axis // for the nodeBbox is an axis of 0 extent for the centroids one.). const int axis = centroidsBbox.MaximumExtent(); // Precompute values that are constant with respect to the current // primitive considered. const float k0 = centroidsBbox.pMin[axis]; const float k1 = NB_BINS / (centroidsBbox.pMax[axis] - k0); // If the bbox is a point, create a leaf, hoping there are not more // than 64 primitives that share the same center. 
if (k1 == INFINITY) { if (end - start > 64) { LR_LOG( "QBVH unable to handle geometry, too many primitives with the same centroid"); } CreateTempLeaf(parentIndex, childIndex, start, end, nodeBbox); return; } // Create an intermediate node if the depth indicates to do so. // Register the split axis. if (depth % 2 == 0) { currentNode = CreateIntermediateNode(parentIndex, childIndex, nodeBbox); leftChildIndex = 0; rightChildIndex = 2; } for (u_int i = start; i < end; i += step) { u_int primIndex = primsIndexes[i]; // Binning is relative to the centroids bbox and to the // primitives' centroid. const int binId = Min(NB_BINS - 1, Floor2Int(k1 * (primsCentroids[primIndex][axis] - k0))); bins[binId]++; binsBbox[binId] = Union(binsBbox[binId], primsBboxes[primIndex]); } //-------------- // Evaluate where to split. // Cumulative number of primitives in the bins from the first to the // ith, and from the last to the ith. int nbPrimsLeft[NB_BINS]; int nbPrimsRight[NB_BINS]; // The corresponding cumulative bounding boxes. BBox bboxesLeft[NB_BINS]; BBox bboxesRight[NB_BINS]; // The corresponding volumes. 
float vLeft[NB_BINS]; float vRight[NB_BINS]; BBox currentBboxLeft, currentBboxRight; int currentNbLeft = 0, currentNbRight = 0; for (int i = 0; i < NB_BINS; ++i) { //----- // Left side // Number of prims currentNbLeft += bins[i]; nbPrimsLeft[i] = currentNbLeft; // Prims bbox currentBboxLeft = Union(currentBboxLeft, binsBbox[i]); bboxesLeft[i] = currentBboxLeft; // Surface area vLeft[i] = currentBboxLeft.SurfaceArea(); //----- // Right side // Number of prims int rightIndex = NB_BINS - 1 - i; currentNbRight += bins[rightIndex]; nbPrimsRight[rightIndex] = currentNbRight; // Prims bbox currentBboxRight = Union(currentBboxRight, binsBbox[rightIndex]); bboxesRight[rightIndex] = currentBboxRight; // Surface area vRight[rightIndex] = currentBboxRight.SurfaceArea(); } int minBin = -1; float minCost = INFINITY; // Find the best split axis, // there must be at least a bin on the right side for (int i = 0; i < NB_BINS - 1; ++i) { float cost = vLeft[i] * nbPrimsLeft[i] + vRight[i + 1] * nbPrimsRight[i + 1]; if (cost < minCost) { minBin = i; minCost = cost; } } //----------------- // Make the partition, in a "quicksort partitioning" way, // the pivot being the position of the split plane // (no more binId computation) // track also the bboxes (primitives and centroids) // for the left and right halves. 
// The split plane coordinate is the coordinate of the end of // the chosen bin along the split axis float splitPos = centroidsBbox.pMin[axis] + (minBin + 1) * (centroidsBbox.pMax[axis] - centroidsBbox.pMin[axis]) / NB_BINS; BBox leftChildBbox, rightChildBbox; BBox leftChildCentroidsBbox, rightChildCentroidsBbox; u_int storeIndex = start; for (u_int i = start; i < end; ++i) { u_int primIndex = primsIndexes[i]; if (primsCentroids[primIndex][axis] <= splitPos) { // Swap primsIndexes[i] = primsIndexes[storeIndex]; primsIndexes[storeIndex] = primIndex; ++storeIndex; // Update the bounding boxes, // this triangle is on the left side leftChildBbox = Union(leftChildBbox, primsBboxes[primIndex]); leftChildCentroidsBbox = Union(leftChildCentroidsBbox, primsCentroids[primIndex]); } else { // Update the bounding boxes, // this triangle is on the right side. rightChildBbox = Union(rightChildBbox, primsBboxes[primIndex]); rightChildCentroidsBbox = Union(rightChildCentroidsBbox, primsCentroids[primIndex]); } } // Build recursively BuildTree(start, storeIndex, primsIndexes, primsBboxes, primsCentroids, leftChildBbox, leftChildCentroidsBbox, currentNode, leftChildIndex, depth + 1); BuildTree(storeIndex, end, primsIndexes, primsBboxes, primsCentroids, rightChildBbox, rightChildCentroidsBbox, currentNode, rightChildIndex, depth + 1); }