// Recursively builds the BVH subtree for the entries workList[start, end) and
// returns its root node.
//
//   start, end : half-open range of workList entries covered by this node
//   depth      : depth of this node; folded into maxDepth
//
// A leaf is created when the range holds at most MAX_LEAF_PRIM_NUM entries or
// when the centroids span less than 1e-4 along their widest axis. Otherwise the
// range is binned into nBuckets buckets along that axis, the bucket boundary
// with the lowest surface-area-heuristic cost is chosen, workList[start, end)
// is partitioned in place around it, and both halves are built recursively
// (left half first).
BVHNode* BVH::RecursiveBuild(uint32_t start, uint32_t end, uint32_t depth)
{
    maxDepth = fmaxf(depth, maxDepth);
    BVHNode* node = new BVHNode;

    //compute bounds of all primitives in BVH node
    BBox bbox;
    for(uint32_t i = start; i < end; ++i)
    {
        bbox = Union(bbox, workList[i].bounds);
    }

    const uint32_t nPrims = end - start;

    //turns `node` into a leaf covering the whole range and returns it
    auto makeLeaf = [&]() -> BVHNode*
    {
        const uint32_t firstPrimOffset = static_cast<uint32_t>(orderedPrims.size());
        for(uint32_t i = start; i < end; ++i)
        {
            const auto pIdx = workList[i].pIdx;
            // NOTE(review): pIdx is read but nothing visible here appends to
            // orderedPrims, so firstPrimOffset never advances between leaves.
            // Presumably the primitive referenced by pIdx should be pushed onto
            // orderedPrims at this point -- confirm against the class definition.
            (void)pIdx;
        }
        node->InitLeaf(firstPrimOffset, nPrims, bbox);
        return node;
    };

    //if number of primitives are less than threshold, create leaf node
    if(nPrims <= MAX_LEAF_PRIM_NUM)
    {
        return makeLeaf();
    }

    //compute bound of primitive centroids, choose split dimension
    BBox centroidBounds;
    for(uint32_t i = start; i < end; ++i)
    {
        centroidBounds = Union(centroidBounds, workList[i].bounds.bcenter);
    }

    // split along max span axis
    const int dim = centroidBounds.MaxExtent();
    const float extent = centroidBounds.bmax[dim] - centroidBounds.bmin[dim];

    // if max span axis is too small, create a leaf node
    if(extent < 1e-4)
    {
        return makeLeaf();
    }

    //partition primitives based on SAH
    const uint32_t bucketCount = static_cast<uint32_t>(nBuckets);

    // Single source of truth for "which bucket does this primitive fall in".
    // It is used both for binning and for the final partition so the two can
    // never disagree; the index is truncated to an integer and clamped so a
    // centroid sitting exactly on bmax lands in the last bucket.
    auto bucketOf = [&](const BVHPrimitiveInfo& p) -> uint32_t
    {
        const uint32_t b = static_cast<uint32_t>(
            bucketCount * ((p.bounds.bcenter[dim] - centroidBounds.bmin[dim]) / extent));
        return (b >= bucketCount) ? (bucketCount - 1) : b;
    };

    std::vector<BucketInfo> buckets(bucketCount);
    for(uint32_t i = start; i < end; ++i)
    {
        BucketInfo& bucket = buckets[bucketOf(workList[i])];
        // the cost loop below weights each side by its primitive count, so the
        // count has to be accumulated together with the bounds
        bucket.count++;
        bucket.bounds = Union(bucket.bounds, workList[i].bounds);
    }

    //compute the cost of splitting after each bucket and keep the cheapest;
    //ties keep the lowest bucket index
    const auto parentArea = bbox.SurfaceArea();
    float minCost = 0.0f;
    uint32_t bestSplit = 0;
    for(uint32_t i = 0; i + 1 < bucketCount; ++i)
    {
        BBox b0, b1;
        int count0 = 0, count1 = 0;

        for(uint32_t j = 0; j <= i; ++j)
        {
            b0 = Union(b0, buckets[j].bounds);
            count0 += buckets[j].count;
        }
        for(uint32_t j = i + 1; j < bucketCount; ++j)
        {
            b1 = Union(b1, buckets[j].bounds);
            count1 += buckets[j].count;
        }

        const float splitCost = (count0 * b0.SurfaceArea() + count1 * b1.SurfaceArea()) / parentArea;
        if(i == 0 || splitCost < minCost)
        {
            minCost = splitCost;
            bestSplit = i;
        }
    }

    //either create leaf or split at selected SAH bucket
    //(nPrims > MAX_LEAF_PRIM_NUM already holds on this path because of the
    //early return above, so the leaf fallback is not taken at present)
    if(!(nPrims > MAX_LEAF_PRIM_NUM || minCost < nPrims))
    {
        return makeLeaf();
    }

    BVHPrimitiveInfo* first = &workList[start];
    BVHPrimitiveInfo* last = first + nPrims;
    BVHPrimitiveInfo* pmid = std::partition(first, last,
        [&](const BVHPrimitiveInfo& p) { return bucketOf(p) <= bestSplit; });
    uint32_t mid = static_cast<uint32_t>(pmid - &workList[0]);

    // An empty side would make one child cover exactly this range again and
    // recurse forever; fall back to an even split of the range in that case.
    if(mid == start || mid == end)
    {
        mid = start + nPrims / 2;
    }

    // Build the children in separate statements: the order in which function
    // arguments are evaluated is unspecified, and each recursive call reorders
    // workList and reads orderedPrims.size().
    BVHNode* left = RecursiveBuild(start, mid, depth + 1);
    BVHNode* right = RecursiveBuild(mid, end, depth + 1);
    node->InitInner(left, right);

    return node;
}