void BVH4mbBuilder::computePrimRefsTrianglesMB(const size_t threadID, const size_t numThreads) { DBG(PING); const size_t numGroups = scene->size(); const size_t startID = (threadID+0)*numPrimitives/numThreads; const size_t endID = (threadID+1)*numPrimitives/numThreads; PrimRef *__restrict__ const prims = this->prims; // === find first group containing startID === unsigned int g=0, numSkipped = 0; for (; g<numGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue; const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g); if (unlikely(!mesh->isEnabled())) continue; if (unlikely(mesh->numTimeSteps == 1)) continue; const size_t numTriangles = mesh->numTriangles; if (numSkipped + numTriangles > startID) break; numSkipped += numTriangles; } // === start with first group containing startID === mic_f bounds_scene_min((float)pos_inf); mic_f bounds_scene_max((float)neg_inf); mic_f bounds_centroid_min((float)pos_inf); mic_f bounds_centroid_max((float)neg_inf); unsigned int num = 0; unsigned int currentID = startID; unsigned int offset = startID - numSkipped; __align(64) PrimRef local_prims[2]; size_t numLocalPrims = 0; PrimRef *__restrict__ dest = &prims[currentID]; for (; g<numGroups; g++) { if (unlikely(scene->get(g) == NULL)) continue; if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue; const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g); if (unlikely(!mesh->isEnabled())) continue; if (unlikely(mesh->numTimeSteps == 1)) continue; for (unsigned int i=offset; i<mesh->numTriangles && currentID < endID; i++, currentID++) { //DBG_PRINT(currentID); const TriangleMeshScene::TriangleMesh::Triangle& tri = mesh->triangle(i); prefetch<PFHINT_L2>(&tri + L2_PREFETCH_ITEMS); prefetch<PFHINT_L1>(&tri + L1_PREFETCH_ITEMS); const float *__restrict__ const vptr0 = (float*)&mesh->vertex(tri.v[0]); const float *__restrict__ const vptr1 = (float*)&mesh->vertex(tri.v[1]); const float *__restrict__ const vptr2 = (float*)&mesh->vertex(tri.v[2]); const mic_f v0 = broadcast4to16f(vptr0); const mic_f v1 = broadcast4to16f(vptr1); const mic_f v2 = broadcast4to16f(vptr2); const mic_f bmin = min(min(v0,v1),v2); const mic_f bmax = max(max(v0,v1),v2); bounds_scene_min = min(bounds_scene_min,bmin); bounds_scene_max = max(bounds_scene_max,bmax); const mic_f centroid2 = bmin+bmax; bounds_centroid_min = min(bounds_centroid_min,centroid2); bounds_centroid_max = max(bounds_centroid_max,centroid2); store4f(&local_prims[numLocalPrims].lower,bmin); store4f(&local_prims[numLocalPrims].upper,bmax); local_prims[numLocalPrims].lower.a = g; local_prims[numLocalPrims].upper.a = i; //DBG_PRINT( local_prims[numLocalPrims] ); numLocalPrims++; if (unlikely(((size_t)dest % 64) != 0) && numLocalPrims == 1) { *dest = local_prims[0]; dest++; numLocalPrims--; } else { const mic_f twoAABBs = load16f(local_prims); if (numLocalPrims == 2) { numLocalPrims = 0; store16f_ngo(dest,twoAABBs); dest+=2; } } } if (currentID == endID) break; offset = 0; } /* is there anything left in the local queue? */ if (numLocalPrims % 2 != 0) *dest = local_prims[0]; /* update global bounds */ Centroid_Scene_AABB bounds; store4f(&bounds.centroid2.lower,bounds_centroid_min); store4f(&bounds.centroid2.upper,bounds_centroid_max); store4f(&bounds.geometry.lower,bounds_scene_min); store4f(&bounds.geometry.upper,bounds_scene_max); global_bounds.extend_atomic(bounds); }
void BVH4HairBuilder::parallelBinningGlobal(const size_t threadID, const size_t numThreads) { BuildRecord ¤t = global_sharedData.rec; const unsigned int items = current.items(); const unsigned int startID = current.begin + ((threadID+0)*items/numThreads); const unsigned int endID = current.begin + ((threadID+1)*items/numThreads); const mic_f centroidMin = broadcast4to16f(¤t.bounds.centroid2.lower); const mic_f centroidMax = broadcast4to16f(¤t.bounds.centroid2.upper); const mic_f centroidBoundsMin_2 = centroidMin; const mic_f centroidDiagonal_2 = centroidMax-centroidMin; const mic_f scale = select(centroidDiagonal_2 != 0.0f,rcp(centroidDiagonal_2) * mic_f(16.0f * 0.99f),mic_f::zero()); Bezier1i *__restrict__ const tmp_prims = (Bezier1i*)accel; fastbin_copy<Bezier1i,false>(prims,tmp_prims,startID,endID,centroidBoundsMin_2,scale,global_bin16[threadID]); LockStepTaskScheduler::syncThreadsWithReduction( threadID, numThreads, reduceBinsParallel, global_bin16 ); if (threadID == 0) { const float voxelArea = area(current.bounds.geometry); global_sharedData.split.cost = items * voxelArea * INTERSECTION_COST;; const Bin16 &bin16 = global_bin16[0]; for (size_t dim=0;dim<3;dim++) { if (unlikely(centroidDiagonal_2[dim] == 0.0f)) continue; const mic_f rArea = prefix_area_rl(bin16.min_x[dim],bin16.min_y[dim],bin16.min_z[dim], bin16.max_x[dim],bin16.max_y[dim],bin16.max_z[dim]); const mic_f lArea = prefix_area_lr(bin16.min_x[dim],bin16.min_y[dim],bin16.min_z[dim], bin16.max_x[dim],bin16.max_y[dim],bin16.max_z[dim]); const mic_i lnum = prefix_count(bin16.count[dim]); const mic_i rnum = mic_i(items) - lnum; const mic_i lblocks = (lnum + mic_i(3)) >> 2; const mic_i rblocks = (rnum + mic_i(3)) >> 2; const mic_m m_lnum = lnum == 0; const mic_m m_rnum = rnum == 0; const mic_f cost = select(m_lnum|m_rnum,mic_f::inf(),lArea * mic_f(lblocks) + rArea * mic_f(rblocks) + voxelArea ); if (lt(cost,mic_f(global_sharedData.split.cost))) { const mic_f min_cost = vreduce_min(cost); const mic_m m_pos = min_cost == cost; const unsigned long pos = bitscan64(m_pos); assert(pos < 15); if (pos < 15) { global_sharedData.split.cost = cost[pos]; global_sharedData.split.pos = pos+1; global_sharedData.split.dim = dim; global_sharedData.split.numLeft = lnum[pos]; } } } } }
BVH4mb::Triangle01 * __restrict__ const acc) { const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(geomID); const TriangleMeshScene::TriangleMesh::Triangle & tri = mesh->triangle(primID); const mic_i pID(primID); const mic_i gID(geomID); const float *__restrict__ const vptr0_t0 = (float*)&mesh->vertex(tri.v[0]); const float *__restrict__ const vptr1_t0 = (float*)&mesh->vertex(tri.v[1]); const float *__restrict__ const vptr2_t0 = (float*)&mesh->vertex(tri.v[2]); prefetch<PFHINT_L1>(vptr1_t0); prefetch<PFHINT_L1>(vptr2_t0); const mic_f v0_t0 = broadcast4to16f(vptr0_t0); const mic_f v1_t0 = broadcast4to16f(vptr1_t0); const mic_f v2_t0 = broadcast4to16f(vptr2_t0); const mic_f tri_accel_t0 = initTriangle1(v0_t0,v1_t0,v2_t0,gID,pID,mic_i(mesh->mask)); store16f_ngo(&acc->t0,tri_accel_t0); if ((int)mesh->numTimeSteps == 1) { store16f_ngo(&acc->t1,tri_accel_t0); } else { assert( (int)mesh->numTimeSteps == 2 ); const float *__restrict__ const vptr0_t1 = (float*)&mesh->vertex(tri.v[0],1);
} } } } void BVH4HairBuilder::parallelPartitioning(BuildRecord& current, Bezier1i * __restrict__ l_source, Bezier1i * __restrict__ r_source, Bezier1i * __restrict__ l_dest, Bezier1i * __restrict__ r_dest, const Split &split, Centroid_Scene_AABB &local_left, Centroid_Scene_AABB &local_right) { const mic_f centroidMin = broadcast4to16f(¤t.bounds.centroid2.lower); const mic_f centroidMax = broadcast4to16f(¤t.bounds.centroid2.upper); const mic_f centroidBoundsMin_2 = centroidMin; const mic_f centroidDiagonal_2 = centroidMax-centroidMin; const mic_f scale = select(centroidDiagonal_2 != 0.0f,rcp(centroidDiagonal_2) * mic_f(16.0f * 0.99f),mic_f::zero()); const unsigned int bestSplitDim = split.dim; const unsigned int bestSplit = split.pos; const mic_f c = mic_f(centroidBoundsMin_2[bestSplitDim]); const mic_f s = mic_f(scale[bestSplitDim]); mic_f leftSceneBoundsMin((float)pos_inf); mic_f leftSceneBoundsMax((float)neg_inf); mic_f leftCentroidBoundsMin((float)pos_inf);