void QuadMesh::interpolate(const RTCInterpolateArguments* const args) { unsigned int primID = args->primID; float u = args->u; float v = args->v; RTCBufferType bufferType = args->bufferType; unsigned int bufferSlot = args->bufferSlot; float* P = args->P; float* dPdu = args->dPdu; float* dPdv = args->dPdv; float* ddPdudu = args->ddPdudu; float* ddPdvdv = args->ddPdvdv; float* ddPdudv = args->ddPdudv; unsigned int valueCount = args->valueCount; /* calculate base pointer and stride */ assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); const char* src = nullptr; size_t stride = 0; if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { src = vertexAttribs[bufferSlot].getPtr(); stride = vertexAttribs[bufferSlot].getStride(); } else { src = vertices[bufferSlot].getPtr(); stride = vertices[bufferSlot].getStride(); } for (unsigned int i=0; i<valueCount; i+=4) { const vbool4 valid = vint4((int)i)+vint4(step) < vint4(int(valueCount)); const size_t ofs = i*sizeof(float); const Quad& tri = quad(primID); const vfloat4 p0 = vfloat4::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]); const vfloat4 p1 = vfloat4::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]); const vfloat4 p2 = vfloat4::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]); const vfloat4 p3 = vfloat4::loadu(valid,(float*)&src[tri.v[3]*stride+ofs]); const vbool4 left = u+v <= 1.0f; const vfloat4 Q0 = select(left,p0,p2); const vfloat4 Q1 = select(left,p1,p3); const vfloat4 Q2 = select(left,p3,p1); const vfloat4 U = select(left,u,vfloat4(1.0f)-u); const vfloat4 V = select(left,v,vfloat4(1.0f)-v); const vfloat4 W = 1.0f-U-V; if (P) { vfloat4::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2))); } if (dPdu) { assert(dPdu); vfloat4::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)); assert(dPdv); vfloat4::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)); } if (ddPdudu) { assert(ddPdudu); 
vfloat4::storeu(valid,ddPdudu+i,vfloat4(zero)); assert(ddPdvdv); vfloat4::storeu(valid,ddPdvdv+i,vfloat4(zero)); assert(ddPdudv); vfloat4::storeu(valid,ddPdudv+i,vfloat4(zero)); } } }
/*! Recursively applies tree rotations to the subtree referenced by
 *  parentRef. depth is the depth of parentRef inside the tree and is
 *  only used to enforce the BVH4::maxBuildDepth limit.
 *
 *  Returns 0 for barrier and leaf references. For inner nodes it
 *  returns one plus the largest depth reported by the children, where
 *  the depth of a child that got pushed down by a rotation is
 *  incremented by one. */
size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
{
  /*! barriers and leaves have no children that could be rotated */
  if (parentRef.isBarrier() || parentRef.isLeaf())
    return 0;

  Node* parent = parentRef.node();

  /*! process the subtrees bottom-up and remember their depths */
  vint4 cdepth;
  for (size_t i=0; i<4; i++) {
    cdepth[i] = (int)rotate(parent->child(i),depth+1);
  }

  /*! area measure of the current bounds of each child */
  vfloat4 extentX = parent->upper_x-parent->lower_x;
  vfloat4 extentY = parent->upper_y-parent->lower_y;
  vfloat4 extentZ = parent->upper_z-parent->lower_z;
  vfloat4 childArea = extentX*(extentY + extentZ) + extentY*extentZ;

  /*! bounds of the four children of the parent node */
  BBox<vfloat4> upperBounds[4];
  parent->bounds(upperBounds[0],upperBounds[1],upperBounds[2],upperBounds[3]);

  /*! Search for the best rotation. A rotation exchanges one child of
      the parent (child1) with one child (child2child) of a different
      inner child of the parent (child2). Only the rotation with the
      largest area reduction is remembered. */
  float bestArea = 0;
  size_t bestChild1 = size_t(-1);
  size_t bestChild2 = size_t(-1);
  size_t bestChild2Child = size_t(-1);

  for (size_t c2=0; c2<4; c2++)
  {
    /*! only inner nodes have children we could swap with */
    if (parent->child(c2).isBarrier() || parent->child(c2).isLeaf())
      continue;

    Node* inner = parent->child(c2).node();

    /*! bounds of the four children of child2 */
    BBox<vfloat4> lowerBounds[4];
    inner->bounds(lowerBounds[0],lowerBounds[1],lowerBounds[2],lowerBounds[3]);

    /*! for each child of the parent, try it at each of the four slots
        of child2 and keep the cheapest slot and its cost */
    float cheapest[4];
    int pos[4];
    for (size_t k=0; k<4; k++)
    {
      const BBox<vfloat4>& candidate = upperBounds[k];
      float atSlot0 = halfArea3f(merge(candidate,lowerBounds[1],lowerBounds[2],lowerBounds[3]));
      float atSlot1 = halfArea3f(merge(lowerBounds[0],candidate,lowerBounds[2],lowerBounds[3]));
      float atSlot2 = halfArea3f(merge(lowerBounds[0],lowerBounds[1],candidate,lowerBounds[3]));
      float atSlot3 = halfArea3f(merge(lowerBounds[0],lowerBounds[1],lowerBounds[2],candidate));
      vfloat4 slotCost = vfloat4(atSlot0,atSlot1,atSlot2,atSlot3);
      vfloat4 slotMin = vreduce_min(slotCost);
      pos[k] = (int)__bsf(movemask(slotMin == slotCost));
      cheapest[k] = extract<0>(slotMin);
    }

    /*! area change of child2 for every candidate child1 */
    vfloat4 area0123 = vfloat4(cheapest[0],cheapest[1],cheapest[2],cheapest[3]) - vfloat4(childArea[c2]);

    /*! a candidate is only usable if pushing it down one level keeps
        its subtree within the maximal build depth ... */
    const size_t mbd = BVH4::maxBuildDepth;
    vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd);

    /*! ... and if it is not child2 itself */
    valid &= vint4(c2) != vint4(step);
    if (none(valid))
      continue;

    size_t c1 = select_min(valid,area0123);
    float area = area0123[c1];

    /*! can happen if bounds are NANs */
    if (c1 == c2)
      continue;

    /*! remember this swap if it beats the best one found so far */
    if (area < bestArea)
    {
      bestArea = area;
      bestChild1 = c1;
      bestChild2 = c2;
      bestChild2Child = pos[c1];
    }
  }

  /*! without an improving swap the node is left untouched */
  if (bestChild1 == size_t(-1))
    return 1+reduce_max(cdepth);

  /*! apply the best rotation and refresh the bounds of child2 */
  Node* child2 = parent->child(bestChild2).node();
  BVH4::swap(parent,bestChild1,child2,bestChild2Child);
  parent->set(bestChild2,child2->bounds());
  BVH4::compact(parent);
  BVH4::compact(child2);

  /*! bestChild1 now sits one level deeper. The resulting depth is
   *  conservative, because the child that moved up in exchange could
   *  have been the one on the critical path. */
  cdepth[bestChild1]++;
  return 1+reduce_max(cdepth);
}