void TriangleMesh::interpolate(unsigned primID, float u, float v, RTCBufferType buffer, float* P, float* dPdu, float* dPdv, size_t numFloats) { /* test if interpolation is enabled */ #if defined(DEBUG) if ((parent->aflags & RTC_INTERPOLATE) == 0) throw_RTCError(RTC_INVALID_OPERATION,"rtcInterpolate can only get called when RTC_INTERPOLATE is enabled for the scene"); #endif /* calculate base pointer and stride */ assert((buffer >= RTC_VERTEX_BUFFER0 && buffer <= RTC_VERTEX_BUFFER1) || (buffer >= RTC_USER_VERTEX_BUFFER0 && buffer <= RTC_USER_VERTEX_BUFFER1)); const char* src = nullptr; size_t stride = 0; if (buffer >= RTC_USER_VERTEX_BUFFER0) { src = userbuffers[buffer&0xFFFF]->getPtr(); stride = userbuffers[buffer&0xFFFF]->getStride(); } else { src = vertices[buffer&0xFFFF].getPtr(); stride = vertices[buffer&0xFFFF].getStride(); } #if !defined(__MIC__) for (size_t i=0; i<numFloats; i+=4) { size_t ofs = i*sizeof(float); const float w = 1.0f-u-v; const Triangle& tri = triangle(primID); const vfloat4 p0 = vfloat4::loadu((float*)&src[tri.v[0]*stride+ofs]); const vfloat4 p1 = vfloat4::loadu((float*)&src[tri.v[1]*stride+ofs]); const vfloat4 p2 = vfloat4::loadu((float*)&src[tri.v[2]*stride+ofs]); const vbool4 valid = vint4(i)+vint4(step) < vint4(numFloats); if (P ) vfloat4::storeu(valid,P+i,w*p0 + u*p1 + v*p2); if (dPdu) vfloat4::storeu(valid,dPdu+i,p1-p0); if (dPdv) vfloat4::storeu(valid,dPdv+i,p2-p0); } #else for (size_t i=0; i<numFloats; i+=16) { size_t ofs = i*sizeof(float); vbool16 mask = (i+16 > numFloats) ? (vbool16)(((unsigned int)1 << (numFloats-i))-1) : vbool16( true ); const float w = 1.0f-u-v; const Triangle& tri = triangle(primID); const vfloat16 p0 = vfloat16::loadu(mask,(float*)&src[tri.v[0]*stride+ofs]); const vfloat16 p1 = vfloat16::loadu(mask,(float*)&src[tri.v[1]*stride+ofs]); const vfloat16 p2 = vfloat16::loadu(mask,(float*)&src[tri.v[2]*stride+ofs]); if (P ) vfloat16::storeu_compact(mask,P+i,w*p0 + u*p1 + v*p2); if (dPdu) vfloat16::storeu_compact(mask,dPdu+i,p1-p0); if (dPdv) vfloat16::storeu_compact(mask,dPdv+i,p2-p0); } #endif }
void BezierCurves::interpolate(unsigned primID, float u, float v, RTCBufferType buffer, float* P, float* dPdu, float* dPdv, size_t numFloats) { /* test if interpolation is enabled */ #if defined(DEBUG) if ((parent->aflags & RTC_INTERPOLATE) == 0) throw_RTCError(RTC_INVALID_OPERATION,"rtcInterpolate can only get called when RTC_INTERPOLATE is enabled for the scene"); #endif /* calculate base pointer and stride */ assert((buffer >= RTC_VERTEX_BUFFER0 && buffer <= RTC_VERTEX_BUFFER1) || (buffer >= RTC_USER_VERTEX_BUFFER0 && buffer <= RTC_USER_VERTEX_BUFFER1)); const char* src = nullptr; size_t stride = 0; if (buffer >= RTC_USER_VERTEX_BUFFER0) { src = userbuffers[buffer&0xFFFF]->getPtr(); stride = userbuffers[buffer&0xFFFF]->getStride(); } else { src = vertices[buffer&0xFFFF].getPtr(); stride = vertices[buffer&0xFFFF].getStride(); } #if !defined(__MIC__) for (size_t i=0; i<numFloats; i+=4) { size_t ofs = i*sizeof(float); const size_t curve = curves[primID]; const vfloat4 p0 = vfloat4::loadu((float*)&src[(curve+0)*stride+ofs]); const vfloat4 p1 = vfloat4::loadu((float*)&src[(curve+1)*stride+ofs]); const vfloat4 p2 = vfloat4::loadu((float*)&src[(curve+2)*stride+ofs]); const vfloat4 p3 = vfloat4::loadu((float*)&src[(curve+3)*stride+ofs]); const vbool4 valid = vint4(i)+vint4(step) < vint4(numFloats); const BezierCurveT<vfloat4> bezier(p0,p1,p2,p3,0.0f,1.0f,0); vfloat4 Q, dQdu; bezier.eval(u,Q,dQdu); if (P ) vfloat4::storeu(valid,P+i,Q); if (dPdu) vfloat4::storeu(valid,dPdu+i,dQdu); } #else for (size_t i=0; i<numFloats; i+=16) { size_t ofs = i*sizeof(float); vbool16 mask = (i+16 > numFloats) ? (vbool16)(((unsigned int)1 << (numFloats-i))-1) : vbool16( true ); const size_t curve = curves[primID]; const vfloat16 p0 = vfloat16::loadu(mask,(float*)&src[(curve+0)*stride+ofs]); const vfloat16 p1 = vfloat16::loadu(mask,(float*)&src[(curve+1)*stride+ofs]); const vfloat16 p2 = vfloat16::loadu(mask,(float*)&src[(curve+2)*stride+ofs]); const vfloat16 p3 = vfloat16::loadu(mask,(float*)&src[(curve+3)*stride+ofs]); const BezierCurveT<vfloat16> bezier(p0,p1,p2,p3,0.0f,1.0f,0); vfloat16 Q, dQdu; bezier.eval(u,Q,dQdu); if (P ) vfloat16::storeu_compact(mask,P+i,Q); if (dPdu) vfloat16::storeu_compact(mask,dPdu+i,dQdu); } #endif }