void sample(const Point &p, LuminaireSamplingRecord &lRec, const Point2 &sample) const { lRec.pdf = m_shape->sampleSolidAngle(lRec.sRec, p, sample); lRec.d = p - lRec.sRec.p; if (EXPECT_TAKEN(lRec.pdf > 0 && dot(lRec.d, lRec.sRec.n) > 0)) { lRec.value = m_intensity; lRec.d = normalize(lRec.d); } else { lRec.pdf = 0; } }
void GLRenderer::drawAll(const std::vector<TransformedGPUGeometry> &allGeometry) { Matrix4x4 curObjTrafo; curObjTrafo.setIdentity(); glMatrixMode(GL_MODELVIEW); Matrix4x4 backup = fetchMatrix(GL_MODELVIEW_MATRIX); GLRenderer::beginDrawingMeshes(true); if (m_capabilities->isSupported(RendererCapabilities::EBindless)) { for (std::vector<TransformedGPUGeometry>::const_iterator it = allGeometry.begin(); it != allGeometry.end(); ++it) { const GLGeometry *geo = static_cast<const GLGeometry *>((*it).first); const Matrix4x4 &trafo = (*it).second; const TriMesh *mesh = geo->getTriMesh(); GLuint indexSize = geo->m_size[GLGeometry::EIndexID]; GLuint vertexSize = geo->m_size[GLGeometry::EVertexID]; GLuint64 indexAddr = geo->m_addr[GLGeometry::EIndexID]; GLuint64 vertexAddr = geo->m_addr[GLGeometry::EVertexID]; if (trafo != curObjTrafo) { loadMatrix(backup * trafo); curObjTrafo = trafo; } int stride = geo->m_stride; if (stride != m_stride) { glVertexFormatNV(3, GL_FLOAT, stride); m_stride = stride; } glBufferAddressRangeNV(GL_VERTEX_ARRAY_ADDRESS_NV, 0, vertexAddr, vertexSize); glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, indexAddr, indexSize); size_t size = mesh->getTriangleCount(); if (EXPECT_TAKEN(m_queuedTriangles + size < MTS_GL_MAX_QUEUED_TRIS)) { /* Draw all triangles */ glDrawElements(GL_TRIANGLES, (GLsizei) (size * 3), GL_UNSIGNED_INT, (GLvoid *) 0); m_queuedTriangles += size; } else { /* Spoon-feed them (keeps the OS responsive) */ size_t size = mesh->getTriangleCount(), cur = 0; while (cur < size) { size_t drawAmt = std::min(size - cur, MTS_GL_MAX_QUEUED_TRIS - m_queuedTriangles); if (drawAmt > 0) glDrawElements(GL_TRIANGLES, (GLsizei) (drawAmt * 3), GL_UNSIGNED_INT, (GLuint *) 0 + cur * 3); m_queuedTriangles += drawAmt; cur += drawAmt; if (cur < size) finish(); } } } } else { for (std::vector<TransformedGPUGeometry>::const_iterator it = allGeometry.begin(); it != allGeometry.end(); ++it) { const GLGeometry *geo = static_cast<const GLGeometry 
*>((*it).first); const Matrix4x4 &trafo = (*it).second; const TriMesh *mesh = geo->getTriMesh(); if (trafo != curObjTrafo) { loadMatrix(backup * trafo); curObjTrafo = trafo; } glBindBuffer(GL_ARRAY_BUFFER, geo->m_id[GLGeometry::EVertexID]); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo->m_id[GLGeometry::EIndexID]); /* Set up the vertex/normal arrays */ glVertexPointer(3, GL_FLOAT, geo->m_stride, (GLfloat *) 0); size_t size = mesh->getTriangleCount(); if (EXPECT_TAKEN(m_queuedTriangles + size < MTS_GL_MAX_QUEUED_TRIS)) { /* Draw all triangles */ glDrawElements(GL_TRIANGLES, (GLsizei) (size * 3), GL_UNSIGNED_INT, (GLvoid *) 0); m_queuedTriangles += size; } else { /* Spoon-feed them (keeps the OS responsive) */ size_t size = mesh->getTriangleCount(), cur = 0; while (cur < size) { size_t drawAmt = std::min(size - cur, MTS_GL_MAX_QUEUED_TRIS - m_queuedTriangles); if (drawAmt > 0) glDrawElements(GL_TRIANGLES, (GLsizei) (drawAmt * 3), GL_UNSIGNED_INT, (GLuint *) 0 + cur * 3); m_queuedTriangles += drawAmt; cur += drawAmt; if (cur < size) finish(); } } } } GLRenderer::endDrawingMeshes(); if (!curObjTrafo.isIdentity()) loadMatrix(backup); }
void GLRenderer::drawMesh(const GPUGeometry *_geo) { const GLGeometry *geo = static_cast<const GLGeometry *>(_geo); const TriMesh *mesh = geo->getTriMesh(); GLuint indexSize = geo->m_size[GLGeometry::EIndexID]; GLuint vertexSize = geo->m_size[GLGeometry::EVertexID]; /* Draw using vertex buffer objects (bindless if supported) */ if (m_capabilities->isSupported(RendererCapabilities::EBindless)) { GLuint64 indexAddr = geo->m_addr[GLGeometry::EIndexID]; GLuint64 vertexAddr = geo->m_addr[GLGeometry::EVertexID]; int stride = geo->m_stride; if (stride != m_stride) { glVertexFormatNV(3, GL_FLOAT, stride); glNormalFormatNV(GL_FLOAT, stride); glClientActiveTexture(GL_TEXTURE0); glTexCoordFormatNV(2, GL_FLOAT, stride); glClientActiveTexture(GL_TEXTURE1); glTexCoordFormatNV(3, GL_FLOAT, stride); glColorFormatNV(3, GL_FLOAT, stride); m_stride = stride; } glBufferAddressRangeNV(GL_VERTEX_ARRAY_ADDRESS_NV, 0, vertexAddr, vertexSize); if (!m_transmitOnlyPositions) { int pos = 3 * sizeof(GLfloat); if (mesh->hasVertexNormals()) { if (!m_normalsEnabled) { glEnableClientState(GL_NORMAL_ARRAY); m_normalsEnabled = true; } glBufferAddressRangeNV(GL_NORMAL_ARRAY_ADDRESS_NV, 0, vertexAddr + pos, vertexSize - pos); pos += 3 * sizeof(GLfloat); } else if (m_normalsEnabled) { glDisableClientState(GL_NORMAL_ARRAY); m_normalsEnabled = false; } if (mesh->hasVertexTexcoords()) { glClientActiveTexture(GL_TEXTURE0); if (!m_texcoordsEnabled) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); m_texcoordsEnabled = true; } glBufferAddressRangeNV(GL_TEXTURE_COORD_ARRAY_ADDRESS_NV, 0, vertexAddr + pos, vertexSize - pos); pos += 2 * sizeof(GLfloat); } else if (m_texcoordsEnabled) { glClientActiveTexture(GL_TEXTURE0); glDisableClientState(GL_TEXTURE_COORD_ARRAY); m_texcoordsEnabled = false; } /* Pass 'dpdu' as second set of texture coordinates */ if (mesh->hasUVTangents()) { glClientActiveTexture(GL_TEXTURE1); if (!m_tangentsEnabled) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); m_tangentsEnabled = true; } 
glBufferAddressRangeNV(GL_TEXTURE_COORD_ARRAY_ADDRESS_NV, 1, vertexAddr + pos, vertexSize - pos); pos += 3 * sizeof(GLfloat); } else if (m_tangentsEnabled) { glClientActiveTexture(GL_TEXTURE1); glDisableClientState(GL_TEXTURE_COORD_ARRAY); m_tangentsEnabled = false; } if (mesh->hasVertexColors()) { if (!m_colorsEnabled) { glEnableClientState(GL_COLOR_ARRAY); m_colorsEnabled = true; } glBufferAddressRangeNV(GL_COLOR_ARRAY_ADDRESS_NV, 0, vertexAddr + pos, vertexSize - pos); } else if (m_colorsEnabled) { glDisableClientState(GL_COLOR_ARRAY); m_colorsEnabled = false; } } glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, indexAddr, indexSize); } else { glBindBuffer(GL_ARRAY_BUFFER, geo->m_id[GLGeometry::EVertexID]); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, geo->m_id[GLGeometry::EIndexID]); int stride = geo->m_stride; /* Set up the vertex/normal arrays */ glVertexPointer(3, GL_FLOAT, stride, (GLfloat *) 0); if (!m_transmitOnlyPositions) { int pos = 3; if (mesh->hasVertexNormals()) { if (!m_normalsEnabled) { glEnableClientState(GL_NORMAL_ARRAY); m_normalsEnabled = true; } glNormalPointer(GL_FLOAT, stride, (GLfloat *) 0 + pos); pos += 3; } else if (m_normalsEnabled) { glDisableClientState(GL_NORMAL_ARRAY); m_normalsEnabled = false; } if (mesh->hasVertexTexcoords()) { glClientActiveTexture(GL_TEXTURE0); if (!m_texcoordsEnabled) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); m_texcoordsEnabled = true; } glTexCoordPointer(2, GL_FLOAT, stride, (GLfloat *) 0 + pos); pos += 2; } else if (m_texcoordsEnabled) { glClientActiveTexture(GL_TEXTURE0); glDisableClientState(GL_TEXTURE_COORD_ARRAY); m_texcoordsEnabled = false; } /* Pass 'dpdu' as second set of texture coordinates */ if (mesh->hasUVTangents()) { glClientActiveTexture(GL_TEXTURE1); if (!m_tangentsEnabled) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); m_tangentsEnabled = true; } glTexCoordPointer(3, GL_FLOAT, stride, (GLfloat *) 0 + pos); pos += 3; } else if (m_tangentsEnabled) { glClientActiveTexture(GL_TEXTURE1); 
glDisableClientState(GL_TEXTURE_COORD_ARRAY); m_tangentsEnabled = false; } if (mesh->hasVertexColors()) { if (!m_colorsEnabled) { glEnableClientState(GL_COLOR_ARRAY); m_colorsEnabled = true; } glColorPointer(3, GL_FLOAT, stride, (GLfloat *) 0 + pos); } else if (m_colorsEnabled) { glDisableClientState(GL_COLOR_ARRAY); m_colorsEnabled = false; } } } size_t size = mesh->getTriangleCount(); if (EXPECT_TAKEN(m_queuedTriangles + size < MTS_GL_MAX_QUEUED_TRIS)) { /* Draw all triangles */ glDrawElements(GL_TRIANGLES, (GLsizei) (size * 3), GL_UNSIGNED_INT, (GLvoid *) 0); m_queuedTriangles += size; } else { /* Spoon-feed them (keeps the OS responsive) */ size_t size = mesh->getTriangleCount(), cur = 0; while (cur < size) { size_t drawAmt = std::min(size - cur, MTS_GL_MAX_QUEUED_TRIS - m_queuedTriangles); if (drawAmt > 0) glDrawElements(GL_TRIANGLES, (GLsizei) (drawAmt * 3), GL_UNSIGNED_INT, (GLuint *) 0 + cur * 3); m_queuedTriangles += drawAmt; cur += drawAmt; if (cur < size) finish(); } } }
void GLRenderer::drawMesh(const TriMesh *mesh) { std::map<const Shape *, GPUGeometry *>::iterator it = m_geometry.find(mesh); if (it != m_geometry.end()) { GLRenderer::drawMesh((*it).second); } else { /* This shape is not resident in GPU memory. Draw the slow way.. */ const GLchar *positions = (const GLchar *) mesh->getVertexPositions(); const GLchar *normals = (const GLchar *) mesh->getVertexNormals(); const GLchar *texcoords = (const GLchar *) mesh->getVertexTexcoords(); const GLchar *tangents = (const GLchar *) mesh->getUVTangents(); const GLchar *colors = (const GLchar *) mesh->getVertexColors(); const GLint *indices = (const GLint *) mesh->getTriangles(); GLenum dataType = sizeof(Float) == 4 ? GL_FLOAT : GL_DOUBLE; glVertexPointer(3, dataType, 0, positions); if (!m_transmitOnlyPositions) { if (mesh->hasVertexNormals()) { if (!m_normalsEnabled) { glEnableClientState(GL_NORMAL_ARRAY); m_normalsEnabled = true; } glNormalPointer(dataType, 0, normals); } else if (m_normalsEnabled) { glDisableClientState(GL_NORMAL_ARRAY); m_normalsEnabled = false; } glClientActiveTexture(GL_TEXTURE0); if (mesh->hasVertexTexcoords()) { if (!m_texcoordsEnabled) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); m_texcoordsEnabled = true; } glTexCoordPointer(2, dataType, 0, texcoords); } else if (m_texcoordsEnabled) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); m_texcoordsEnabled = false; } /* Pass 'dpdu' as second set of texture coordinates */ glClientActiveTexture(GL_TEXTURE1); if (mesh->hasUVTangents()) { if (!m_tangentsEnabled) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); m_tangentsEnabled = true; } glTexCoordPointer(3, dataType, sizeof(Vector), tangents); } else if (m_tangentsEnabled) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); m_tangentsEnabled = false; } if (mesh->hasVertexColors()) { if (!m_colorsEnabled) { glEnableClientState(GL_COLOR_ARRAY); m_colorsEnabled = true; } glColorPointer(3, dataType, 0, colors); } else if (m_colorsEnabled) { 
glDisableClientState(GL_COLOR_ARRAY); m_colorsEnabled = false; } } size_t size = mesh->getTriangleCount(); if (EXPECT_TAKEN(m_queuedTriangles + size < MTS_GL_MAX_QUEUED_TRIS)) { /* Draw all triangles */ glDrawElements(GL_TRIANGLES, (GLsizei) (mesh->getTriangleCount()*3), GL_UNSIGNED_INT, indices); m_queuedTriangles += size; } else { /* Spoon-feed them (keeps the OS responsive) */ size_t size = mesh->getTriangleCount(), cur = 0; while (cur < size) { size_t drawAmt = std::min(size - cur, MTS_GL_MAX_QUEUED_TRIS - m_queuedTriangles); if (drawAmt > 0) glDrawElements(GL_TRIANGLES, (GLsizei) (drawAmt * 3), GL_UNSIGNED_INT, indices + cur * 3); m_queuedTriangles += drawAmt; cur += drawAmt; if (cur < size) finish(); } } } }
MTS_NAMESPACE_BEGIN

/**
 * Compute the partial derivatives of the texture coordinates with respect
 * to changes in the screen-space position (dudx, dvdx, dudy, dvdy).
 * Based on PBRT.
 *
 * The result is cached: once \c hasUVPartials is set, subsequent calls
 * return immediately. All four partials are set to zero when the ray
 * carries no differentials, when both \c dpdu and \c dpdv are zero, or
 * when either offset ray is parallel to the tangent plane at the
 * intersection.
 *
 * \param ray  Ray differential that produced this intersection; its
 *             rx/ry origins and directions are read.
 */
void Intersection::computePartials(const RayDifferential &ray) {
    Float A[2][2], Bx[2], By[2], x[2];
    int axes[2];

    if (hasUVPartials)
        return;
    hasUVPartials = true;

    if (!ray.hasDifferentials || (dpdu.isZero() && dpdv.isZero())) {
        dudx = dvdx = dudy = dvdy = 0.0f;
        return;
    }

    /* Offset of the plane passing through the surface */
    const Float d = -dot(geoFrame.n, Vector(p));

    const Float txRecip = dot(geoFrame.n, ray.rxDirection),
                tyRecip = dot(geoFrame.n, ray.ryDirection);

    /* An offset ray parallel to the plane never intersects it */
    if (EXPECT_NOT_TAKEN(txRecip == 0 || tyRecip == 0)) {
        dudx = dvdx = dudy = dvdy = 0.0f;
        return;
    }

    /* Ray distances traveled */
    const Float tx = -(dot(geoFrame.n, Vector(ray.rxOrigin)) + d) / txRecip;
    const Float ty = -(dot(geoFrame.n, Vector(ray.ryOrigin)) + d) / tyRecip;

    /* Calculate the U and V partials by solving two out
       of a set of 3 equations in an overconstrained system.
       The axis along which the geometric normal has the largest
       magnitude is the one that gets dropped. */
    Float absX = std::abs(geoFrame.n.x),
          absY = std::abs(geoFrame.n.y),
          absZ = std::abs(geoFrame.n.z);

    if (absX > absY && absX > absZ) {
        axes[0] = 1; axes[1] = 2;
    } else if (absY > absZ) {
        axes[0] = 0; axes[1] = 2;
    } else {
        axes[0] = 0; axes[1] = 1;
    }

    A[0][0] = dpdu[axes[0]];
    A[0][1] = dpdv[axes[0]];
    A[1][0] = dpdu[axes[1]];
    A[1][1] = dpdv[axes[1]];

    /* Auxiliary intersection points of the adjacent rays */
    Point px = ray.rxOrigin + ray.rxDirection * tx,
          py = ray.ryOrigin + ray.ryDirection * ty;

    Bx[0] = px[axes[0]] - p[axes[0]];
    Bx[1] = px[axes[1]] - p[axes[1]];
    By[0] = py[axes[0]] - p[axes[0]];
    By[1] = py[axes[1]] - p[axes[1]];

    if (EXPECT_TAKEN(solveLinearSystem2x2(A, Bx, x))) {
        dudx = x[0]; dvdx = x[1];
    } else {
        /* Singular system: fall back to (du/dx, dv/dx) = (1, 0) */
        dudx = 1; dvdx = 0;
    }

    if (EXPECT_TAKEN(solveLinearSystem2x2(A, By, x))) {
        dudy = x[0]; dvdy = x[1];
    } else {
        /* Singular system: fall back to (du/dy, dv/dy) = (0, 1) */
        dudy = 0; dvdy = 1;
    }
}
/**
 * Render one rectangular work unit of the preview using packets of four
 * rays (one packet per 2x2 pixel block).
 *
 * For every 2x2 block, four primary rays are traced from m_cameraO, the
 * hit points are shaded (scalar code) with respect to the virtual point
 * light m_vpl, and four shadow rays are traced towards the VPL position.
 * A pixel receives 'direct + emitted' when its shadow ray reports no
 * intersection and 'emitted' alone otherwise. The number of rays counted
 * in 'numRays' is stored in the image block via setExtra().
 *
 * When MTS_HAS_COHERENT_RT is not defined, the function only logs an error.
 *
 * \param workUnit    Cast to RectangularWorkUnit; supplies offset and size
 * \param workResult  Cast to ImageBlock; receives the pixels
 * \param stop        Not referenced by this function
 */
void PreviewWorker::processCoherent(const WorkUnit *workUnit, WorkResult *workResult,
    const bool &stop) {
#if defined(MTS_HAS_COHERENT_RT)
    const RectangularWorkUnit *rect = static_cast<const RectangularWorkUnit *>(workUnit);
    ImageBlock *block = static_cast<ImageBlock *>(workResult);

    block->setOffset(rect->getOffset());
    block->setSize(rect->getSize());

    /* Some constants */
    /* (sy is read back from the block, whose offset was just copied from 'rect') */
    const int sx = rect->getOffset().x, sy = block->getOffset().y;
    const int ex = sx + rect->getSize().x, ey = sy + rect->getSize().y;
    const int width = rect->getSize().x;
    /* Per-lane pixel offsets within a 2x2 block: (0,0), (1,0), (0,1), (1,1) */
    const SSEVector MM_ALIGN16 xOffset(0.0f, 1.0f, 0.0f, 1.0f);
    const SSEVector MM_ALIGN16 yOffset(0.0f, 0.0f, 1.0f, 1.0f);
    /* Matching linear pixel offsets relative to 'pos' */
    const int pixelOffset[] = {0, 1, width, width+1};
    /* Upper bound applied to the inverse squared distance to the VPL */
    const __m128 clamping = _mm_set1_ps(1/(m_minDist*m_minDist));
    /* Scratch space handed to the kd-tree packet traversal
       (MTS_KD_INTERSECTION_TEMP bytes per ray) */
    uint8_t temp[MTS_KD_INTERSECTION_TEMP*4];

    /* Camera and VPL quantities, broadcast to all four lanes */
    const __m128 camTL[3] = {
         _mm_set1_ps(m_cameraTL.x),
         _mm_set1_ps(m_cameraTL.y),
         _mm_set1_ps(m_cameraTL.z)
    };
    const __m128 camDx[3] = {
         _mm_set1_ps(m_cameraDx.x),
         _mm_set1_ps(m_cameraDx.y),
         _mm_set1_ps(m_cameraDx.z)
    };
    const __m128 camDy[3] = {
         _mm_set1_ps(m_cameraDy.x),
         _mm_set1_ps(m_cameraDy.y),
         _mm_set1_ps(m_cameraDy.z)
    };
    const __m128 lumPos[3] = {
        _mm_set1_ps(m_vpl.its.p.x),
        _mm_set1_ps(m_vpl.its.p.y),
        _mm_set1_ps(m_vpl.its.p.z)
    };
    const __m128 lumDir[3] = {
        _mm_set1_ps(m_vpl.its.shFrame.n.x),
        _mm_set1_ps(m_vpl.its.shFrame.n.y),
        _mm_set1_ps(m_vpl.its.shFrame.n.z)
    };

    /* Some local variables */
    /* 'pos' is the linear index of the top-left pixel of the current 2x2 block */
    int pos = 0;
    int numRays = 0;
    RayPacket4 MM_ALIGN16 primRay4, secRay4;
    Intersection4 MM_ALIGN16 its4, secIts4;
    RayInterval4 MM_ALIGN16 itv4, secItv4;
    SSEVector MM_ALIGN16 nSecD[3], cosThetaLight, invLengthSquared;
    Spectrum emitted[4], direct[4];
    /* Scalar intersection record that is reused for all shaded pixels */
    Intersection its;
    Vector wo, wi;
    its.hasUVPartials = false;

    bool diffuseVPL = false, vplOnSurface = false;
    Spectrum vplWeight;

    /* Precompute the VPL weight for the cases handled here. For a VPL that
       falls into neither branch, both flags stay false and the weight is
       computed per pixel in the shading loop. */
    if (m_vpl.type == ESurfaceVPL && (m_diffuseSources || m_vpl.its.shape->getBSDF()->getType() == BSDF::EDiffuseReflection)) {
        diffuseVPL = true;
        vplOnSurface = true;
        vplWeight = m_vpl.its.shape->getBSDF()->getDiffuseReflectance(m_vpl.its) * m_vpl.P / M_PI;
    } else if (m_vpl.type == ELuminaireVPL) {
        vplOnSurface = m_vpl.luminaire->getType() & Luminaire::EOnSurface;
        diffuseVPL = m_vpl.luminaire->getType() & Luminaire::EDiffuseDirection;
        EmissionRecord eRec(m_vpl.luminaire,
            ShapeSamplingRecord(m_vpl.its.p, m_vpl.its.shFrame.n), m_vpl.its.shFrame.n);
        vplWeight = m_vpl.P * m_vpl.luminaire->evalDirection(eRec);
    }

    /* All primary rays share the camera origin */
    primRay4.o[0].ps = _mm_set1_ps(m_cameraO.x);
    primRay4.o[1].ps = _mm_set1_ps(m_cameraO.y);
    primRay4.o[2].ps = _mm_set1_ps(m_cameraO.z);
    secItv4.mint.ps = _mm_set1_ps(ShadowEpsilon);

    /* Work on 2x2 sub-blocks */
    /* NOTE(review): the 'pos' bookkeeping (+2 per block, +width per row pair)
       appears to assume that the block width and height are even -- confirm */
    for (int y=sy; y<ey; y += 2, pos += width) {
        for (int x=sx; x<ex; x += 2, pos += 2) {
            /* Generate camera rays without normalization */
            const __m128
                xPixel = _mm_add_ps(xOffset.ps, _mm_set1_ps((float) x)),
                yPixel = _mm_add_ps(yOffset.ps, _mm_set1_ps((float) y));

            /* d = camTL + xPixel * camDx + yPixel * camDy (per component) */
            primRay4.d[0].ps = _mm_add_ps(camTL[0], _mm_add_ps(
                _mm_mul_ps(xPixel, camDx[0]), _mm_mul_ps(yPixel, camDy[0])));
            primRay4.d[1].ps = _mm_add_ps(camTL[1], _mm_add_ps(
                _mm_mul_ps(xPixel, camDx[1]), _mm_mul_ps(yPixel, camDy[1])));
            primRay4.d[2].ps = _mm_add_ps(camTL[2], _mm_add_ps(
                _mm_mul_ps(xPixel, camDx[2]), _mm_mul_ps(yPixel, camDy[2])));

            primRay4.dRcp[0].ps = _mm_div_ps(SSEConstants::one.ps, primRay4.d[0].ps);
            primRay4.dRcp[1].ps = _mm_div_ps(SSEConstants::one.ps, primRay4.d[1].ps);
            primRay4.dRcp[2].ps = _mm_div_ps(SSEConstants::one.ps, primRay4.d[2].ps);

            /* Ray coherence test: the packet is coherent when, for every axis,
               all four direction components have the same sign bit */
            const int primSignsX = _mm_movemask_ps(primRay4.d[0].ps);
            const int primSignsY = _mm_movemask_ps(primRay4.d[1].ps);
            const int primSignsZ = _mm_movemask_ps(primRay4.d[2].ps);

            const bool primCoherent =
                   (primSignsX == 0 || primSignsX == 0xF)
                && (primSignsY == 0 || primSignsY == 0xF)
                && (primSignsZ == 0 || primSignsZ == 0xF);

            /* Trace the primary rays */
            its4.t = SSEConstants::p_inf;
            if (EXPECT_TAKEN(primCoherent)) {
                primRay4.signs[0][0] = primSignsX ? 1 : 0;
                primRay4.signs[1][0] = primSignsY ? 1 : 0;
                primRay4.signs[2][0] = primSignsZ ? 1 : 0;
                m_kdtree->rayIntersectPacket(primRay4, itv4, its4, temp);
            } else {
                m_kdtree->rayIntersectPacketIncoherent(primRay4, itv4, its4, temp);
            }
            numRays += 4;

            /* Generate secondary rays: they start at the primary hit point
               and their (unnormalized) direction ends at the VPL position */
            secRay4.o[0].ps = _mm_add_ps(primRay4.o[0].ps, _mm_mul_ps(its4.t.ps, primRay4.d[0].ps));
            secRay4.o[1].ps = _mm_add_ps(primRay4.o[1].ps, _mm_mul_ps(its4.t.ps, primRay4.d[1].ps));
            secRay4.o[2].ps = _mm_add_ps(primRay4.o[2].ps, _mm_mul_ps(its4.t.ps, primRay4.d[2].ps));
            secRay4.d[0].ps = _mm_sub_ps(lumPos[0], secRay4.o[0].ps);
            secRay4.d[1].ps = _mm_sub_ps(lumPos[1], secRay4.o[1].ps);
            secRay4.d[2].ps = _mm_sub_ps(lumPos[2], secRay4.o[2].ps);

            /* Normalization (approximate reciprocal / reciprocal square root) */
            const __m128
                lengthSquared = _mm_add_ps(_mm_add_ps(
                    _mm_mul_ps(secRay4.d[0].ps, secRay4.d[0].ps),
                    _mm_mul_ps(secRay4.d[1].ps, secRay4.d[1].ps)),
                    _mm_mul_ps(secRay4.d[2].ps, secRay4.d[2].ps)),
                invLength = _mm_rsqrt_ps(lengthSquared);

            /* Inverse squared distance, limited by the 'clamping' bound */
            invLengthSquared.ps = _mm_min_ps(_mm_rcp_ps(lengthSquared), clamping);

            nSecD[0].ps = _mm_mul_ps(secRay4.d[0].ps, invLength);
            nSecD[1].ps = _mm_mul_ps(secRay4.d[1].ps, invLength);
            nSecD[2].ps = _mm_mul_ps(secRay4.d[2].ps, invLength);

            secRay4.dRcp[0].ps = _mm_div_ps(SSEConstants::one.ps, secRay4.d[0].ps);
            secRay4.dRcp[1].ps = _mm_div_ps(SSEConstants::one.ps, secRay4.d[1].ps);
            secRay4.dRcp[2].ps = _mm_div_ps(SSEConstants::one.ps, secRay4.d[2].ps);

            /* cosThetaLight = -dot(nSecD, lumDir) */
            cosThetaLight.ps = _mm_sub_ps(_mm_setzero_ps(),
                _mm_add_ps(_mm_add_ps(
                    _mm_mul_ps(nSecD[0].ps, lumDir[0]),
                    _mm_mul_ps(nSecD[1].ps, lumDir[1])),
                    _mm_mul_ps(nSecD[2].ps, lumDir[2])));

            /* The secondary direction is unnormalized, so t = 1 corresponds
               to the VPL position; stop slightly before it */
            secItv4.maxt.ps = _mm_set1_ps(1-ShadowEpsilon);

            /* Shading (scalar) --- this is way too much work and should be
               rewritten to be smarter in special cases */
            for (int idx=0; idx<4; ++idx) {
                if (EXPECT_NOT_TAKEN(its4.t.f[idx] == std::numeric_limits<float>::infinity())) {
                    /* The primary ray missed: don't trace a secondary ray
                       and use the scaled background radiance */
                    secItv4.maxt.f[idx] = 0;
                    emitted[idx] = m_scene->LeBackground(Ray(
                        Point(primRay4.o[0].f[idx], primRay4.o[1].f[idx], primRay4.o[2].f[idx]),
                        Vector(primRay4.d[0].f[idx], primRay4.d[1].f[idx], primRay4.d[2].f[idx]),
                        0.0f
                    )) * m_backgroundScale;
                    memset(&direct[idx], 0, sizeof(Spectrum));
                    continue;
                }

                const unsigned int primIndex = its4.primIndex.i[idx];
                const Shape *shape = (*m_shapes)[its4.shapeIndex.i[idx]];
                const BSDF *bsdf = shape->getBSDF();

                /* Shapes without a BSDF contribute nothing */
                if (EXPECT_NOT_TAKEN(!bsdf)) {
                    memset(&emitted[idx], 0, sizeof(Spectrum));
                    memset(&direct[idx], 0, sizeof(Spectrum));
                    continue;
                }

                if (EXPECT_TAKEN(primIndex != KNoTriangleFlag)) {
                    /* Triangle hit: interpolate the vertex attributes using
                       the barycentric coordinates from the packet result */
                    const TriMesh *mesh = static_cast<const TriMesh *>(shape);
                    const Triangle &t = mesh->getTriangles()[primIndex];
                    const Normal *normals = mesh->getVertexNormals();
                    const Point2 *texcoords = mesh->getVertexTexcoords();
                    const Spectrum *colors = mesh->getVertexColors();
                    const TangentSpace * tangents = mesh->getVertexTangents();
                    const Float beta  = its4.u.f[idx],
                                gamma = its4.v.f[idx],
                                alpha = 1.0f - beta - gamma;
                    const uint32_t idx0 = t.idx[0], idx1 = t.idx[1], idx2 = t.idx[2];

                    if (EXPECT_TAKEN(normals)) {
                        const Normal &n0 = normals[idx0],
                                     &n1 = normals[idx1],
                                     &n2 = normals[idx2];
                        its.shFrame.n = normalize(n0 * alpha + n1 * beta + n2 * gamma);
                    } else {
                        /* No vertex normals: use the face normal
                           (left unnormalized if the triangle is degenerate) */
                        const Point *positions = mesh->getVertexPositions();
                        const Point &p0 = positions[idx0],
                                    &p1 = positions[idx1],
                                    &p2 = positions[idx2];
                        Vector sideA = p1 - p0, sideB = p2 - p0;
                        Vector n = cross(sideA, sideB);
                        Float nLengthSqr = n.lengthSquared();
                        if (nLengthSqr != 0)
                            n /= std::sqrt(nLengthSqr);
                        its.shFrame.n = Normal(n);
                    }

                    if (EXPECT_TAKEN(texcoords)) {
                        const Point2 &t0 = texcoords[idx0],
                                     &t1 = texcoords[idx1],
                                     &t2 = texcoords[idx2];
                        its.uv = t0 * alpha + t1 * beta + t2 * gamma;
                    } else {
                        its.uv = Point2(0.0f);
                    }

                    /* its.color, its.dpdu and its.dpdv are only written when
                       the mesh provides the corresponding arrays; otherwise
                       they keep whatever value they had before */
                    if (EXPECT_NOT_TAKEN(colors)) {
                        const Spectrum &c0 = colors[idx0],
                                       &c1 = colors[idx1],
                                       &c2 = colors[idx2];
                        its.color = c0 * alpha + c1 * beta + c2 * gamma;
                    }

                    if (EXPECT_NOT_TAKEN(tangents)) {
                        const TangentSpace &t0 = tangents[idx0],
                                           &t1 = tangents[idx1],
                                           &t2 = tangents[idx2];
                        its.dpdu = t0.dpdu * alpha + t1.dpdu * beta + t2.dpdu * gamma;
                        its.dpdv = t0.dpdv * alpha + t1.dpdv * beta + t2.dpdv * gamma;
                    }
                } else {
                    /* Non-triangle shape: let the shape fill in the record */
                    Ray ray(
                        Point(primRay4.o[0].f[idx], primRay4.o[1].f[idx], primRay4.o[2].f[idx]),
                        Vector(primRay4.d[0].f[idx], primRay4.d[1].f[idx], primRay4.d[2].f[idx]),
                        0.0f
                    );
                    its.t = its4.t.f[idx];
                    /* NOTE(review): the '+ 8' presumably skips a header inside
                       this ray's temporary storage -- confirm against the
                       kd-tree implementation */
                    shape->fillIntersectionRecord(ray, temp + idx * MTS_KD_INTERSECTION_TEMP + 8, its);
                    bsdf = its.shape->getBSDF();
                }

                /* Normalized direction from the hit point towards the VPL */
                wo.x = nSecD[0].f[idx]; wo.y = nSecD[1].f[idx]; wo.z = nSecD[2].f[idx];

                if (EXPECT_TAKEN(!shape->isLuminaire())) {
                    memset(&emitted[idx], 0, sizeof(Spectrum));
                } else {
                    /* NOTE(review): on the triangle path 'its.p' has not been
                       assigned for this pixel yet (it is only written in the
                       general shading branch below), so it may be stale or
                       uninitialized here -- confirm that Le() does not
                       depend on it */
                    Vector d(-primRay4.d[0].f[idx], -primRay4.d[1].f[idx], -primRay4.d[2].f[idx]);
                    emitted[idx] = shape->getLuminaire()->Le(ShapeSamplingRecord(its.p, its.shFrame.n), d);
                }

                if (EXPECT_TAKEN(bsdf->getType() == BSDF::EDiffuseReflection && diffuseVPL)) {
                    /* Fast path: diffuse receiver lit by a diffuse VPL */
                    direct[idx] = (bsdf->getDiffuseReflectance(its) * vplWeight)
                        * (std::max((Float) 0.0f, dot(wo, its.shFrame.n))
                        * (vplOnSurface ? (std::max(cosThetaLight.f[idx], (Float) 0.0f) * INV_PI) : INV_PI)
                        * invLengthSquared.f[idx]);
                } else {
                    /* General path: build a full shading frame and evaluate
                       the BSDF */
                    wi.x = -primRay4.d[0].f[idx];
                    wi.y = -primRay4.d[1].f[idx];
                    wi.z = -primRay4.d[2].f[idx];
                    its.p.x = secRay4.o[0].f[idx];
                    its.p.y = secRay4.o[1].f[idx];
                    its.p.z = secRay4.o[2].f[idx];

                    if (EXPECT_NOT_TAKEN(bsdf->getType() & BSDF::EAnisotropic)) {
                        /* Anisotropic BSDF: align the frame with 'dpdu',
                           made orthogonal to the shading normal */
                        its.shFrame.s = normalize(its.dpdu - its.shFrame.n
                            * dot(its.shFrame.n, its.dpdu));
                        its.shFrame.t = cross(its.shFrame.n, its.shFrame.s);
                    } else {
                        coordinateSystem(its.shFrame.n, its.shFrame.s, its.shFrame.t);
                    }

                    const Float ctLight = cosThetaLight.f[idx];
                    wi = normalize(wi);

                    its.wi = its.toLocal(wi);
                    wo = its.toLocal(wo);

                    /* Non-diffuse VPLs: the weight is re-evaluated for
                       every pixel */
                    if (!diffuseVPL) {
                        if (m_vpl.type == ESurfaceVPL) {
                            BSDFQueryRecord bRec(m_vpl.its, m_vpl.its.toLocal(wi));
                            bRec.quantity = EImportance;
                            vplWeight = m_vpl.its.shape->getBSDF()->eval(bRec) * m_vpl.P;
                        } else {
                            EmissionRecord eRec(m_vpl.luminaire,
                                ShapeSamplingRecord(m_vpl.its.p, m_vpl.its.shFrame.n), wi);
                            eRec.type = EmissionRecord::EPreview;
                            vplWeight = m_vpl.luminaire->evalDirection(eRec) * m_vpl.P;
                        }
                    }

                    /* A negative cosine at the VPL yields no direct light */
                    if (EXPECT_TAKEN(ctLight >= 0)) {
                        direct[idx] = (bsdf->eval(BSDFQueryRecord(its, wo)) * vplWeight
                            * ((vplOnSurface ? std::max(ctLight, (Float) 0.0f) : 1.0f) * invLengthSquared.f[idx]));
                    } else {
                        memset(&direct[idx], 0, sizeof(Spectrum));
                    }
                }
                /* Counted once per shaded lane (lanes that hit 'continue'
                   above are skipped) */
                ++numRays;
            }

            /* Coherence test for the secondary rays */
            const int secSignsX = _mm_movemask_ps(secRay4.d[0].ps);
            const int secSignsY = _mm_movemask_ps(secRay4.d[1].ps);
            const int secSignsZ = _mm_movemask_ps(secRay4.d[2].ps);

            const bool secCoherent =
                   (secSignsX == 0 || secSignsX == 0xF)
                && (secSignsY == 0 || secSignsY == 0xF)
                && (secSignsZ == 0 || secSignsZ == 0xF);

            /* Shoot the secondary rays */
            secIts4.t = SSEConstants::p_inf;
            if (EXPECT_TAKEN(secCoherent)) {
                secRay4.signs[0][0] = secSignsX ? 1 : 0;
                secRay4.signs[1][0] = secSignsY ? 1 : 0;
                secRay4.signs[2][0] = secSignsZ ? 1 : 0;
                m_kdtree->rayIntersectPacket(secRay4, secItv4, secIts4, temp);
            } else {
                m_kdtree->rayIntersectPacketIncoherent(secRay4, secItv4, secIts4, temp);
            }

            /* Write the four pixels: an untouched 't' (still infinity) means
               no intersection was recorded for that secondary ray */
            for (int idx=0; idx<4; ++idx) {
                if (EXPECT_TAKEN(secIts4.t.f[idx] == std::numeric_limits<float>::infinity()))
                    block->setPixel(pos+pixelOffset[idx], direct[idx]+emitted[idx]);
                else
                    block->setPixel(pos+pixelOffset[idx], emitted[idx]);
            }
        }
    }
    block->setExtra(numRays);
#else
    Log(EError, "Coherent raytracing support was not compiled into this binary!");
#endif
}