// Renders one object of an octree runtime into `frame`.
//
// The function proceeds in order through these stages:
//   1. validate the frame buffer,
//   2. select the preprocessor defines handed to m_compiler,
//   3. fill in m_input (sizes, flags, buffer pointers, matrices),
//   4. (re)size the auxiliary buffers,
//   5. launch the optional coarse pass and then the main pass,
//   6. optionally launch the blur kernel,
//   7. accumulate statistics and adapt m_numWarps for the next call.
//
// Parameters:
//   frame          - destination image; must be ABGR_8888 with a stride equal
//                    to width * bytes-per-pixel.
//   runtime        - octree runtime that owns the object (asserted non-null).
//   objectID       - object to render; used to query attachment types and
//                    the root node pointer from `runtime`.
//   octreeToWorld  - transform used to build the octree/world matrices.
//   worldToCamera  - transform used to build the camera matrices.
//   projection     - projection matrix; its inverse is used for
//                    viewportToCamera.
//
// Returns an empty string on success, and also when the frame has a
// non-positive dimension (nothing is rendered in that case). Returns an error
// message when the frame buffer layout or the attribute attachment type is
// not supported.
String CudaRenderer::renderObject(
    Image& frame, OctreeRuntime* runtime, int objectID,
    const Mat4f& octreeToWorld, const Mat4f& worldToCamera, const Mat4f& projection)
{
    FW_ASSERT(runtime);

    // Check frame buffer validity.
    // An empty frame is not treated as an error: return silently.

    if (frame.getSize().min() <= 0)
        return "";

    if (frame.getFormat() != ImageFormat::ABGR_8888 || frame.getStride() != frame.getSize().x * frame.getBPP())
    {
        return "CudaRenderer: Incompatible framebuffer!";
    }

    // Determine preprocessor defines.
    // The define set is rebuilt from scratch on every call (clearDefines),
    // based on the object's attachment types and the current m_params.

    const Array<AttachIO::AttachType>& attach = runtime->getAttachTypes(objectID);
    FW_ASSERT(attach.getSize() == AttachSlot_Max);

    m_compiler.clearDefines();

    // Contours are used only if the object has a contour attachment AND they
    // are enabled in the parameters. The flag is remembered because the
    // coarse pass below temporarily removes the define.
    bool enableContours = (attach[AttachSlot_Contour] == AttachIO::ContourAttach && m_params.enableContours);
    if (enableContours)
        m_compiler.define("ENABLE_CONTOURS");

    // Exactly one attribute layout must be present; palette and corner
    // layouts additionally define DISABLE_PUSH_OPTIMIZATION.
    // NOTE(review): on the default path the function returns after
    // clearDefines() has already run, leaving m_compiler with a partial
    // define set - confirm callers do not rely on it after an error.
    switch (attach[AttachSlot_Attribute])
    {
    case AttachIO::ColorNormalPaletteAttach:
        m_compiler.define("VOXELATTRIB_PALETTE");
        m_compiler.define("DISABLE_PUSH_OPTIMIZATION");
        break;

    case AttachIO::ColorNormalCornerAttach:
        m_compiler.define("VOXELATTRIB_CORNER");
        m_compiler.define("DISABLE_PUSH_OPTIMIZATION");
        break;

    case AttachIO::ColorNormalDXTAttach:
        m_compiler.define("VOXELATTRIB_DXT");
        break;

    default:
        return "Unsupported attribute attachment!";
    }

    if (attach[AttachSlot_AO] == AttachIO::AOAttach)
        m_compiler.define("VOXELATTRIB_AO");

    // The two kernel flavors are mutually exclusive.
    if (m_params.measureRaycastPerf)
        m_compiler.define("KERNEL_RAYCAST_PERF");
    else
        m_compiler.define("KERNEL_RENDER");

    if (m_params.enablePerfCounters)
        m_compiler.define("ENABLE_PERF_COUNTERS");

    if (m_params.enableLargeReconstruction)
        m_compiler.define("LARGE_RECONSTRUCTION_KERNEL");

    if (m_params.enableJitterLOD)
        m_compiler.define("JITTER_LOD");

    if (m_params.visualization == Visualization_PrimaryAndShadow)
        m_compiler.define("ENABLE_SHADOWS");

    // The blur LUT is built lazily on first use; its size is passed to the
    // compiler as a define.
    if (!m_blurLUT.getSize())
        constructBlurLUT();
    m_compiler.define("BLUR_LUT_SIZE", String(m_blurLUT.getSize()));

    // Determine flags.
    // At most one visualization flag is set.

    U32 flags = 0;
    if (m_params.visualization == Visualization_IterationCount)
        flags |= RenderFlags_VisualizeIterations;
    else if (m_params.visualization == Visualization_RaycastLevel)
        flags |= RenderFlags_VisualizeRaycastLevel;

    // Set input.

    m_input.frameSize       = frame.getSize();
    m_input.flags           = flags;
    m_input.batchSize       = m_params.batchSize;
    m_input.aaRays          = (m_params.enableAntialias) ? 4 : 1; // 4 rays per pixel with antialiasing, otherwise 1
    m_input.maxVoxelSize    = m_params.maxVoxelSize;
    m_input.brightness      = m_params.brightness;
    m_input.coarseSize      = m_params.coarseSize;
    // Frame size divided by coarseSize, rounded up, plus one extra in each
    // dimension.
    // NOTE(review): divides by m_params.coarseSize without checking for zero,
    // and does so even when beam optimization is disabled - confirm the
    // parameter is validated elsewhere.
    m_input.coarseFrameSize = (m_input.frameSize + (m_params.coarseSize - 1)) / m_params.coarseSize + 1;
    m_input.frame           = frame.getBuffer().getMutableCudaPtr();
    m_input.rootNode        = runtime->getRootNodeCuda(objectID);

    OctreeMatrices& om = m_input.octreeMatrices;
    // Scale of 2/frameSize in x and y followed by a translation of (-1, -1):
    // maps pixel coordinates [0, frameSize] to [-1, 1] before the inverse
    // projection is applied.
    Vec3f scale         = Vec3f(Vec2f(2.0f) / Vec2f(m_input.frameSize), 1.0f);
    om.viewportToCamera = projection.inverted() * Mat4f::translate(Vec3f(-1.0f, -1.0f, 0.0f)) * Mat4f::scale(scale);
    // Octree space is offset by +1 on every axis relative to the object
    // space that octreeToWorld maps from (matching the -1 translation in
    // om.octreeToWorld below).
    om.cameraToOctree   = Mat4f::translate(Vec3f(1.0f)) * (worldToCamera * octreeToWorld).inverted();
    Mat4f vto           = om.cameraToOctree * om.viewportToCamera;
    // Square root of the length of the cross product of the first two
    // columns of the viewport-to-octree matrix.
    om.pixelInOctree    = sqrt(Vec4f(vto.col(0)).getXYZ().cross(Vec4f(vto.col(1)).getXYZ()).length());

    om.octreeToWorld    = octreeToWorld * Mat4f::translate(Vec3f(-1.0f));
    om.worldToOctree    = invert(om.octreeToWorld);
    // Inverse-transpose of octreeToWorld.getXYZ(); presumably the normal
    // transform matching octreeToWorld - confirm against kernel usage.
    om.octreeToWorldN   = octreeToWorld.getXYZ().inverted().transposed();
    // The point (0, 0, 0) transformed by the inverse of worldToCamera.
    om.cameraPosition   = invert(worldToCamera) * Vec3f(0.f, 0.f, 0.f);
    om.octreeToViewport = invert(om.viewportToCamera) * invert(om.cameraToOctree);
    om.viewportToOctreeN = (om.octreeToViewport).transposed();

    // Setup frame-related buffers.
    // The index-to-pixel table is rebuilt only when the frame size changes.
    // NOTE(review): unlike the coarse table below, the CPU side of
    // m_indexToPixel is not freed after the copy - confirm whether that
    // difference is intentional.

    int numPixels = m_input.frameSize.x * m_input.frameSize.y;
    if (m_pixelTable.getSize() != m_input.frameSize)
    {
        m_indexToPixel.resizeDiscard(numPixels * sizeof(S32));
        m_pixelTable.setSize(m_input.frameSize);
        memcpy(m_indexToPixel.getMutablePtr(), m_pixelTable.getIndexToPixel(), numPixels * sizeof(S32));
    }

    // Coarse frame and pixel buffers.
    // These are set up regardless of whether beam optimization is enabled.

    int coarseNumPixels = m_input.coarseFrameSize.x * m_input.coarseFrameSize.y;
    m_coarseFrameBuffer.resizeDiscard(coarseNumPixels * sizeof(S32));
    m_input.frameCoarse = m_coarseFrameBuffer.getMutableCudaPtr();

    if (m_coarsePixelTable.getSize() != m_input.coarseFrameSize)
    {
        m_coarseIndexToPixel.resizeDiscard(coarseNumPixels * sizeof(S32));
        m_coarsePixelTable.setSize(m_input.coarseFrameSize);
        memcpy(m_coarseIndexToPixel.getMutablePtr(), m_coarsePixelTable.getIndexToPixel(), coarseNumPixels * sizeof(S32));
        m_coarseIndexToPixel.free(Buffer::CPU);
    }

    // Temp frame buffer for blurring.
    // With blur enabled the render passes write into m_tempFrameBuffer; the
    // real frame pointer is restored just before the blur kernel is launched.

    if (m_params.enableBlur)
    {
        // override frame buffer address!
        m_tempFrameBuffer.resizeDiscard(numPixels * sizeof(U32));
        m_input.frame = m_tempFrameBuffer.getMutableCudaPtr();
    }

    // AA sample buffer
    // One U32 per ray per pixel. m_input.aaSampleBuffer is left untouched
    // when antialiasing is off.

    if (m_input.aaRays > 1)
    {
        m_aaSampleBuffer.resizeDiscard(numPixels * m_input.aaRays * sizeof(U32));
        m_input.aaSampleBuffer = m_aaSampleBuffer.getMutableCudaPtr();
    }

    // Setup performance counter buffer.
    // Layout, as consumed by the read-back loop further down: for each warp,
    // for each counter, 33 S64 values - 32 accumulated into threadCounters
    // followed by 1 accumulated into warpCounters. The buffer is zeroed
    // through its host pointer before use.

    if (m_params.enablePerfCounters)
    {
        m_perfCounters.resizeDiscard(m_numWarps * PerfCounter_Max * 33 * sizeof(S64));
        memset(m_perfCounters.getMutablePtr(), 0, (size_t)m_perfCounters.getSize());
        m_input.perfCounters = m_perfCounters.getMutableCudaPtr();
    }

    // Render.
    // NOTE(review): when beam optimization is disabled, coarseResult is only
    // default-constructed yet its time/numWarps are read below; this assumes
    // LaunchResult's default constructor zero-initializes them - confirm.

    LaunchResult coarseResult;
    if (m_params.enableBeamOptimization)
    {
        // Coarse pass: snapshot m_input, override the fields that differ for
        // the coarse launch, and run without ENABLE_CONTOURS.
        RenderInput old = m_input;
        m_input.numPrimaryRays = coarseNumPixels;
        m_input.aaRays = 1;
        m_input.flags |= RenderFlags_CoarsePass;
        m_input.batchSize = 1;
        m_compiler.undef("ENABLE_CONTOURS");
        coarseResult = launch(coarseNumPixels * m_params.numFrameRepeats, false);

        // Restore the snapshot (which drops RenderFlags_CoarsePass), tell the
        // main pass to use the coarse data, and re-enable contours if they
        // were enabled before.
        m_input = old;
        m_input.flags |= RenderFlags_UseCoarseData;
        if (enableContours)
            m_compiler.define("ENABLE_CONTOURS");
    }

    // Main pass: one primary ray per antialiasing sample per pixel.
    m_input.numPrimaryRays = numPixels * m_input.aaRays;
    LaunchResult renderResult = launch(m_input.numPrimaryRays * m_params.numFrameRepeats, true);

    // Post-process blur.

    F32 blurTime = 0.f;
    if (m_params.enableBlur)
    {
        // restore true frame buffer pointer
        m_input.frame = frame.getBuffer().getMutableCudaPtr();

        // get module
        CudaModule* module = m_compiler.compile();

        // update blur LUT
        // Each entry packs x and y as integers, and z and the distance
        // d = sqrt(x^2 + y^2) as float bit patterns, into one Vec4i.
        Vec4i* pLUT = (Vec4i*)module->getGlobal("c_blurLUT").getMutablePtr();
        for (int i=0; i < m_blurLUT.getSize(); i++)
        {
            float d = sqrtf((float)sqr(m_blurLUT[i].x) + (float)sqr(m_blurLUT[i].y));
            Vec4i& v = pLUT[i];
            v.x = m_blurLUT[i].x;
            v.y = m_blurLUT[i].y;
            v.z = floatToBits((float)m_blurLUT[i].z);
            v.w = floatToBits(d);
        }

        // update globals
        // NOTE(review): texAASamplesIn is bound to m_aaSampleBuffer even when
        // antialiasing is off and the buffer was not resized in this call -
        // confirm setTexRef accepts that.
        *(RenderInput*)module->getGlobal("c_input").getMutablePtr() = m_input;
        module->setTexRef("texTempFrameIn", m_tempFrameBuffer, CU_AD_FORMAT_UNSIGNED_INT8, 4);
        module->setTexRef("texAASamplesIn", m_aaSampleBuffer, CU_AD_FORMAT_UNSIGNED_INT8, 4);

        // launch
        blurTime = module->getKernel("blurKernel").launchTimed(frame.getSize(), Vec2i(8));
    }

    // Update statistics.
    // Times and warp counts are accumulated into m_results across calls.

    F32 totalTime = renderResult.time + coarseResult.time + blurTime;
    m_results.launchTime += totalTime;
    m_results.coarseTime += coarseResult.time;
    m_results.renderWarps += renderResult.numWarps;
    m_results.coarseWarps += coarseResult.numWarps;

    if (m_params.enablePerfCounters)
    {
        // Walk the counter buffer in the layout described where it is
        // allocated: 32 per-thread values, then 1 per-warp value.
        const S64* ptr = (const S64*)m_perfCounters.getPtr();
        for (int warpIdx = 0; warpIdx < m_numWarps; warpIdx++)
        {
            for (int counterIdx = 0; counterIdx < PerfCounter_Max; counterIdx++)
            {
                for (int threadIdx = 0; threadIdx < 32; threadIdx++)
                    m_results.threadCounters[counterIdx] += *ptr++;
                m_results.warpCounters[counterIdx] += *ptr++;
            }
        }
    }

    // NOTE(review): totalTime and blurTime are used as divisors without a
    // zero check - confirm a zero time cannot be reported, or that an
    // infinite value in the stats string is acceptable.
    m_stats = sprintf("CudaRenderer: launch %.2f ms (%.2f FPS), %.2f MPix/s",
        totalTime * 1.0e3f,
        1.0f / totalTime,
        numPixels * 1.0e-6f / totalTime);

    if (m_params.enableBlur)
        m_stats += sprintf(", blur %.2f MPix/s", numPixels * 1.0e-6f / blurTime);

    // Adjust the number of warps for the next run.
    // m_numWarps is only ever raised here: whenever twice the observed warp
    // count exceeds it, it is set to twice the observed count. The two
    // messages distinguish the case where the observed count equals the
    // current m_numWarps from any other case.

    int maxWarps = max(renderResult.numWarps, coarseResult.numWarps);
    if (maxWarps * 2 > m_numWarps)
    {
        if (maxWarps == m_numWarps)
            printf("CudaRenderer: warp count auto-detect overflow, increasing warp count to %d\n", maxWarps * 2);
        else
            printf("CudaRenderer: warp count auto-detected: %d warps, launching %d\n", maxWarps, maxWarps * 2);
        m_numWarps = maxWarps * 2;
    }
    return "";
}
bool App::handleEvent(const Window::Event& ev) { if (ev.type == Window::EventType_Close) { m_window.showModalMessage("Exiting..."); delete this; return true; } Action action = m_action; m_action = Action_None; String name; Mat4f mat; switch (action) { case Action_None: break; case Action_LoadMesh: name = m_window.showFileLoadDialog("Load mesh", getMeshImportFilter()); if (name.getLength()) { Bvh::setBvhMode((Bvh::BvhMode)m_bvhMode); loadMesh(name); } break; case Action_ReloadMesh: if (m_meshFileName.getLength()) { Bvh::setBvhMode((Bvh::BvhMode)m_bvhMode); loadMesh(m_meshFileName); } break; case Action_SaveMesh: name = m_window.showFileSaveDialog("Save mesh", getMeshExportFilter()); if (name.getLength()) saveMesh(name); break; case Action_ResetCamera: if (m_mesh) { m_cameraCtrl.initForMesh(m_mesh); m_commonCtrl.message("Camera reset"); } break; case Action_EncodeCameraSignature: m_window.setVisible(false); printf("\nCamera signature:\n"); printf("%s\n", m_cameraCtrl.encodeSignature().getPtr()); waitKey(); break; case Action_DecodeCameraSignature: { m_window.setVisible(false); printf("\nEnter camera signature:\n"); char buf[1024]; if (scanf_s("%s", buf, FW_ARRAY_SIZE(buf))) m_cameraCtrl.decodeSignature(buf); else setError("Signature too long!"); if (!hasError()) printf("Done.\n\n"); else { printf("Error: %s\n", getError().getPtr()); clearError(); waitKey(); } } break; case Action_NormalizeScale: if (m_mesh) { Vec3f lo, hi; m_mesh->getBBox(lo, hi); m_mesh->xform(Mat4f::scale(Vec3f(2.0f / (hi - lo).max())) * Mat4f::translate((lo + hi) * -0.5f)); } break; case Action_FlipXY: nvswap(mat.col(0), mat.col(1)); if (m_mesh) { m_mesh->xform(mat); m_mesh->flipTriangles(); } break; case Action_FlipYZ: nvswap(mat.col(1), mat.col(2)); if (m_mesh) { m_mesh->xform(mat); m_mesh->flipTriangles(); } break; case Action_FlipZ: mat.col(2) = -mat.col(2); if (m_mesh) { m_mesh->xform(mat); m_mesh->flipTriangles(); } break; case Action_NormalizeNormals: if (m_mesh) 
m_mesh->xformNormals(mat.getXYZ(), true); break; case Action_FlipNormals: mat = -mat; if (m_mesh) m_mesh->xformNormals(mat.getXYZ(), false); break; case Action_RecomputeNormals: if (m_mesh) m_mesh->recomputeNormals(); break; case Action_FlipTriangles: if (m_mesh) m_mesh->flipTriangles(); break; case Action_CleanMesh: if (m_mesh) m_mesh->clean(); break; case Action_CollapseVertices: if (m_mesh) m_mesh->collapseVertices(); break; case Action_DupVertsPerSubmesh: if (m_mesh) m_mesh->dupVertsPerSubmesh(); break; case Action_FixMaterialColors: if (m_mesh) m_mesh->fixMaterialColors(); break; case Action_DownscaleTextures: if (m_mesh) downscaleTextures(m_mesh); break; case Action_ChopBehindNear: if (m_mesh) { Mat4f worldToClip = m_cameraCtrl.getCameraToClip() * m_cameraCtrl.getWorldToCamera(); Vec4f pleq = worldToClip.getRow(2) + worldToClip.getRow(3); chopBehindPlane(m_mesh, pleq); } break; // Assignment 2: actions case Action_PlaceLightSourceAtCamera: m_areaLight->setOrientation( m_cameraCtrl.getCameraToWorld().getXYZ() ); m_areaLight->setPosition( m_cameraCtrl.getPosition() ); m_commonCtrl.message("Placed light at camera"); break; case Action_ComputeRadiosity: Sampler::setSequenceMode((Sampler::SequenceMode)m_samplingMode); m_radiosity->startRadiosityProcess( m_mesh, m_areaLight, m_rt, m_numBounces, m_numDirectRays, m_numHemisphereRays ); m_updateClock.start(); break; case Action_LoadRadiosity: name = m_window.showFileLoadDialog("Load radiosity solution", "rad:Radiosity Solution" ); if (name.getLength()) loadRadiosity(name); break; case Action_SaveRadiosity: name = m_window.showFileSaveDialog("Save radiosity solution", "rad:Radiosity Solution" ); if (name.getLength()) saveRadiosity(name); break; // Assignment 1: actions /* case Action_TracePrimaryRays: { m_renderer->setAONumRays( m_numAORays ); m_renderer->setAORayLength( m_aoRayLength ); m_renderer->rayTracePicture( m_mesh, m_rt, m_rtImage, m_cameraCtrl, (Renderer::ShadingMode)m_shadingMode, 
(Renderer::SamplingMode)m_samplingMode, (Renderer::ColorMode)m_colorMode ); m_showRTImage = true; } break; */ default: FW_ASSERT(false); break; } m_window.setVisible(true); if (ev.type == Window::EventType_Paint) renderFrame(m_window.getGL()); m_window.repaint(); return false; }
// Applies the transform `mat`: positions are transformed by `mat` itself via
// xformPositions(), and normals via xformNormals() by the inverse of the
// transpose of mat.getXYZ().
// NOTE(review): getXYZ() presumably yields the 3x3 linear part of the matrix,
// which would make this the usual inverse-transpose normal transform - confirm.
void xform (const Mat4f& mat) { xformPositions(mat); xformNormals(mat.getXYZ().transposed().inverted()); }