String CudaRenderer::renderObject(
    Image&          frame,
    OctreeRuntime*  runtime,
    int             objectID,
    const Mat4f&    octreeToWorld,
    const Mat4f&    worldToCamera,
    const Mat4f&    projection)
{
    FW_ASSERT(runtime);

    // Check frame buffer validity.

    if (frame.getSize().min() <= 0)
        return "";

    if (frame.getFormat() != ImageFormat::ABGR_8888 ||
        frame.getStride() != frame.getSize().x * frame.getBPP())
    {
        return "CudaRenderer: Incompatible framebuffer!";
    }

    // Determine preprocessor defines.

    const Array<AttachIO::AttachType>& attach = runtime->getAttachTypes(objectID);
    FW_ASSERT(attach.getSize() == AttachSlot_Max);

    m_compiler.clearDefines();

    bool enableContours = (attach[AttachSlot_Contour] == AttachIO::ContourAttach && m_params.enableContours);
    if (enableContours)
        m_compiler.define("ENABLE_CONTOURS");

    switch (attach[AttachSlot_Attribute])
    {
    case AttachIO::ColorNormalPaletteAttach:    m_compiler.define("VOXELATTRIB_PALETTE"); m_compiler.define("DISABLE_PUSH_OPTIMIZATION"); break;
    case AttachIO::ColorNormalCornerAttach:     m_compiler.define("VOXELATTRIB_CORNER"); m_compiler.define("DISABLE_PUSH_OPTIMIZATION"); break;
    case AttachIO::ColorNormalDXTAttach:        m_compiler.define("VOXELATTRIB_DXT"); break;
    default:                                    return "Unsupported attribute attachment!";
    }

    if (attach[AttachSlot_AO] == AttachIO::AOAttach)
        m_compiler.define("VOXELATTRIB_AO");

    if (m_params.measureRaycastPerf)
        m_compiler.define("KERNEL_RAYCAST_PERF");
    else
        m_compiler.define("KERNEL_RENDER");

    if (m_params.enablePerfCounters)
        m_compiler.define("ENABLE_PERF_COUNTERS");

    if (m_params.enableLargeReconstruction)
        m_compiler.define("LARGE_RECONSTRUCTION_KERNEL");

    if (m_params.enableJitterLOD)
        m_compiler.define("JITTER_LOD");

    if (m_params.visualization == Visualization_PrimaryAndShadow)
        m_compiler.define("ENABLE_SHADOWS");

    if (!m_blurLUT.getSize())
        constructBlurLUT();
    m_compiler.define("BLUR_LUT_SIZE", String(m_blurLUT.getSize()));

    // Determine flags.

    U32 flags = 0;
    if (m_params.visualization == Visualization_IterationCount)
        flags |= RenderFlags_VisualizeIterations;
    else if (m_params.visualization == Visualization_RaycastLevel)
        flags |= RenderFlags_VisualizeRaycastLevel;

    // Set input.

    m_input.frameSize       = frame.getSize();
    m_input.flags           = flags;
    m_input.batchSize       = m_params.batchSize;
    m_input.aaRays          = (m_params.enableAntialias) ? 4 : 1;
    m_input.maxVoxelSize    = m_params.maxVoxelSize;
    m_input.brightness      = m_params.brightness;
    m_input.coarseSize      = m_params.coarseSize;
    m_input.coarseFrameSize = (m_input.frameSize + (m_params.coarseSize - 1)) / m_params.coarseSize + 1;
    m_input.frame           = frame.getBuffer().getMutableCudaPtr();
    m_input.rootNode        = runtime->getRootNodeCuda(objectID);

    OctreeMatrices& om      = m_input.octreeMatrices;
    Vec3f scale             = Vec3f(Vec2f(2.0f) / Vec2f(m_input.frameSize), 1.0f);
    om.viewportToCamera     = projection.inverted() * Mat4f::translate(Vec3f(-1.0f, -1.0f, 0.0f)) * Mat4f::scale(scale);
    om.cameraToOctree       = Mat4f::translate(Vec3f(1.0f)) * (worldToCamera * octreeToWorld).inverted();
    Mat4f vto               = om.cameraToOctree * om.viewportToCamera;
    om.pixelInOctree        = sqrt(Vec4f(vto.col(0)).getXYZ().cross(Vec4f(vto.col(1)).getXYZ()).length());

    om.octreeToWorld        = octreeToWorld * Mat4f::translate(Vec3f(-1.0f));
    om.worldToOctree        = invert(om.octreeToWorld);
    om.octreeToWorldN       = octreeToWorld.getXYZ().inverted().transposed();
    om.cameraPosition       = invert(worldToCamera) * Vec3f(0.f, 0.f, 0.f);
    om.octreeToViewport     = invert(om.viewportToCamera) * invert(om.cameraToOctree);
    om.viewportToOctreeN    = (om.octreeToViewport).transposed();

    // Setup frame-related buffers.

    int numPixels = m_input.frameSize.x * m_input.frameSize.y;
    if (m_pixelTable.getSize() != m_input.frameSize)
    {
        m_indexToPixel.resizeDiscard(numPixels * sizeof(S32));
        m_pixelTable.setSize(m_input.frameSize);
        memcpy(m_indexToPixel.getMutablePtr(), m_pixelTable.getIndexToPixel(), numPixels * sizeof(S32));
    }

    // Coarse frame and pixel buffers.

    int coarseNumPixels = m_input.coarseFrameSize.x * m_input.coarseFrameSize.y;
    m_coarseFrameBuffer.resizeDiscard(coarseNumPixels * sizeof(S32));
    m_input.frameCoarse = m_coarseFrameBuffer.getMutableCudaPtr();

    if (m_coarsePixelTable.getSize() != m_input.coarseFrameSize)
    {
        m_coarseIndexToPixel.resizeDiscard(coarseNumPixels * sizeof(S32));
        m_coarsePixelTable.setSize(m_input.coarseFrameSize);
        memcpy(m_coarseIndexToPixel.getMutablePtr(), m_coarsePixelTable.getIndexToPixel(), coarseNumPixels * sizeof(S32));
        m_coarseIndexToPixel.free(Buffer::CPU);
    }

    // Temp frame buffer for blurring.

    if (m_params.enableBlur)
    {
        // override frame buffer address!
        m_tempFrameBuffer.resizeDiscard(numPixels * sizeof(U32));
        m_input.frame = m_tempFrameBuffer.getMutableCudaPtr();
    }

    // AA sample buffer
    if (m_input.aaRays > 1)
    {
        m_aaSampleBuffer.resizeDiscard(numPixels * m_input.aaRays * sizeof(U32));
        m_input.aaSampleBuffer = m_aaSampleBuffer.getMutableCudaPtr();
    }

    // Setup performance counter buffer.

    if (m_params.enablePerfCounters)
    {
        m_perfCounters.resizeDiscard(m_numWarps * PerfCounter_Max * 33 * sizeof(S64));
        memset(m_perfCounters.getMutablePtr(), 0, (size_t)m_perfCounters.getSize());
        m_input.perfCounters = m_perfCounters.getMutableCudaPtr();
    }

    // Render.

    LaunchResult coarseResult;
    if (m_params.enableBeamOptimization)
    {
        RenderInput old = m_input;
        m_input.numPrimaryRays = coarseNumPixels;
        m_input.aaRays = 1;
        m_input.flags |= RenderFlags_CoarsePass;
        m_input.batchSize = 1;
        m_compiler.undef("ENABLE_CONTOURS");

        coarseResult = launch(coarseNumPixels * m_params.numFrameRepeats, false);

        m_input = old;
        m_input.flags |= RenderFlags_UseCoarseData;
        if (enableContours)
            m_compiler.define("ENABLE_CONTOURS");
    }

    m_input.numPrimaryRays = numPixels * m_input.aaRays;
    LaunchResult renderResult = launch(m_input.numPrimaryRays * m_params.numFrameRepeats, true);

    // Post-process blur.
    F32 blurTime = 0.f;
    if (m_params.enableBlur)
    {
        // restore true frame buffer pointer
        m_input.frame = frame.getBuffer().getMutableCudaPtr();

        // get module
        CudaModule* module = m_compiler.compile();

        // update blur LUT
        Vec4i* pLUT = (Vec4i*)module->getGlobal("c_blurLUT").getMutablePtr();
        for (int i=0; i < m_blurLUT.getSize(); i++)
        {
            float d = sqrtf((float)sqr(m_blurLUT[i].x) + (float)sqr(m_blurLUT[i].y));
            Vec4i& v = pLUT[i];
            v.x = m_blurLUT[i].x;
            v.y = m_blurLUT[i].y;
            v.z = floatToBits((float)m_blurLUT[i].z);
            v.w = floatToBits(d);
        }

        // update globals
        *(RenderInput*)module->getGlobal("c_input").getMutablePtr() = m_input;
        module->setTexRef("texTempFrameIn", m_tempFrameBuffer, CU_AD_FORMAT_UNSIGNED_INT8, 4);
        module->setTexRef("texAASamplesIn", m_aaSampleBuffer, CU_AD_FORMAT_UNSIGNED_INT8, 4);

        // launch
        blurTime = module->getKernel("blurKernel").launchTimed(frame.getSize(), Vec2i(8));

    }

    // Update statistics.

    F32 totalTime           = renderResult.time + coarseResult.time + blurTime;
    m_results.launchTime    += totalTime;
    m_results.coarseTime    += coarseResult.time;
    m_results.renderWarps   += renderResult.numWarps;
    m_results.coarseWarps   += coarseResult.numWarps;

    if (m_params.enablePerfCounters)
    {
        const S64* ptr = (const S64*)m_perfCounters.getPtr();
        for (int warpIdx = 0; warpIdx < m_numWarps; warpIdx++)
        {
            for (int counterIdx = 0; counterIdx < PerfCounter_Max; counterIdx++)
            {
                for (int threadIdx = 0; threadIdx < 32; threadIdx++)
                    m_results.threadCounters[counterIdx] += *ptr++;
                m_results.warpCounters[counterIdx] += *ptr++;
            }
        }
    }

    m_stats = sprintf("CudaRenderer: launch %.2f ms (%.2f FPS), %.2f MPix/s",
        totalTime * 1.0e3f,
        1.0f / totalTime,
        numPixels * 1.0e-6f / totalTime);

    if (m_params.enableBlur)
        m_stats += sprintf(", blur %.2f MPix/s", numPixels * 1.0e-6f / blurTime);

    // Adjust the number of warps for the next run.

    int maxWarps = max(renderResult.numWarps, coarseResult.numWarps);
    if (maxWarps * 2 > m_numWarps)
    {
        if (maxWarps == m_numWarps)
            printf("CudaRenderer: warp count auto-detect overflow, increasing warp count to %d\n", maxWarps * 2);
        else
            printf("CudaRenderer: warp count auto-detected: %d warps, launching %d\n", maxWarps, maxWarps * 2);
        m_numWarps = maxWarps * 2;
    }
    return "";
}
bool App::handleEvent(const Window::Event& ev)
{
    if (ev.type == Window::EventType_Close)
    {
        m_window.showModalMessage("Exiting...");
        delete this;
        return true;
    }

    Action action = m_action;
    m_action = Action_None;
    String name;
    Mat4f mat;

    switch (action)
    {
    case Action_None:
        break;

    case Action_LoadMesh:
        name = m_window.showFileLoadDialog("Load mesh", getMeshImportFilter());
        if (name.getLength()) {
			Bvh::setBvhMode((Bvh::BvhMode)m_bvhMode);
            loadMesh(name);
		}
        break;

    case Action_ReloadMesh:
        if (m_meshFileName.getLength()) {
			Bvh::setBvhMode((Bvh::BvhMode)m_bvhMode);
            loadMesh(m_meshFileName);
		}
        break;

    case Action_SaveMesh:
        name = m_window.showFileSaveDialog("Save mesh", getMeshExportFilter());
        if (name.getLength())
            saveMesh(name);
        break;

    case Action_ResetCamera:
        if (m_mesh)
        {
            m_cameraCtrl.initForMesh(m_mesh);
            m_commonCtrl.message("Camera reset");
        }
        break;

    case Action_EncodeCameraSignature:
        m_window.setVisible(false);
        printf("\nCamera signature:\n");
        printf("%s\n", m_cameraCtrl.encodeSignature().getPtr());
        waitKey();
        break;

    case Action_DecodeCameraSignature:
        {
            m_window.setVisible(false);
            printf("\nEnter camera signature:\n");

            char buf[1024];
            if (scanf_s("%s", buf, FW_ARRAY_SIZE(buf)))
                m_cameraCtrl.decodeSignature(buf);
            else
                setError("Signature too long!");

            if (!hasError())
                printf("Done.\n\n");
            else
            {
                printf("Error: %s\n", getError().getPtr());
                clearError();
                waitKey();
            }
        }
        break;

    case Action_NormalizeScale:
        if (m_mesh)
        {
            Vec3f lo, hi;
            m_mesh->getBBox(lo, hi);
            m_mesh->xform(Mat4f::scale(Vec3f(2.0f / (hi - lo).max())) * Mat4f::translate((lo + hi) * -0.5f));
        }
        break;

    case Action_FlipXY:
        nvswap(mat.col(0), mat.col(1));
        if (m_mesh)
        {
            m_mesh->xform(mat);
            m_mesh->flipTriangles();
        }
        break;

    case Action_FlipYZ:
        nvswap(mat.col(1), mat.col(2));
        if (m_mesh)
        {
            m_mesh->xform(mat);
            m_mesh->flipTriangles();
        }
        break;

    case Action_FlipZ:
        mat.col(2) = -mat.col(2);
        if (m_mesh)
        {
            m_mesh->xform(mat);
            m_mesh->flipTriangles();
        }
        break;

    case Action_NormalizeNormals:
        if (m_mesh)
            m_mesh->xformNormals(mat.getXYZ(), true);
        break;

    case Action_FlipNormals:
        mat = -mat;
        if (m_mesh)
            m_mesh->xformNormals(mat.getXYZ(), false);
        break;

    case Action_RecomputeNormals:
        if (m_mesh)
            m_mesh->recomputeNormals();
        break;

    case Action_FlipTriangles:
        if (m_mesh)
            m_mesh->flipTriangles();
        break;

    case Action_CleanMesh:
        if (m_mesh)
            m_mesh->clean();
        break;

    case Action_CollapseVertices:
        if (m_mesh)
            m_mesh->collapseVertices();
        break;

    case Action_DupVertsPerSubmesh:
        if (m_mesh)
            m_mesh->dupVertsPerSubmesh();
        break;

    case Action_FixMaterialColors:
        if (m_mesh)
            m_mesh->fixMaterialColors();
        break;

    case Action_DownscaleTextures:
        if (m_mesh)
            downscaleTextures(m_mesh);
        break;

    case Action_ChopBehindNear:
        if (m_mesh)
        {
            Mat4f worldToClip = m_cameraCtrl.getCameraToClip() * m_cameraCtrl.getWorldToCamera();
            Vec4f pleq = worldToClip.getRow(2) + worldToClip.getRow(3);
            chopBehindPlane(m_mesh, pleq);
        }
        break;

	// Assignment 2: actions
	case Action_PlaceLightSourceAtCamera:
		m_areaLight->setOrientation( m_cameraCtrl.getCameraToWorld().getXYZ() );
		m_areaLight->setPosition( m_cameraCtrl.getPosition() );
	    m_commonCtrl.message("Placed light at camera");
		break;

	case Action_ComputeRadiosity:
		Sampler::setSequenceMode((Sampler::SequenceMode)m_samplingMode);
		m_radiosity->startRadiosityProcess( m_mesh, m_areaLight, m_rt, m_numBounces, m_numDirectRays, m_numHemisphereRays );
		m_updateClock.start();
		break;

	case Action_LoadRadiosity:
        name = m_window.showFileLoadDialog("Load radiosity solution", "rad:Radiosity Solution" );
        if (name.getLength())
            loadRadiosity(name);
		break;

	case Action_SaveRadiosity:
        name = m_window.showFileSaveDialog("Save radiosity solution", "rad:Radiosity Solution" );
        if (name.getLength())
            saveRadiosity(name);
		break;

	// Assignment 1: actions
	/*
	case Action_TracePrimaryRays:
		{
			m_renderer->setAONumRays( m_numAORays );
			m_renderer->setAORayLength( m_aoRayLength );
			m_renderer->rayTracePicture( m_mesh, m_rt, m_rtImage, m_cameraCtrl, (Renderer::ShadingMode)m_shadingMode, (Renderer::SamplingMode)m_samplingMode, (Renderer::ColorMode)m_colorMode );
			m_showRTImage = true;
		}
		break;
	*/
    default:
        FW_ASSERT(false);
        break;
    }

    m_window.setVisible(true);

    if (ev.type == Window::EventType_Paint)
        renderFrame(m_window.getGL());
    m_window.repaint();
    return false;
}
Ejemplo n.º 3
0
 void                xform               (const Mat4f& mat)              { xformPositions(mat); xformNormals(mat.getXYZ().transposed().inverted()); }