void RTShadows_DrawMetrics( RenderCore::Metal::DeviceContext& context, LightingParserContext& parserContext, MainTargetsBox& mainTargets) { SavedTargets savedTargets(context); auto restoreMarker = savedTargets.MakeResetMarker(context); context.GetUnderlying()->OMSetRenderTargets(1, savedTargets.GetRenderTargets(), nullptr); // (unbind depth) context.BindPS(MakeResourceList(5, mainTargets._gbufferRTVsSRV[0], mainTargets._gbufferRTVsSRV[1], mainTargets._gbufferRTVsSRV[2], mainTargets._msaaDepthBufferSRV)); const bool useMsaaSamplers = mainTargets._desc._sampling._sampleCount > 1; StringMeld<256> defines; defines << "SHADOW_CASCADE_MODE=2"; if (useMsaaSamplers) defines << ";MSAA_SAMPLERS=1"; auto& debuggingShader = ::Assets::GetAssetDep<Metal::ShaderProgram>( "game/xleres/basic2D.vsh:fullscreen:vs_*", "game/xleres/shadowgen/rtshadmetrics.sh:ps_main:ps_*", defines.get()); Metal::BoundUniforms uniforms(debuggingShader); Techniques::TechniqueContext::BindGlobalUniforms(uniforms); uniforms.BindShaderResources(1, {"RTSListsHead", "RTSLinkedLists", "RTSTriangles", "DepthTexture"}); uniforms.BindConstantBuffers(1, {"OrthogonalShadowProjection", "ScreenToShadowProjection"}); context.Bind(debuggingShader); context.Bind(Techniques::CommonResources()._blendStraightAlpha); SetupVertexGeneratorShader(context); for (const auto& p:parserContext._preparedRTShadows) { const Metal::ShaderResourceView* srvs[] = { &p.second._listHeadSRV, &p.second._linkedListsSRV, &p.second._trianglesSRV, &mainTargets._msaaDepthBufferSRV }; SharedPkt constants[2]; const Metal::ConstantBuffer* prebuiltConstants[2] = {nullptr, nullptr}; prebuiltConstants[0] = &p.second._orthoCB; constants[1] = BuildScreenToShadowConstants( p.second, parserContext.GetProjectionDesc()._cameraToWorld, parserContext.GetProjectionDesc()._cameraToProjection); uniforms.Apply( context, parserContext.GetGlobalUniformsStream(), Metal::UniformsStream(constants, prebuiltConstants, dimof(constants), srvs, dimof(srvs))); } 
context.Draw(4); }
void VegetationSpawn_Prepare( Metal::DeviceContext* context, LightingParserContext& parserContext, const VegetationSpawnConfig& cfg, VegetationSpawnResources& res) { // Prepare the scene for vegetation spawn // This means binding our output buffers to the stream output slots, // and then rendering the terrain with a special technique. // We can use flags to force the scene parser to render only the terrain // // If we use "GeometryShader::SetDefaultStreamOutputInitializers", then future // geometry shaders will be created as stream-output shaders. using namespace RenderCore; auto oldSO = Metal::GeometryShader::GetDefaultStreamOutputInitializers(); ID3D::Query* begunQuery = nullptr; auto oldCamera = parserContext.GetProjectionDesc(); CATCH_ASSETS_BEGIN auto& perlinNoiseRes = Techniques::FindCachedBox2<SceneEngine::PerlinNoiseResources>(); context->BindGS(MakeResourceList(12, perlinNoiseRes._gradShaderResource, perlinNoiseRes._permShaderResource)); context->BindGS(MakeResourceList(Metal::SamplerState())); // we have to clear vertex input "3", because this is the instancing input slot -- and // we're going to be writing to buffers that will be used for instancing. 
// ID3D::Buffer* nullBuffer = nullptr; unsigned zero = 0; // context->GetUnderlying()->IASetVertexBuffers(3, 1, &nullBuffer, &zero, &zero); context->Unbind<Metal::VertexBuffer>(); context->UnbindVS<Metal::ShaderResourceView>(15, 1); float maxDrawDistance = 0.f; for (const auto& m:cfg._materials) for (const auto& b:m._buckets) maxDrawDistance = std::max(b._maxDrawDistance, maxDrawDistance); class InstanceSpawnConstants { public: Float4x4 _worldToCullFrustum; float _gridSpacing, _baseDrawDistanceSq, _jitterAmount; unsigned _dummy; Float4 _materialParams[8]; Float4 _suppressionNoiseParams[8]; } instanceSpawnConstants = { parserContext.GetProjectionDesc()._worldToProjection, cfg._baseGridSpacing, maxDrawDistance*maxDrawDistance, cfg._jitterAmount, 0, { Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>() }, { Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>(), Zero<Float4>() } }; for (unsigned mi=0; mi<std::min(cfg._materials.size(), dimof(instanceSpawnConstants._materialParams)); ++mi) { instanceSpawnConstants._materialParams[mi][0] = cfg._materials[mi]._suppressionThreshold; instanceSpawnConstants._suppressionNoiseParams[mi][0] = cfg._materials[mi]._suppressionNoise; instanceSpawnConstants._suppressionNoiseParams[mi][1] = cfg._materials[mi]._suppressionGain; instanceSpawnConstants._suppressionNoiseParams[mi][2] = cfg._materials[mi]._suppressionLacunarity; } context->BindGS(MakeResourceList(5, Metal::ConstantBuffer(&instanceSpawnConstants, sizeof(InstanceSpawnConstants)))); const bool needQuery = false; if (constant_expression<needQuery>::result()) { begunQuery = res._streamOutputCountsQuery.get(); context->GetUnderlying()->Begin(begunQuery); } static const Metal::InputElementDesc eles[] = { // Our instance format is very simple. 
It's just a position and // rotation value (in 32 bit floats) // I'm not sure if the hardware will support 16 bit floats in a // stream output buffer (though maybe we could use fixed point // 16 bit integers?) // We write a "type" value to a second buffer. Let's keep that // buffer as small as possible, because we have to clear it // before hand Metal::InputElementDesc("INSTANCEPOS", 0, Metal::NativeFormat::R32G32B32A32_FLOAT), // vertex in slot 1 must have a vertex stride that is a multiple of 4 Metal::InputElementDesc("INSTANCEPARAM", 0, Metal::NativeFormat::R32_UINT, 1) }; // How do we clear an SO buffer? We can't make it an unorderedaccess view or render target. // The only obvious way is to use CopyResource, and copy from a prepared "cleared" buffer Metal::Copy(*context, res._streamOutputResources[1]->GetUnderlying(), res._clearedTypesResource->GetUnderlying()); unsigned strides[2] = { Stream0VertexSize, Stream1VertexSize }; Metal::GeometryShader::SetDefaultStreamOutputInitializers( Metal::GeometryShader::StreamOutputInitializers(eles, dimof(eles), strides, 2)); SceneParseSettings parseSettings( SceneParseSettings::BatchFilter::General, SceneParseSettings::Toggles::Terrain); // Adjust the far clip so that it's very close... // We might want to widen the field of view slightly // by moving the camera back a bit. This could help make // sure that objects near the camera and on the edge of the screen // get included auto newProjDesc = AdjustProjDesc(oldCamera, maxDrawDistance); // We have to call "SetGlobalTransform" to force the camera changes to have effect. // Ideally there would be a cleaner way to automatically update the constants // when the bound camera changes... 
LightingParser_SetGlobalTransform(*context, parserContext, newProjDesc); context->BindSO(MakeResourceList(res._streamOutputBuffers[0], res._streamOutputBuffers[1])); parserContext.GetSceneParser()->ExecuteScene(context, parserContext, parseSettings, 5); context->UnbindSO(); // After the scene execute, we need to use a compute shader to separate the // stream output data into it's bins. static const unsigned MaxOutputBinCount = 8; ID3D::UnorderedAccessView* outputBins[MaxOutputBinCount]; unsigned initialCounts[MaxOutputBinCount]; std::fill(outputBins, &outputBins[dimof(outputBins)], nullptr); std::fill(initialCounts, &initialCounts[dimof(initialCounts)], 0); auto outputBinCount = std::min((unsigned)cfg._objectTypes.size(), (unsigned)res._instanceBufferUAVs.size()); for (unsigned c=0; c<outputBinCount; ++c) { unsigned clearValues[] = { 0, 0, 0, 0 }; context->Clear(res._instanceBufferUAVs[c], clearValues); outputBins[c] = res._instanceBufferUAVs[c].GetUnderlying(); } context->BindCS(MakeResourceList(res._streamOutputSRV[0], res._streamOutputSRV[1])); context->GetUnderlying()->CSSetUnorderedAccessViews(0, outputBinCount, outputBins, initialCounts); class InstanceSeparateConstants { public: UInt4 _binThresholds[16]; Float4 _drawDistanceSq[16]; } instanceSeparateConstants; XlZeroMemory(instanceSeparateConstants); StringMeld<1024> shaderParams; unsigned premapBinCount = 0; for (unsigned mi=0; mi<cfg._materials.size(); ++mi) { const auto& m = cfg._materials[mi]; float combinedWeight = 0.f; // m._noSpawnWeight; for (const auto& b:m._buckets) combinedWeight += b._frequencyWeight; unsigned weightIterator = 0; for (unsigned c=0; c<std::min(dimof(instanceSeparateConstants._binThresholds), m._buckets.size()); ++c) { weightIterator += unsigned(4095.f * m._buckets[c]._frequencyWeight / combinedWeight); instanceSeparateConstants._binThresholds[premapBinCount][0] = (mi<<12) | weightIterator; instanceSeparateConstants._drawDistanceSq[premapBinCount][0] = 
m._buckets[c]._maxDrawDistance * m._buckets[c]._maxDrawDistance; shaderParams << "OUTPUT_BUFFER_MAP" << premapBinCount << "=" << m._buckets[c]._objectType << ";"; ++premapBinCount; } } for (unsigned c=premapBinCount; c<dimof(instanceSeparateConstants._binThresholds); ++c) shaderParams << "OUTPUT_BUFFER_MAP" << c << "=0;"; shaderParams << "INSTANCE_BIN_COUNT=" << premapBinCount; context->BindCS(MakeResourceList( parserContext.GetGlobalTransformCB(), Metal::ConstantBuffer(&instanceSeparateConstants, sizeof(instanceSeparateConstants)))); context->Bind(::Assets::GetAssetDep<Metal::ComputeShader>( "game/xleres/Vegetation/InstanceSpawnSeparate.csh:main:cs_*", shaderParams.get())); context->Dispatch(StreamOutputMaxCount / 256); // unbind all of the UAVs again context->UnbindCS<Metal::UnorderedAccessView>(0, outputBinCount); context->UnbindCS<Metal::ShaderResourceView>(0, 2); res._isPrepared = true; CATCH_ASSETS_END(parserContext) if (begunQuery) { context->GetUnderlying()->End(begunQuery); } // (reset the camera transform if it's changed) LightingParser_SetGlobalTransform(*context, parserContext, oldCamera); context->UnbindSO(); Metal::GeometryShader::SetDefaultStreamOutputInitializers(oldSO); // oldTargets.ResetToOldTargets(context); }
// Builds the GPU data needed for ray traced shadows for a single shadow frustum.
//
//  context             thread context passed through to the scene parser
//  metalContext        device context used for all binds, clears and draws
//  parserContext       supplies the scene parser, projection desc and technique
//                      context; its _worldToProjection and the shadow cascade
//                      mode parameter are modified during the call and reset
//                      by an auto-cleanup object on exit
//  preparedScene       passed through to ExecuteScene
//  frustum             shadow projection to prepare
//  shadowFrustumIndex  passed to the scene parse settings
//
// Returns a default constructed PreparedRTShadowFrustum when the scene parser
// reports no content for the ray traced shadows batch. Otherwise the result
// holds the initialised projection constants and the shader resource views for
// the grid list heads, the linked lists and the triangle buffer.
//
// The work is done in two steps:
//   1. the scene is rendered with stream output enabled, writing triangles
//      into box._triangleBufferVB
//   2. those triangles are drawn as a point list (DrawAuto) into the grid
//      buffer viewport, filling box._gridBufferUAV and box._listsBufferUAV
PreparedRTShadowFrustum PrepareRTShadows(
    IThreadContext& context,
    Metal::DeviceContext& metalContext,
    LightingParserContext& parserContext,
    PreparedScene& preparedScene,
    const ShadowProjectionDesc& frustum,
    unsigned shadowFrustumIndex)
{
    SceneParseSettings sceneParseSettings(
        SceneParseSettings::BatchFilter::RayTracedShadows,
        ~SceneParseSettings::Toggles::BitField(0), shadowFrustumIndex);
    if (!parserContext.GetSceneParser()->HasContent(sceneParseSettings))
        return PreparedRTShadowFrustum();

    Metal::GPUProfiler::DebugAnnotation anno(metalContext, L"Prepare-RTShadows");

    auto& box = Techniques::FindCachedBox2<RTShadowsBox>(256, 256, 1024*1024, 32, 64*1024);
    auto oldSO = Metal::GeometryShader::GetDefaultStreamOutputInitializers();

        // Layout written by the stream output step...
    static const Metal::InputElementDesc soVertex[] =
    {
        Metal::InputElementDesc("A", 0, Metal::NativeFormat::R32G32B32A32_FLOAT),
        Metal::InputElementDesc("B", 0, Metal::NativeFormat::R32G32_FLOAT),
        Metal::InputElementDesc("C", 0, Metal::NativeFormat::R32G32B32A32_FLOAT),
        Metal::InputElementDesc("D", 0, Metal::NativeFormat::R32G32B32_FLOAT)
    };

        // ...and the identical layout used to read it back as vertex input
    static const Metal::InputElementDesc il[] =
    {
        Metal::InputElementDesc("A", 0, Metal::NativeFormat::R32G32B32A32_FLOAT),
        Metal::InputElementDesc("B", 0, Metal::NativeFormat::R32G32_FLOAT),
        Metal::InputElementDesc("C", 0, Metal::NativeFormat::R32G32B32A32_FLOAT),
        Metal::InputElementDesc("D", 0, Metal::NativeFormat::R32G32B32_FLOAT)
    };

    metalContext.UnbindPS<Metal::ShaderResourceView>(5, 3);

    const unsigned bufferCount = 1;
    unsigned strides[] = { 52 };    // byte size of one vertex in the layout above (16 + 8 + 16 + 12)
    unsigned offsets[] = { 0 };
    Metal::GeometryShader::SetDefaultStreamOutputInitializers(
        Metal::GeometryShader::StreamOutputInitializers(soVertex, dimof(soVertex), strides, bufferCount));

    static_assert(bufferCount == dimof(strides), "Stream output buffer count mismatch");
    static_assert(bufferCount == dimof(offsets), "Stream output buffer count mismatch");
    metalContext.BindSO(MakeResourceList(box._triangleBufferVB));

        // set up the render state for writing into the grid buffer
    SavedTargets savedTargets(metalContext);
    metalContext.Bind(box._gridBufferViewport);
    metalContext.Unbind<Metal::RenderTargetView>();
    metalContext.Bind(Techniques::CommonResources()._blendOpaque);
    metalContext.Bind(Techniques::CommonResources()._defaultRasterizer);    // for newer video cards, we need "conservative raster" enabled

    PreparedRTShadowFrustum preparedResult;
    preparedResult.InitialiseConstants(&metalContext, frustum._projections);
    using TC = Techniques::TechniqueContext;
    parserContext.SetGlobalCB(metalContext, TC::CB_ShadowProjection, &preparedResult._arbitraryCBSource, sizeof(preparedResult._arbitraryCBSource));
    parserContext.SetGlobalCB(metalContext, TC::CB_OrthoShadowProjection, &preparedResult._orthoCBSource, sizeof(preparedResult._orthoCBSource));

    parserContext.GetTechniqueContext()._runtimeState.SetParameter(
        StringShadowCascadeMode,
        (preparedResult._mode == ShadowProjectionDesc::Projections::Mode::Ortho)?2:1);

        // Now, we need to transform the object's triangle buffer into shadow
        // projection space during this step (also applying skinning, wind bending, and
        // any other animation effects).
        //
        // Each object that will be used with projected shadows must have a buffer
        // containing the triangle information.
        //
        // We can deal with this in a number of ways:
        //      1. rtwritetiles shader writes triangles out in a stream-output step
        //      2. transform triangles first, then pass that information through the rtwritetiles shader
        //      3. transform triangles completely separately from the rtwritetiles step
        //
        // Method 1 would avoid extra transformations of the input data, and actually
        // simplifies some of the shader work. We don't need any special input buffers
        // or extra input data. The shaders just take generic model information, and build
        // everything they need, as they need it.
        //
        // We can also choose to reject backfacing triangles at this point, as well as
        // removing triangles that are culled from the frustum.
        //
    Float4x4 savedWorldToProjection = parserContext.GetProjectionDesc()._worldToProjection;
    parserContext.GetProjectionDesc()._worldToProjection = frustum._worldToClip;

        // Puts back the world-to-projection transform and clears the cascade mode
        // parameter when this object goes out of scope
    auto cleanup = MakeAutoCleanup(
        [&parserContext, &savedWorldToProjection]() {
            parserContext.GetProjectionDesc()._worldToProjection = savedWorldToProjection;
            parserContext.GetTechniqueContext()._runtimeState.SetParameter(StringShadowCascadeMode, 0);
        });

    CATCH_ASSETS_BEGIN
        parserContext.GetSceneParser()->ExecuteScene(
            context, parserContext, sceneParseSettings,
            preparedScene, TechniqueIndex_RTShadowGen);
    CATCH_ASSETS_END(parserContext)

    metalContext.UnbindSO();
    Metal::GeometryShader::SetDefaultStreamOutputInitializers(oldSO);

        // We have the list of triangles. Let's render them into the final
        // grid buffer viewport. This should create a list of triangles for
        // each cell in the grid. The goal is to reduce the number of triangles
        // that the ray tracing shader needs to look at.
        //
        // We could attempt to do this in the same step above. But that creates
        // some problems with frustum cull and back face culling. This order
        // allows us reduce the total triangle count before we start assigning
        // primitive ids.
        //
        // todo -- also calculate min/max for each grid during this step

    CATCH_ASSETS_BEGIN
        auto& shader = ::Assets::GetAssetDep<Metal::ShaderProgram>(
            "game/xleres/shadowgen/rtwritetiles.sh:vs_passthrough:vs_*",
            "game/xleres/shadowgen/consraster.sh:gs_conservativeRasterization:gs_*",
            "game/xleres/shadowgen/rtwritetiles.sh:ps_main:ps_*",
            "OUTPUT_PRIM_ID=1;INPUT_RAYTEST_TRIS=1");
        metalContext.Bind(shader);

        Metal::BoundInputLayout inputLayout(Metal::InputLayout(il, dimof(il)), shader);
        metalContext.Bind(inputLayout);

            // no shader constants/resources required

        unsigned clearValues[] = { 0, 0, 0, 0 };
        metalContext.Clear(box._gridBufferUAV, clearValues);

        metalContext.Bind(Techniques::CommonResources()._blendOpaque);
        metalContext.Bind(Techniques::CommonResources()._dssDisable);
        metalContext.Bind(Techniques::CommonResources()._cullDisable);
        metalContext.Bind(Metal::Topology::PointList);
        metalContext.Bind(MakeResourceList(box._triangleBufferVB), strides[0], offsets[0]);

        metalContext.Bind(
            MakeResourceList(box._dummyRTV), nullptr,
            MakeResourceList(box._gridBufferUAV, box._listsBufferUAV));
        metalContext.DrawAuto();
    CATCH_ASSETS_END(parserContext)

    metalContext.Bind(Metal::Topology::TriangleList);
    savedTargets.ResetToOldTargets(metalContext);

    preparedResult._listHeadSRV = box._gridBufferSRV;
    preparedResult._linkedListsSRV = box._listsBufferSRV;
    preparedResult._trianglesSRV = box._triangleBufferSRV;

        // Return the local by name: it is implicitly treated as an rvalue here, and
        // wrapping it in std::move would only prevent the return value optimisation.
    return preparedResult;
}
void AmbientOcclusion_Render( Metal::DeviceContext* context, LightingParserContext& parserContext, AmbientOcclusionResources& resources, Metal::ShaderResourceView& depthBuffer, Metal::ShaderResourceView* normalsBuffer, const Metal::ViewportDesc& mainViewport) { // Not working for orthogonal projection matrices if (IsOrthogonalProjection(parserContext.GetProjectionDesc()._cameraToProjection)) return; static float SceneScale = 1.f; // // See nvidia header on documentation for interface to NVSSAO // Note that MSAA behaviour is a little strange. the nvidia library // will take a MSAA render target as input, but will write out // non-MSAA data. So we can't blend directly to a MSAA buffer ( // blending a non-MSAA buffer with a MSAA buffer will remove the // samples information!) // GFSDK_SSAO_InputData_D3D11 inputData; auto projectionMatrixTranspose = parserContext.GetProjectionDesc()._cameraToProjection; inputData.DepthData.DepthTextureType = GFSDK_SSAO_HARDWARE_DEPTHS; inputData.DepthData.ProjectionMatrix.Data = GFSDK_SSAO_Float4x4((const float*)&projectionMatrixTranspose); inputData.DepthData.ProjectionMatrix.Layout = GFSDK_SSAO_COLUMN_MAJOR_ORDER; inputData.DepthData.pFullResDepthTextureSRV = depthBuffer.GetUnderlying(); inputData.DepthData.MetersToViewSpaceUnits = SceneScale; inputData.DepthData.Viewport.Enable = true; inputData.DepthData.Viewport.TopLeftX = (GFSDK_SSAO_UINT)mainViewport.TopLeftX; inputData.DepthData.Viewport.TopLeftY = (GFSDK_SSAO_UINT)mainViewport.TopLeftY; inputData.DepthData.Viewport.Width = (GFSDK_SSAO_UINT)mainViewport.Width; inputData.DepthData.Viewport.Height = (GFSDK_SSAO_UINT)mainViewport.Height; inputData.DepthData.Viewport.MinDepth = mainViewport.MinDepth; inputData.DepthData.Viewport.MaxDepth = mainViewport.MaxDepth; if (resources._useNormals && normalsBuffer) { if (resources._normalsResolveFormat != Metal::NativeFormat::Unknown) { context->GetUnderlying()->ResolveSubresource( resources._resolvedNormals.get(), 0, 
Metal::ExtractResource<ID3D::Resource>(normalsBuffer->GetUnderlying()).get(), 0, Metal::AsDXGIFormat(resources._normalsResolveFormat)); inputData.NormalData.pFullResNormalTextureSRV = resources._resolvedNormalsSRV.GetUnderlying(); } else { inputData.NormalData.pFullResNormalTextureSRV = normalsBuffer->GetUnderlying(); } // when using UNORM normal data, use: // inputData.NormalData.DecodeScale = 2.f; // inputData.NormalData.DecodeBias = -1.f; assert(Metal::GetComponentType( Metal::AsNativeFormat(Metal::TextureDesc2D(normalsBuffer->GetUnderlying()).Format)) == Metal::FormatComponentType::SNorm); inputData.NormalData.DecodeScale = 1.f; inputData.NormalData.DecodeBias = 0.f; auto worldToView = InvertOrthonormalTransform(parserContext.GetProjectionDesc()._cameraToWorld); worldToView(2, 0) = -worldToView(2, 0); worldToView(2, 1) = -worldToView(2, 1); worldToView(2, 2) = -worldToView(2, 2); worldToView(2, 3) = -worldToView(2, 3); inputData.NormalData.WorldToViewMatrix.Data = GFSDK_SSAO_Float4x4((float*)&worldToView); inputData.NormalData.WorldToViewMatrix.Layout = GFSDK_SSAO_COLUMN_MAJOR_ORDER; inputData.NormalData.Enable = true; } context->InvalidateCachedState(); // (nvidia code might change some states) // Getting a warning message here if the pixel shader used // immediately before this point uses class instances. Seems to // be ok if we unbind the pixel shader first. context->Unbind<RenderCore::Metal::PixelShader>(); context->Unbind<RenderCore::Metal::VertexShader>(); context->Unbind<RenderCore::Metal::GeometryShader>(); auto parameters = BuildAOParameters(); auto status = resources._aoContext->RenderAO( context->GetUnderlying(), &inputData, ¶meters, resources._aoTarget.GetUnderlying()); assert(status == GFSDK_SSAO_OK); (void)status; if (Tweakable("AODebugging", false)) { parserContext._pendingOverlays.push_back( std::bind(&AmbientOcclusion_DrawDebugging, std::placeholders::_1, std::ref(resources))); } }