void EdgeDistanceComputeShader::setParameters( ID3D11DeviceContext& deviceContext,
                                               const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& distToEdgeTexture,
                                               const unsigned char passIndex )
{
    if ( !m_compiled ) throw std::exception( "EdgeDistanceComputeShader::setParameters - Shader hasn't been compiled yet." );

    { // Set input textures.
        const unsigned int resourceCount = 1;
        ID3D11ShaderResourceView* resources[ resourceCount ] = { 
            distToEdgeTexture.getShaderResourceView()
        };

        deviceContext.CSSetShaderResources( 0, resourceCount, resources );
    }

    D3D11_MAPPED_SUBRESOURCE mappedResource;
    ConstantBuffer* dataPtr;

    HRESULT result = deviceContext.Map( m_constantInputBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource );
    if ( result < 0 ) throw std::exception( "EdgeDistanceComputeShader::setParameters - mapping constant buffer to CPU memory failed." );

    dataPtr = (ConstantBuffer*)mappedResource.pData;

    dataPtr->passIndex = passIndex;
    dataPtr->pad1      = float3( 0.0f, 0.0f, 0.0f ); // Padding.

    deviceContext.Unmap( m_constantInputBuffer.Get(), 0 );

    deviceContext.CSSetConstantBuffers( 0, 1, m_constantInputBuffer.GetAddressOf() );
}
void RasterizingShadowsComputeShader::setParameters(
    ID3D11DeviceContext& deviceContext,
    const float3& cameraPos,
    const Light& light,
    const Texture2DSpecBind< TexBind::ShaderResource, float4 >& rayOriginTexture,
    const Texture2DSpecBind< TexBind::ShaderResource, float4 >& surfaceNormalTexture,
    const int outputTextureWidth, const int outputTextureHeight )
{
    if ( !m_compiled ) 
        throw std::exception( "RasterizingShadowsComputeShader::setParameters - Shader hasn't been compiled yet." );

    std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, float > > shadowMap;

    if ( light.getType() == Light::Type::SpotLight )
        shadowMap = static_cast<const SpotLight&>( light ).getShadowMap();


    { // Set input buffers and textures.
        const unsigned int resourceCount = 3;
        ID3D11ShaderResourceView* resources[ resourceCount ] = {
            rayOriginTexture.getShaderResourceView(),
            surfaceNormalTexture.getShaderResourceView(),
            shadowMap ? shadowMap->getShaderResourceView() : nullptr,
        };

        deviceContext.CSSetShaderResources( 0, resourceCount, resources );
    }

    { // Set constant buffer.
        D3D11_MAPPED_SUBRESOURCE mappedResource;
        ConstantBuffer* dataPtr;

        HRESULT result = deviceContext.Map( m_constantInputBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource );
        if ( result < 0 ) 
            throw std::exception( "RasterizingShadowsComputeShader::setParameters - mapping constant buffer to CPU memory failed." );

        dataPtr = (ConstantBuffer*)mappedResource.pData;

        dataPtr->outputTextureSize  = float2( (float)outputTextureWidth, (float)outputTextureHeight );
        dataPtr->lightPosition      = light.getPosition();
        dataPtr->lightEmitterRadius = light.getEmitterRadius();

        const SpotLight& spotLight = static_cast<const SpotLight&>( light );

        dataPtr->shadowMapViewMatrix       = spotLight.getShadowMapViewMatrix().getTranspose();
        dataPtr->shadowMapProjectionMatrix = spotLight.getShadowMapProjectionMatrix().getTranspose();
        dataPtr->lightConeMinDot           = cos( spotLight.getConeAngle() );
        dataPtr->lightDirection            = spotLight.getDirection();
        dataPtr->cameraPosition            = cameraPos;

        deviceContext.Unmap( m_constantInputBuffer.Get(), 0 );

        deviceContext.CSSetConstantBuffers( 0, 1, m_constantInputBuffer.GetAddressOf() );
    }

    { // Set texture samplers.
        ID3D11SamplerState* samplerStates[] = { m_pointSamplerState.Get(), m_linearSamplerState.Get() };
        deviceContext.CSSetSamplers( 0, 2, samplerStates );
    }
}
Ejemplo n.º 3
0
	/** Sets a buffer of values on the current shader instance. */
	void Shader::SetConstantBuffer(u32 index, std::shared_ptr<ConstantBufferBase> buffer)
	{
		ID3D11Buffer* bufferD3D = buffer != nullptr ? buffer->GetBuffer() : nullptr;
		ID3D11DeviceContext* context = GetParent()->GetDeviceContext();
		switch(GetType())
		{
		case ShaderType::Compute:
			context->CSSetConstantBuffers(index, 1, &bufferD3D);
			break;
		case ShaderType::Domain:
			context->DSSetConstantBuffers(index, 1, &bufferD3D);
			break;
		case ShaderType::Geometry:
			context->GSSetConstantBuffers(index, 1, &bufferD3D);
			break;
		case ShaderType::Hull:
			context->HSSetConstantBuffers(index, 1, &bufferD3D);
			break;
		case ShaderType::Pixel:
			context->PSSetConstantBuffers(index, 1, &bufferD3D);
			break;
		case ShaderType::Vertex:
			context->VSSetConstantBuffers(index, 1, &bufferD3D);
			break;
		}
	}
void GenerateFirstRefractedRaysComputeShader::setParameters( ID3D11DeviceContext& deviceContext, const float3 cameraPos, const float3 viewportCenter, 
                                                             const float3 viewportUp, const float3 viewportRight, const float2 viewportSize,
                                                             const Texture2DSpecBind< TexBind::ShaderResource, float4 >& positionTexture,
                                                             const Texture2DSpecBind< TexBind::ShaderResource, float4 >& normalTexture,
                                                             const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& roughnessTexture,
                                                             const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& refractiveIndexTexture,
                                                             const Texture2DSpecBind< TexBind::ShaderResource, uchar4 >& contributionTermTexture,
                                                             const int outputTextureWidth, const int outputTextureHeight )
{
    if ( !m_compiled ) throw std::exception( "GenerateFirstRefractedRaysComputeShader::setParameters - Shader hasn't been compiled yet." );

    { // Set input buffers and textures.
        m_resourceCount = 5 /*+ (int)refractiveIndexTextures.size()*/;
        std::vector< ID3D11ShaderResourceView* > resources;
        resources.reserve( m_resourceCount );

        resources.push_back( positionTexture.getShaderResourceView() );
        resources.push_back( normalTexture.getShaderResourceView() );
        resources.push_back( roughnessTexture.getShaderResourceView() );
        resources.push_back( refractiveIndexTexture.getShaderResourceView() );
        resources.push_back( contributionTermTexture.getShaderResourceView() );

        /*for ( int i = 0; i < refractiveIndexTextures .size(); ++i )
            resources.push_back( refractiveIndexTextures[ i ]->getShaderResourceView() );*/

        deviceContext.CSSetShaderResources( 0, m_resourceCount, resources.data() );
    }

    D3D11_MAPPED_SUBRESOURCE mappedResource;
    ConstantBuffer* dataPtr;

    HRESULT result = deviceContext.Map( m_constantInputBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource );
    if ( result < 0 ) throw std::exception( "GenerateFirstRefractedRaysComputeShader::setParameters - mapping constant buffer to CPU memory failed." );

    dataPtr = (ConstantBuffer*)mappedResource.pData;

    dataPtr->cameraPos          = cameraPos;
    dataPtr->viewportCenter     = viewportCenter;
    dataPtr->viewportUp         = viewportUp;
    dataPtr->viewportRight      = viewportRight;
    dataPtr->viewportSizeHalf   = viewportSize / 2.0f;
    dataPtr->outputTextureSize  = float2( (float)outputTextureWidth, (float)outputTextureHeight );

    // Padding.
    dataPtr->pad1 = 0.0f;
    dataPtr->pad2 = 0.0f;
    dataPtr->pad3 = 0.0f;
    dataPtr->pad4 = 0.0f;
    dataPtr->pad5 = float2( 0.0f, 0.0f );

    deviceContext.Unmap( m_constantInputBuffer.Get(), 0 );

    deviceContext.CSSetConstantBuffers( 0, 1, m_constantInputBuffer.GetAddressOf() );

    ID3D11SamplerState* samplerStates[] = { m_samplerStateLinearFilter.Get() };
    deviceContext.CSSetSamplers( 0, 1, samplerStates );
}
void ShadingComputeShader::setParameters( ID3D11DeviceContext& deviceContext, const float3& cameraPos,
                                          const std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, float4 > > positionTexture,
                                          const std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, uchar4 > > albedoTexture, 
                                          const std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, unsigned char > > metalnessTexture, 
                                          const std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, unsigned char > > roughnessTexture, 
                                          const std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, float4 > > normalTexture,
										  const std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, unsigned char > > illuminationTexture,
									      const Light& light )
{
    if ( !m_compiled ) 
        throw std::exception( "ShadingComputeShader::setParameters - Shader hasn't been compiled yet." );

    { // Set input buffers and textures.
        const unsigned int resourceCount = 6;
        ID3D11ShaderResourceView* resources[ resourceCount ] = { 
            positionTexture->getShaderResourceView(),
            albedoTexture->getShaderResourceView(),
            metalnessTexture->getShaderResourceView(),
            roughnessTexture->getShaderResourceView(),
            normalTexture->getShaderResourceView(),
			illuminationTexture->getShaderResourceView()
        };

        deviceContext.CSSetShaderResources( 0, resourceCount, resources );
    }

    { // Set constant buffer.
        D3D11_MAPPED_SUBRESOURCE mappedResource;
        ConstantBuffer* dataPtr;

        HRESULT result = deviceContext.Map( m_constantInputBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource );
        if ( result < 0 ) 
            throw std::exception( "ShadingComputeShader::setParameters - mapping constant buffer to CPU memory failed." );

        dataPtr = (ConstantBuffer*)mappedResource.pData;

        dataPtr->cameraPos         = cameraPos;
        dataPtr->pad1		       = 0.0f;
        dataPtr->lightPosition     = float4( light.getPosition(), 0.0f );
        dataPtr->lightColor        = float4( light.getColor(), 0.0f );
        dataPtr->outputTextureSize = float2( (float)positionTexture->getWidth(), (float)positionTexture->getHeight() ); // #TODO: Size should be taken from real output texture, not one of inputs (right now, we are assuming they have the same size).

        deviceContext.Unmap( m_constantInputBuffer.Get(), 0 );

        deviceContext.CSSetConstantBuffers( 0, 1, m_constantInputBuffer.GetAddressOf() );
    }

    { // Set texture sampler.
        ID3D11SamplerState* samplers[] = { m_linearSamplerState.Get(), m_pointSamplerState.Get() };
        deviceContext.CSSetSamplers( 0, 2, samplers );
    }
}
void GenerateRefractedRaysComputeShader::setParameters( ID3D11DeviceContext& deviceContext, const unsigned int refractionLevel,
                                                        const Texture2DSpecBind< TexBind::ShaderResource, float4 >& rayDirectionTexture,
                                                        const Texture2DSpecBind< TexBind::ShaderResource, float4 >& rayHitPositionTexture,
                                                        const Texture2DSpecBind< TexBind::ShaderResource, float4 >& rayHitNormalTexture,
                                                        const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& rayHitRoughnessTexture,
                                                        const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& rayHitRefractiveIndexTexture,
                                                        const Texture2DSpecBind< TexBind::ShaderResource, uchar4 >& contributionTermTexture,
                                                        const std::shared_ptr< Texture2DSpecBind< TexBind::ShaderResource, unsigned char > > prevRefractiveIndexTexture, // Only makes sense for refraction level >= 2.
                                                        const std::shared_ptr< const Texture2DSpecBind< TexBind::ShaderResource, unsigned char > > currentRefractiveIndexTexture,
                                                        const int outputTextureWidth, const int outputTextureHeight )
{
    if ( !m_compiled ) throw std::exception( "GenerateRefractedRaysComputeShader::setParameters - Shader hasn't been compiled yet." );

    { // Set input buffers and textures.
        m_resourceCount = 8;
        std::vector< ID3D11ShaderResourceView* > resources;
        resources.reserve( m_resourceCount );

        resources.push_back( rayDirectionTexture.getShaderResourceView() );
        resources.push_back( rayHitPositionTexture.getShaderResourceView() );
        resources.push_back( rayHitNormalTexture.getShaderResourceView() );
        resources.push_back( rayHitRoughnessTexture.getShaderResourceView() );
        resources.push_back( rayHitRefractiveIndexTexture.getShaderResourceView() );
        resources.push_back( contributionTermTexture.getShaderResourceView() );

        resources.push_back( prevRefractiveIndexTexture ? prevRefractiveIndexTexture->getShaderResourceView() : nullptr );
        resources.push_back( currentRefractiveIndexTexture ? currentRefractiveIndexTexture->getShaderResourceView() : nullptr );

        deviceContext.CSSetShaderResources( 0, m_resourceCount, resources.data() );
    }

    D3D11_MAPPED_SUBRESOURCE mappedResource;
    ConstantBuffer* dataPtr;

    HRESULT result = deviceContext.Map( m_constantInputBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource );
    if ( result < 0 ) throw std::exception( "GenerateRefractedRaysComputeShader::setParameters - mapping constant buffer to CPU memory failed." );

    dataPtr = (ConstantBuffer*)mappedResource.pData;

    dataPtr->refractionLevel   = refractionLevel;
    dataPtr->outputTextureSize = float2( (float)outputTextureWidth, (float)outputTextureHeight );

    deviceContext.Unmap( m_constantInputBuffer.Get(), 0 );

    deviceContext.CSSetConstantBuffers( 0, 1, m_constantInputBuffer.GetAddressOf() );

    ID3D11SamplerState* samplerStates[] = { m_samplerStateLinearFilter.Get() };
    deviceContext.CSSetSamplers( 0, 1, samplerStates );
}
Ejemplo n.º 7
0
//==================================================================================================================================
void HDR::UpdateContantBuffer(int mipLevel0, int mipLevel1, unsigned int width, unsigned int height)
{
	ID3D11DeviceContext* context = mD3DSystem->GetDeviceContext();
	
	cbConstants cConst;
	cConst.width = width;
	cConst.height = height;
	cConst.mipLevel0 = mipLevel0;
	cConst.mipLevel1 = mipLevel1;
	D3D11_MAPPED_SUBRESOURCE mapped_res;
	context->Map(m_pCSConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped_res);
	{
		assert(mapped_res.pData);
		*(cbConstants*)mapped_res.pData = cConst;
	}
	context->Unmap(m_pCSConstants, 0);
	
	context->CSSetConstantBuffers(1, 1, &m_pCSConstants);
}
Ejemplo n.º 8
0
//==================================================================================================================================
void HDR::UpdateBloomConstants(float middleGrey, float bloomThreshold, float bloomMultiplier)
{
	ID3D11DeviceContext* context = mD3DSystem->GetDeviceContext();
	
	cbBloomConstants cBC;
	cBC.g_MiddleGrey = middleGrey;
	cBC.g_BloomThreshold = bloomThreshold;
	cBC.g_BloomMultiplier = bloomMultiplier;
	cBC.padding = 0;
	D3D11_MAPPED_SUBRESOURCE mapped_res;
	context->Map(m_pBloomConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped_res);
	{
		assert(mapped_res.pData);
		*(cbBloomConstants*)mapped_res.pData = cBC;
	}
	context->Unmap(m_pBloomConstants, 0);
	
	context->CSSetConstantBuffers(2, 1, &m_pBloomConstants);
	context->PSSetConstantBuffers(2, 1, &m_pBloomConstants);
}
void BindableProgram::Bind 
 (ID3D11DeviceContext&          context,
  ShaderLibrary&                library,
  const SourceConstantBufferPtr src_buffers [DEVICE_CONSTANT_BUFFER_SLOTS_COUNT],
  const InputLayout&            input_layout,
  BindableProgramContext&       program_context)
{
  try
  {
      //поиск входного лэйаута

    if (!program_context.input_layout)
    {
      program_context.input_layout = program.GetInputLayout (library, input_layout);
   
      context.IASetInputLayout (program_context.input_layout.get ());
    }

      //биндинг программы

    if (!program_context.program_binded)
    {      
      program.Bind (context);

      program_context.program_binded = true;
    }

      //поиск и биндинг буферов
//TODO: bindable buffers cache for dirty switch optimization    
//    if (program_context.has_dirty_buffers)
    {
      ID3D11Buffer* buffers [ShaderType_Num][DEVICE_CONSTANT_BUFFER_SLOTS_COUNT];

      memset (buffers, 0, sizeof (buffers));

      for (BufferPrototypeArray::iterator iter=buffer_prototypes.begin (), end=buffer_prototypes.end (); iter!=end; ++iter)
      {
        TargetConstantBufferPrototype& prototype = **iter;

        const unsigned char* index = prototype.GetSourceBuffersIndices ();
        bool                 dirty = false;  

        for (size_t i=0, count=prototype.GetSourceBuffersCount (); i<count; i++, index++)
          if (program_context.dirty_buffers [*index])
          {
            dirty = true;
            break;
          }

//        if (!dirty)
//          continue;

        TargetConstantBuffer& buffer = prototype.GetBuffer (src_buffers, library);

        buffer.Bind (context, buffers);
      }

        //установка контекста

      context.CSSetConstantBuffers (0, DEVICE_CONSTANT_BUFFER_SLOTS_COUNT, buffers [ShaderType_Compute]);
      context.DSSetConstantBuffers (0, DEVICE_CONSTANT_BUFFER_SLOTS_COUNT, buffers [ShaderType_Domain]);
      context.GSSetConstantBuffers (0, DEVICE_CONSTANT_BUFFER_SLOTS_COUNT, buffers [ShaderType_Geometry]);
      context.HSSetConstantBuffers (0, DEVICE_CONSTANT_BUFFER_SLOTS_COUNT, buffers [ShaderType_Hull]);
      context.PSSetConstantBuffers (0, DEVICE_CONSTANT_BUFFER_SLOTS_COUNT, buffers [ShaderType_Pixel]);
      context.VSSetConstantBuffers (0, DEVICE_CONSTANT_BUFFER_SLOTS_COUNT, buffers [ShaderType_Vertex]);

      program_context.has_dirty_buffers = false;

      memset (program_context.dirty_buffers, 0, sizeof (program_context.dirty_buffers));
    }
  }
  catch (xtl::exception& e)
  {
    e.touch ("render::low_level::dx11::BindableProgram::Bind");
    throw;
  }
}
int _tmain(int /*argc*/, _TCHAR* /*argv[]*/)
{
    // GROUP_SIZE_X defined in kernel.hlsl must match the 
    // groupSize declared here.
    size_t const groupSize = 512;
    size_t const numGroups = 16;
    size_t const dimension = numGroups*groupSize;

    // Create a D3D11 device and immediate context. 
    // TODO: The code below uses the default video adapter, with the
    // default set of feature levels.  Please see the MSDN docs if 
    // you wish to control which adapter and feature level are used.
    D3D_FEATURE_LEVEL featureLevel;
    ID3D11Device* device = nullptr;
    ID3D11DeviceContext* context = nullptr;
    HRESULT hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, 
        NULL, NULL, 0, D3D11_SDK_VERSION, &device, 
        &featureLevel, &context);
    if (FAILED(hr))
    {
        printf("D3D11CreateDevice failed with return code %x\n", hr);
        return hr;
    }

    // Create system memory and fill it with our initial data.  Note that
    // these data structures aren't really necessary , it's just a demonstration
    // of how you can take existing data structures you might have and copy
    // their data to/from GPU computations.
    std::vector<float> x(dimension);
    std::vector<float> y(dimension);
    std::vector<float> z(dimension);
    float const a = 2.0f;
    for (size_t i = 0; i < dimension; ++ i)
    {
        x[i] = static_cast<float>(i);
        y[i] = 100 - static_cast<float>(i);
    }

    // Create structured buffers for the "x" and "y" vectors.
    D3D11_BUFFER_DESC inputBufferDesc;
    inputBufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
    // The buffers are read-only by the GPU, writeable by the CPU.
    // TODO: If you will never again upate the data in a GPU buffer,
    // you might want to look at using a D3D11_SUBRESOURCE_DATA here to
    // provide the initialization data instead of doing the mapping 
    // and copying that happens below.
    inputBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
    inputBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    inputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    inputBufferDesc.StructureByteStride = sizeof(float);
    inputBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* xBuffer = nullptr;
    hr = device->CreateBuffer(&inputBufferDesc, NULL, &xBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for x buffer with return code %x\n", hr);
        return hr;
    }
    // We can re-use inputBufferDesc here because the layout and usage of the x
    // and y buffers is exactly the same.
    ID3D11Buffer* yBuffer = nullptr;
    hr = device->CreateBuffer(&inputBufferDesc, NULL, &yBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for x buffer with return code %x\n", hr);
        return hr;
    }

    // Create shader resource views for the "x" and "y" buffers.
    // TODO: You can optionally provide a D3D11_SHADER_RESOURCE_VIEW_DESC
    // as the second parameter if you need to use only part of the buffer
    // inside the compute shader.
    ID3D11ShaderResourceView* xSRV = nullptr;
    hr = device->CreateShaderResourceView(xBuffer, NULL, &xSRV);
    if (FAILED(hr))
    {
        printf("CreateShaderResourceView failed for x buffer with return code %x\n", hr);
        return hr;
    }

    ID3D11ShaderResourceView* ySRV = nullptr;
    hr = device->CreateShaderResourceView(yBuffer, NULL, &ySRV);
    if (FAILED(hr))
    {
        printf("CreateShaderResourceView failed for y buffer with return code %x\n", hr);
        return hr;
    }

    // Create a structured buffer for the "z" vector.  This buffer needs to be 
    // writeable by the GPU, so we can't create it with CPU read/write access.
    D3D11_BUFFER_DESC outputBufferDesc;
    outputBufferDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS 
        | D3D11_BIND_SHADER_RESOURCE;
    outputBufferDesc.Usage = D3D11_USAGE_DEFAULT;
    outputBufferDesc.CPUAccessFlags = 0;
    outputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    outputBufferDesc.StructureByteStride = sizeof(float);
    outputBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* zBuffer = nullptr;
    hr = device->CreateBuffer(&outputBufferDesc, NULL, &zBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for z buffer with return code %x\n", hr);
        return hr;
    }

    // Create an unordered access view for the "z" vector.  
    D3D11_UNORDERED_ACCESS_VIEW_DESC outputUAVDesc;
    outputUAVDesc.Buffer.FirstElement = 0;        
    outputUAVDesc.Buffer.Flags = 0;            
    outputUAVDesc.Buffer.NumElements = dimension;
    outputUAVDesc.Format = DXGI_FORMAT_UNKNOWN;    
    outputUAVDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;   
    ID3D11UnorderedAccessView* zBufferUAV;
    hr = device->CreateUnorderedAccessView(zBuffer, 
        &outputUAVDesc, &zBufferUAV);
    if (FAILED(hr))
    {
        printf("CreateUnorderedAccessView failed for z buffer with return code %x\n", hr);
        return hr;
    }

    // Create a staging buffer, which will be used to copy back from zBuffer.
    D3D11_BUFFER_DESC stagingBufferDesc;
    stagingBufferDesc.BindFlags = 0;
    stagingBufferDesc.Usage = D3D11_USAGE_STAGING;  
    stagingBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    stagingBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    stagingBufferDesc.StructureByteStride = sizeof(float);
    stagingBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* stagingBuffer;
    hr = device->CreateBuffer(&stagingBufferDesc, NULL, &stagingBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for staging buffer with return code %x\n", hr);
        return hr;
    }

    // Create a constant buffer (this buffer is used to pass the constant 
    // value 'a' to the kernel as cbuffer Constants).
    D3D11_BUFFER_DESC cbDesc;
    cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
    cbDesc.Usage = D3D11_USAGE_DYNAMIC;  
    cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    cbDesc.MiscFlags = 0;
    // Even though the constant buffer only has one float, DX expects
    // ByteWidth to be a multiple of 4 floats (i.e., one 128-bit register).
    cbDesc.ByteWidth = sizeof(float)*4;
    ID3D11Buffer* constantBuffer = nullptr;
    hr = device->CreateBuffer( &cbDesc, NULL, &constantBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for constant buffer with return code %x\n", hr);
        return hr;
    }

    // Map the constant buffer and set the constant value 'a'.
    D3D11_MAPPED_SUBRESOURCE mappedResource;
    context->Map(constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* constants = reinterpret_cast<float*>(mappedResource.pData);
    constants[0] = a;
    constants = nullptr;
    context->Unmap(constantBuffer, 0);

    // Map the x buffer and copy our data into it.
    context->Map(xBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* xvalues = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(xvalues, &x[0], sizeof(float)*x.size());
    xvalues = nullptr;
    context->Unmap(xBuffer, 0);

    // Map the y buffer and copy our data into it.
    context->Map(yBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* yvalues = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(yvalues, &y[0], sizeof(float)*y.size());
    yvalues = nullptr;
    context->Unmap(yBuffer, 0);

    // Compile the compute shader into a blob.
    ID3DBlob* errorBlob = nullptr;
    ID3DBlob* shaderBlob = nullptr;
    hr = D3DX11CompileFromFile(L"kernel.hlsl", NULL, NULL, "saxpy", "cs_4_0",
        D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &shaderBlob, &errorBlob, NULL);
    if (FAILED(hr))
    {
        // Print out the error message if there is one.
        if (errorBlob)
        {
            char const* message = (char*)errorBlob->GetBufferPointer();
            printf("kernel.hlsl failed to compile; error message:\n");
            printf("%s\n", message);
            errorBlob->Release();
        }
        return hr;
    }

    // Create a shader object from the compiled blob.
    ID3D11ComputeShader* computeShader;
    hr = device->CreateComputeShader(shaderBlob->GetBufferPointer(), 
        shaderBlob->GetBufferSize(), NULL, &computeShader);
    if (FAILED(hr))
    {
        printf("CreateComputeShader failed with return code %x\n", hr);
        return hr;
    }

    // Make the shader active.
    context->CSSetShader(computeShader, NULL, 0);

    // Attach the z buffer to the output via its unordered access view.
    UINT initCounts = 0xFFFFFFFF;
    context->CSSetUnorderedAccessViews(0, 1, &zBufferUAV, &initCounts);

    // Attach the input buffers via their shader resource views.
    context->CSSetShaderResources(0, 1, &xSRV);
    context->CSSetShaderResources(1, 1, &ySRV);

    // Attach the constant buffer
    context->CSSetConstantBuffers(0, 1, &constantBuffer);

    // Execute the shader, in 'numGroups' groups of 'groupSize' threads each.
    context->Dispatch(numGroups, 1, 1);

    // Copy the z buffer to the staging buffer so that we can 
    // retrieve the data for accesss by the CPU.
    context->CopyResource(stagingBuffer, zBuffer);

    // Map the staging buffer for reading.
    context->Map(stagingBuffer, 0, D3D11_MAP_READ, 0, &mappedResource);
    float* zData = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(&z[0], zData, sizeof(float)*z.size());
    zData = nullptr;
    context->Unmap(stagingBuffer, 0);

    // Now compare the GPU results against expected values.
    bool resultOK = true;
    for (size_t i = 0; i < x.size(); ++ i)
    {
        // NOTE: This comparison assumes the GPU produces *exactly* the 
        // same result as the CPU.  In general, this will not be the case
        // with floating-point calculations.
        float const expected = a*x[i] + y[i];
        if (z[i] != expected)
        {
            printf("Unexpected result at position %lu: expected %.7e, got %.7e\n",
                i, expected, z[i]);
            resultOK = false;
        }
    }

    if (!resultOK)
    {
        printf("GPU results differed from the CPU results.\n");
        OutputDebugStringA("GPU results differed from the CPU results.\n");
        return 1;
    }

    printf("GPU output matched the CPU results.\n");
    OutputDebugStringA("GPU output matched the CPU results.\n");

    // Disconnect everything from the pipeline.
    ID3D11UnorderedAccessView* nullUAV = nullptr;
    context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, &initCounts);
    ID3D11ShaderResourceView* nullSRV = nullptr;
    context->CSSetShaderResources(0, 1, &nullSRV);
    context->CSSetShaderResources(1, 1, &nullSRV);
    ID3D11Buffer* nullBuffer = nullptr;
    context->CSSetConstantBuffers(0, 1, &nullBuffer);

    // Release resources.  Again, note that none of the error checks above
    // release resources that have been allocated up to this point, so the 
    // sample doesn't clean up after itself correctly unless everything succeeds.
    computeShader->Release();
    shaderBlob->Release();
    constantBuffer->Release();
    stagingBuffer->Release();
    zBufferUAV->Release();
    zBuffer->Release();
    xSRV->Release();
    xBuffer->Release();
    ySRV->Release();
    yBuffer->Release();
    context->Release();
    device->Release();

    return 0;
}
void RaytracingSecondaryRaysComputeShader::setParameters( ID3D11DeviceContext& deviceContext,
                                                          const Texture2DSpecBind< TexBind::UnorderedAccess_ShaderResource, float4 >& rayOriginsTexture,
                                                          const Texture2DSpecBind< TexBind::UnorderedAccess_ShaderResource, float4 >& rayDirectionsTexture, 
                                                          const BlockMesh& mesh, 
                                                          const float43& worldMatrix, 
                                                          const float3 boundingBoxMin, 
                                                          const float3 boundingBoxMax,
                                                          const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& alphaTexture,
                                                          const Texture2DSpecBind< TexBind::ShaderResource, uchar4 >& emissiveTexture,
                                                          const Texture2DSpecBind< TexBind::ShaderResource, uchar4 >& albedoTexture,
                                                          const Texture2DSpecBind< TexBind::ShaderResource, uchar4 >& normalTexture,
                                                          const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& metalnessTexture,
                                                          const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& roughnessTexture,
                                                          const Texture2DSpecBind< TexBind::ShaderResource, unsigned char >& indexOfRefractionTexture,
                                                          const int outputTextureWidth, const int outputTextureHeight )
{
    if ( !m_compiled ) throw std::exception( "RaytracingSecondaryRaysComputeShader::setParameters - Shader hasn't been compiled yet." );

    { // Set input buffers and textures.
        const unsigned int resourceCount = 16;
        ID3D11ShaderResourceView* resources[ resourceCount ] = { 
            rayOriginsTexture.getShaderResourceView(),
            rayDirectionsTexture.getShaderResourceView(), 
            mesh.getVertexBufferResource(), 
            mesh.getNormalBufferResource(), 
            mesh.getTangentBufferResource(),
            !mesh.getTexcoordBufferResources().empty() ? mesh.getTexcoordBufferResources().front() : nullptr,
            mesh.getTriangleBufferResource(),
            mesh.getBvhTreeBufferNodesShaderResourceView().Get(),
            mesh.getBvhTreeBufferNodesExtentsShaderResourceView().Get(),
            //mesh.getBvhTreeBufferTrianglesShaderResourceView().Get(),
            alphaTexture.getShaderResourceView(),
            emissiveTexture.getShaderResourceView(),
            albedoTexture.getShaderResourceView(),
            normalTexture.getShaderResourceView(),
            metalnessTexture.getShaderResourceView(),
            roughnessTexture.getShaderResourceView(),
            indexOfRefractionTexture.getShaderResourceView()
        };

        deviceContext.CSSetShaderResources( 0, resourceCount, resources );
    }

    { // Set constant buffer.
        D3D11_MAPPED_SUBRESOURCE mappedResource;
        ConstantBuffer* dataPtr;

        HRESULT result = deviceContext.Map( m_constantInputBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource );
        if ( result < 0 ) throw std::exception( "RaytracingSecondaryRaysComputeShader::setParameters - mapping constant buffer to CPU memory failed." );

        dataPtr = (ConstantBuffer*)mappedResource.pData;

        dataPtr->localToWorldMatrix = float44( worldMatrix ).getTranspose(); // Transpose from row-major to column-major to fit each column in one register.
        dataPtr->worldToLocalMatrix = float44( worldMatrix.getScaleOrientationTranslationInverse() ).getTranspose(); // Transpose from row-major to column-major to fit each column in one register.
        dataPtr->boundingBoxMin     = boundingBoxMin;
        dataPtr->boundingBoxMax     = boundingBoxMax;
        dataPtr->outputTextureSize  = float2( (float)outputTextureWidth, (float)outputTextureHeight );

        // Padding.
        dataPtr->pad1 = 0.0f;
        dataPtr->pad2 = 0.0f;

        deviceContext.Unmap( m_constantInputBuffer.Get(), 0 );

        deviceContext.CSSetConstantBuffers( 0, 1, m_constantInputBuffer.GetAddressOf() );
    }

    { // Set texture sampler.
        deviceContext.CSSetSamplers( 0, 1, m_samplerState.GetAddressOf() );
    }
}