示例#1
0
    // Creates the terrain vertex, pixel, hull, domain and compute shaders from
    // their compiled shader object files and stores them in `shaders`.
    //
    // device  - Direct3D 11 device used to create the shader objects.
    // shaders - Receives the created objects. Every terrain member is asserted
    //           to be null on entry.
    //
    // The HRESULT of each creation call is handed to DxErrorChecker.
    void init(ID3D11Device& device, Shaders& shaders)
    {
        assert(shaders.mTerrainDS == nullptr);
        assert(shaders.mTerrainVS == nullptr);
        assert(shaders.mTerrainHS == nullptr);
        assert(shaders.mTerrainIL == nullptr);
        assert(shaders.mTerrainPS == nullptr);
        assert(shaders.mTerrainCS == nullptr);

        // Byte code buffer passed to computeShaderByteCode for every stage and
        // then used to create that stage's shader.
        std::vector<char> shaderByteCode;

        // data() is used instead of &shaderByteCode[0] throughout: indexing an
        // empty vector is undefined behaviour, whereas data() is always valid
        // and lets the Create* call report the failure.

        // Vertex shader (its byte code is also passed to buildShapesVertexLayout
        // together with the input layout member).
        computeShaderByteCode(L"HLSL/TerrainVS.cso", shaderByteCode);
        buildShapesVertexLayout(device, shaderByteCode, shaders.mTerrainIL);
        HRESULT result = device.CreateVertexShader(
            shaderByteCode.data(),
            shaderByteCode.size(),
            nullptr,
            &shaders.mTerrainVS);
        DxErrorChecker(result);

        // Pixel shader
        computeShaderByteCode(L"HLSL/TerrainPS.cso", shaderByteCode);
        result = device.CreatePixelShader(
            shaderByteCode.data(),
            shaderByteCode.size(),
            nullptr,
            &shaders.mTerrainPS);
        DxErrorChecker(result);

        // Hull shader
        computeShaderByteCode(L"HLSL/TerrainHS.cso", shaderByteCode);
        result = device.CreateHullShader(
            shaderByteCode.data(),
            shaderByteCode.size(),
            nullptr,
            &shaders.mTerrainHS);
        DxErrorChecker(result);

        // Domain shader
        computeShaderByteCode(L"HLSL/TerrainDS.cso", shaderByteCode);
        result = device.CreateDomainShader(
            shaderByteCode.data(),
            shaderByteCode.size(),
            nullptr,
            &shaders.mTerrainDS);
        DxErrorChecker(result);

        // Compute shader
        computeShaderByteCode(L"HLSL/TerrainCS.cso", shaderByteCode);
        result = device.CreateComputeShader(
            shaderByteCode.data(),
            shaderByteCode.size(),
            nullptr,
            &shaders.mTerrainCS);
        DxErrorChecker(result);
    }
// Creates the native D3D11 shader object for every stage that m_desc reports
// as present, in the order vertex, geometry, hull, domain, pixel, compute.
//
// Returns VP_FAILURE as soon as one creation call fails and VP_SUCCESS
// otherwise. Shader objects created before a failing stage are not released
// by this function.
vpResult vprShaderProgramDX11::init()
{
	vprDeviceDX11* dx11Device = static_cast<vprDeviceDX11*>(m_device);
	ID3D11Device* nativeDevice = dx11Device->getNativeDevice();

	// HRESULTs are tested with FAILED(): only negative codes are errors, so a
	// non-zero success code such as S_FALSE must not abort initialisation.
	if (m_desc.hasVertexShader())
	{
		if (FAILED(nativeDevice->CreateVertexShader(m_desc.m_bytecodes[vprShaderStage::VERTEX_SHADER]->getPointer(),
				m_desc.m_bytecodes[vprShaderStage::VERTEX_SHADER]->getSize(), NULL, &m_nativeVertexShader)))
		{
			return VP_FAILURE;
		}
	}
	if (m_desc.hasGeometryShader())
	{
		if (FAILED(nativeDevice->CreateGeometryShader(m_desc.m_bytecodes[vprShaderStage::GEOMETRY_SHADER]->getPointer(),
				m_desc.m_bytecodes[vprShaderStage::GEOMETRY_SHADER]->getSize(), NULL, &m_nativeGeometryShader)))
		{
			return VP_FAILURE;
		}
	}
	if (m_desc.hasHullShader())
	{
		if (FAILED(nativeDevice->CreateHullShader(m_desc.m_bytecodes[vprShaderStage::HULL_SHADER]->getPointer(),
				m_desc.m_bytecodes[vprShaderStage::HULL_SHADER]->getSize(), NULL, &m_nativeHullShader)))
		{
			return VP_FAILURE;
		}
	}
	if (m_desc.hasDomainShader())
	{
		if (FAILED(nativeDevice->CreateDomainShader(m_desc.m_bytecodes[vprShaderStage::DOMAIN_SHADER]->getPointer(),
				m_desc.m_bytecodes[vprShaderStage::DOMAIN_SHADER]->getSize(), NULL, &m_nativeDomainShader)))
		{
			return VP_FAILURE;
		}
	}
	if (m_desc.hasPixelShader())
	{
		if (FAILED(nativeDevice->CreatePixelShader(m_desc.m_bytecodes[vprShaderStage::PIXEL_SHADER]->getPointer(),
				m_desc.m_bytecodes[vprShaderStage::PIXEL_SHADER]->getSize(), NULL, &m_nativePixelShader)))
		{
			return VP_FAILURE;
		}
	}
	if (m_desc.hasComputeShader())
	{
		if (FAILED(nativeDevice->CreateComputeShader(m_desc.m_bytecodes[vprShaderStage::COMPUTE_SHADER]->getPointer(),
				m_desc.m_bytecodes[vprShaderStage::COMPUTE_SHADER]->getSize(), NULL, &m_nativeComputeShader)))
		{
			return VP_FAILURE;
		}
	}

	return VP_SUCCESS;
}
示例#3
0
	// Creates the native D3D11 shader object of the requested type from
	// compiled shader code.
	//
	// graphicsSystem - Graphics system the D3D11 device is obtained from; also
	//                  passed to dispose() when creation fails.
	// type           - Shader stage to create; an unsupported value triggers
	//                  TIKI_BREAK.
	// pInitData      - Compiled shader code; must not be null.
	// dataSize       - Size of the shader code; must be greater than zero.
	//
	// Returns true when the creation call did not fail and
	// m_platformData.pShaderObject is non-null; otherwise disposes the shader
	// and returns false.
	bool Shader::create( GraphicsSystem& graphicsSystem, ShaderType type, const void* pInitData, uint dataSize )
	{
		TIKI_ASSERT( pInitData != nullptr );
		TIKI_ASSERT( dataSize > 0u );

		// Remember the stage, the code and a CRC hash of the code.
		m_type								= type;
		m_platformData.pShaderCode			= pInitData;
		m_platformData.shaderCodeLength		= dataSize;
		m_hash = crcBytes( m_platformData.pShaderCode, m_platformData.shaderCodeLength );

		ID3D11Device* pD3dDevice = GraphicsSystemPlatform::getDevice( graphicsSystem );

		// S_FALSE is kept when no creation call is made (unsupported type); it
		// is not a failure code, so the null check below decides that case.
		HRESULT hr = S_FALSE;
		switch ( m_type )
		{
		case ShaderType_VertexShader:
			hr = pD3dDevice->CreateVertexShader( pInitData, dataSize, nullptr, &m_platformData.pVertexShader );
			break;

		case ShaderType_HullShader:
			hr = pD3dDevice->CreateHullShader( pInitData, dataSize, nullptr, &m_platformData.pHullShader );
			break;

		case ShaderType_DomainShader:
			hr = pD3dDevice->CreateDomainShader( pInitData, dataSize, nullptr, &m_platformData.pDomainShader );
			break;

		case ShaderType_GeometrieShader:
			hr = pD3dDevice->CreateGeometryShader( pInitData, dataSize, nullptr, &m_platformData.pGeometryShader );
			break;

		case ShaderType_PixelShader:
			hr = pD3dDevice->CreatePixelShader( pInitData, dataSize, nullptr, &m_platformData.pPixelShader );
			break;

		case ShaderType_ComputeShader:
			hr = pD3dDevice->CreateComputeShader( pInitData, dataSize, nullptr, &m_platformData.pComputeShader );
			break;

		default:
			TIKI_BREAK( "[graphics] ShaderType not supported.\n" );
			break;
		}

		// NOTE(review): pShaderObject presumably aliases the typed shader
		// pointers written above - confirm against the platform data definition.
		const bool isCreated = !FAILED( hr ) && m_platformData.pShaderObject != nullptr;
		if ( !isCreated )
		{
			dispose( graphicsSystem );
		}

		return isCreated;
	}
示例#4
0
// Creates the 3D voxel textures (final, normal, previous bounces, shadow and
// caustics) with their UAVs/SRVs, compiles the mipVoxels compute shader and
// registers the views with the material loader.
//
// size - edge length used for width, height and depth of every voxel texture.
//
// A shader compilation or creation failure is logged and the compute shader is
// skipped; the texture views are still registered.
void AaVoxelScene::initScene(int size)
{
	ID3D11Device* mDevice = mSceneMgr->getRenderSystem()->getDevice();

	this->size = size;

	// Shared description for the mip-mapped voxel textures.
	D3D11_TEXTURE3D_DESC desc;
	ZeroMemory( &desc, sizeof( desc ) );
	desc.Usage = D3D11_USAGE_DEFAULT;
	desc.Depth = desc.Height = desc.Width = size;
	desc.MiscFlags = D3D11_RESOURCE_MISC_GENERATE_MIPS;
	desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
	desc.CPUAccessFlags = 0;
	desc.MipLevels = 0;


	//FINAL
	desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;//DXGI_FORMAT_R8G8B8A8_UNORM;
	createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &finalVoxelTexture, &fVoxelUAV, &fVoxelSRV);

	//NORMAL
	desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
	createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &voxelNormalTexture, &voxelNormalUAV, &voxelNormalSRV);

	//PAST BOUNCES
	desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
	createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &bounceVoxelTexture, &bVoxelUAV, &bVoxelSRV);

	//SHADOW
	desc.Format = DXGI_FORMAT_R32_FLOAT;
	createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &voxelShadowTexture, &voxelShadowUAV, &voxelShadowSRV);

	//CAUSTICS (single mip level, no render-target binding, no mip generation)
	ZeroMemory( &desc, sizeof( desc ) );
	desc.Usage = D3D11_USAGE_DEFAULT;
	desc.Depth = desc.Height = desc.Width = size;
	desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
	desc.CPUAccessFlags = 0;
	desc.MipLevels = 1;
	desc.MiscFlags = 0;
	desc.Format = DXGI_FORMAT_R32_FLOAT;
	createUAVTexture(size, desc, 1, mSceneMgr, mDevice, &causticsVoxelTexture, &voxelCausticUAV, &voxelCausticSRV);



	//CS test

	ID3DBlob* csBuffer = 0;
	ID3DBlob* errorBuffer = 0;

	DWORD shaderFlags = D3D10_SHADER_ENABLE_STRICTNESS;
#if defined( DEBUG ) || defined( _DEBUG )
	shaderFlags |= D3DCOMPILE_DEBUG;
#endif

	// Read the shader source line by line, terminating every line with '\n'.
	std::string str,strTotal;
	std::ifstream in;
	in.open("../data/gfx/mipVoxels.cs");
	while ( getline(in,str) )
	{
		strTotal += str+"\n";
	}

	HRESULT result = D3DCompile(strTotal.c_str(),strTotal.length(),"mipVoxels.cs",0,0,"CSMain" , "cs_5_0" ,	shaderFlags, 0, &csBuffer, &errorBuffer );

	// The error blob may be returned on failure or for warnings; it is reported
	// only on failure and released exactly once in either case.
	if( errorBuffer != 0 )
	{
		if( FAILED( result ) )
		{
			OutputDebugStringA(( char* )errorBuffer->GetBufferPointer( ));
			std::string errorMessage=( char* )errorBuffer->GetBufferPointer( );
			AaLogger::getLogger()->writeMessage("ERROR "+errorMessage);
		}
		errorBuffer->Release();
		errorBuffer = 0;
	}

	// Create the shader only when compilation produced byte code; a failed
	// compile leaves csBuffer null and must not be dereferenced.
	if( !FAILED( result ) && csBuffer != 0 )
	{
		result = mDevice->CreateComputeShader( csBuffer->GetBufferPointer( ),
			csBuffer->GetBufferSize( ), 0, &csMipVoxels);
		if( FAILED( result ) )
			AaLogger::getLogger()->writeMessage(std::string("ERROR could not create compute shader mipVoxels.cs"));
	}

	if( csBuffer != 0 )
		csBuffer->Release();

	//mSceneMgr->getMaterialLoader()->addTextureResource(cVoxelSRV,"voxelScene");
	//mSceneMgr->getMaterialLoader()->addUAV(cVoxelUAV,"voxelScene");

	mSceneMgr->getMaterialLoader()->addTextureResource(bVoxelSRV,"previousBounces");
	mSceneMgr->getMaterialLoader()->addUAV(bVoxelUAV,"previousBounces");

	mSceneMgr->getMaterialLoader()->addTextureResource(fVoxelSRV,"fVoxelScene");
	mSceneMgr->getMaterialLoader()->addUAV(fVoxelUAV,"fVoxelScene");
	mSceneMgr->getMaterialLoader()->addTextureResource(voxelShadowSRV,"voxelShadowScene");
	mSceneMgr->getMaterialLoader()->addUAV(voxelShadowUAV,"voxelShadowScene");
	mSceneMgr->getMaterialLoader()->addTextureResource(voxelCausticSRV,"voxelCausticScene");
	mSceneMgr->getMaterialLoader()->addUAV(voxelCausticUAV,"voxelCausticScene");
	mSceneMgr->getMaterialLoader()->addTextureResource(voxelNormalSRV,"voxelNormalScene");
	mSceneMgr->getMaterialLoader()->addUAV(voxelNormalUAV,"voxelNormalScene");
}
示例#5
0
	//----------------------------------------------------------------------------------------------------
	// Creates the shared blur resources on first use: the dynamic constant
	// buffer s_blurBuffer and the horizontal/vertical blur compute shaders
	// compiled from EEShader\EEBlurShader.hlsl.
	//
	// Returns true when the resources are (or already were) initialized and
	// false when any creation or compilation step fails. A compile error
	// message, when available, is shown in a message box.
	bool EEBlurC::InitializeBlurC()
	{
		if (!s_isBlurCInitialized)
		{
			HRESULT result;
			ID3D11Device* device = EECore::s_EECore->GetDevice();

			// BlurBuffer
			D3D11_BUFFER_DESC bufferDesc;
			bufferDesc.ByteWidth = sizeof(EEBlurBufferDesc);
			bufferDesc.Usage = D3D11_USAGE_DYNAMIC;
			bufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
			bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
			bufferDesc.MiscFlags = 0;
			bufferDesc.StructureByteStride = 0;
			result = device->CreateBuffer(&bufferDesc, NULL, &s_blurBuffer);
			if (FAILED(result))
				return false;

			ID3D10Blob *errorMessage = nullptr;
			ID3D10Blob *computeShaderBuffer = nullptr;

			// Horizontal pass CS
			result = D3DX11CompileFromFileW(L"EEShader\\EEBlurShader.hlsl", NULL, NULL, "BlurHorzCS", "cs_5_0", D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &computeShaderBuffer, &errorMessage, NULL);
			if (FAILED(result))
			{
				if (errorMessage)
					MessageBoxA(NULL, (char*)errorMessage->GetBufferPointer(), "Compile Shader Error!", MB_OK);
				SAFE_RELEASE(errorMessage);
				SAFE_RELEASE(computeShaderBuffer);
				return false;
			}
			// A message blob can also be returned for warnings; release it so it
			// is not leaked when the pointer is reused by the next compile.
			SAFE_RELEASE(errorMessage);
			errorMessage = nullptr;
			result = device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), computeShaderBuffer->GetBufferSize(), NULL, &s_blurHorzCS);
			// The byte code blob is not needed after the creation call, whether
			// it succeeded or not.
			SAFE_RELEASE(computeShaderBuffer);
			computeShaderBuffer = nullptr;
			if (FAILED(result))
				return false;

			// Vertical pass CS
			result = D3DX11CompileFromFileW(L"EEShader\\EEBlurShader.hlsl", NULL, NULL, "BlurVertCS", "cs_5_0", D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &computeShaderBuffer, &errorMessage, NULL);
			if (FAILED(result))
			{
				if (errorMessage)
					MessageBoxA(NULL, (char*)errorMessage->GetBufferPointer(), "Compile Shader Error!", MB_OK);
				SAFE_RELEASE(errorMessage);
				SAFE_RELEASE(computeShaderBuffer);
				return false;
			}
			SAFE_RELEASE(errorMessage);
			errorMessage = nullptr;
			result = device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), computeShaderBuffer->GetBufferSize(), NULL, &s_blurVertCS);
			SAFE_RELEASE(computeShaderBuffer);
			computeShaderBuffer = nullptr;
			if (FAILED(result))
				return false;

			s_isBlurCInitialized = true;
		}

		return true;
	}
// Compiles the HLSL compute kernel for the given source/destination buffer
// layouts and creates the D3D11 objects needed to run it: the class linkage,
// the compute shader, the two kernel class instances and the uniform-argument
// constant buffer.
//
// srcDesc       - source buffer descriptor; its length must not exceed
//                 dstDesc.length.
// dstDesc       - destination buffer descriptor.
// deviceContext - context whose device is used to create the objects.
//
// Returns true on success. Returns false (after reporting through Far::Error)
// when the descriptors are incompatible, the shader fails to compile or any
// D3D11 object cannot be created. Objects already stored in members when a
// later step fails are not released here.
bool
D3D11ComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
                               BufferDescriptor const &dstDesc,
                               ID3D11DeviceContext *deviceContext) {

    if (srcDesc.length > dstDesc.length) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "srcDesc length must be less than or equal to "
                   "dstDesc length.\n");
        return false;
    }

    DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
#if defined(D3D10_SHADER_RESOURCES_MAY_ALIAS)
     dwShaderFlags |= D3D10_SHADER_RESOURCES_MAY_ALIAS;
#endif

#ifdef _DEBUG
    dwShaderFlags |= D3DCOMPILE_DEBUG;
#endif

    // Stringify the layout parameters; the strings must outlive D3DCompile
    // because the macro table only stores pointers to them.
    std::ostringstream ss;
    ss << srcDesc.length;  std::string lengthValue(ss.str()); ss.str("");
    ss << srcDesc.stride;  std::string srcStrideValue(ss.str()); ss.str("");
    ss << dstDesc.stride;  std::string dstStrideValue(ss.str()); ss.str("");
    ss << _workGroupSize;  std::string workgroupSizeValue(ss.str()); ss.str("");

    D3D_SHADER_MACRO defines[] =
        { "LENGTH", lengthValue.c_str(),
          "SRC_STRIDE", srcStrideValue.c_str(),
          "DST_STRIDE", dstStrideValue.c_str(),
          "WORK_GROUP_SIZE", workgroupSizeValue.c_str(),
          0, 0 };

    ID3DBlob * computeShaderBuffer = NULL;
    ID3DBlob * errorBuffer = NULL;

    HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource),
                            NULL, &defines[0], NULL,
                            "cs_main", "cs_5_0",
                            dwShaderFlags, 0,
                            &computeShaderBuffer, &errorBuffer);

    // Any compile failure aborts, with or without compiler output; continuing
    // would dereference a null byte code blob.
    if (FAILED(hr) || computeShaderBuffer == NULL) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "Error compiling HLSL shader: %s\n",
                   errorBuffer != NULL
                       ? (const CHAR*)errorBuffer->GetBufferPointer()
                       : "(no compiler output)");
        if (errorBuffer != NULL) {
            errorBuffer->Release();
        }
        if (computeShaderBuffer != NULL) {
            computeShaderBuffer->Release();
        }
        return false;
    }

    // A successful compile can still return a blob holding warnings.
    if (errorBuffer != NULL) {
        errorBuffer->Release();
    }

    // GetDevice adds a reference to the device; it is released before returning.
    ID3D11Device *device = NULL;
    deviceContext->GetDevice(&device);
    assert(device);
    if (device == NULL) {
        computeShaderBuffer->Release();
        return false;
    }

    // Single-pass block: 'break' leaves on the first failing step so that the
    // shared cleanup below always runs.
    bool success = false;
    const char *failedStep = "";
    do {
        hr = device->CreateClassLinkage(&_classLinkage);
        if (FAILED(hr) || _classLinkage == NULL) {
            failedStep = "CreateClassLinkage";
            break;
        }

        hr = device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(),
                                         computeShaderBuffer->GetBufferSize(),
                                         _classLinkage,
                                         &_computeShader);
        if (FAILED(hr) || _computeShader == NULL) {
            failedStep = "CreateComputeShader";
            break;
        }

        // Debug-build sanity check: the kernel is expected to expose exactly
        // one interface slot.
        ID3D11ShaderReflection *reflector = NULL;
        hr = D3DReflect(computeShaderBuffer->GetBufferPointer(),
                        computeShaderBuffer->GetBufferSize(),
                        IID_ID3D11ShaderReflection, (void**) &reflector);
        if (FAILED(hr) || reflector == NULL) {
            failedStep = "D3DReflect";
            break;
        }
        assert(reflector->GetNumInterfaceSlots() == 1);
        reflector->Release();

        hr = _classLinkage->GetClassInstance("singleBufferCompute", 0,
                                             &_singleBufferKernel);
        if (FAILED(hr) || _singleBufferKernel == NULL) {
            failedStep = "GetClassInstance(singleBufferCompute)";
            break;
        }

        hr = _classLinkage->GetClassInstance("separateBufferCompute", 0,
                                             &_separateBufferKernel);
        if (FAILED(hr) || _separateBufferKernel == NULL) {
            failedStep = "GetClassInstance(separateBufferCompute)";
            break;
        }

        D3D11_BUFFER_DESC cbDesc;
        ZeroMemory(&cbDesc, sizeof(cbDesc));
        cbDesc.Usage = D3D11_USAGE_DYNAMIC;
        cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
        cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
        cbDesc.MiscFlags = 0;
        cbDesc.ByteWidth = sizeof(KernelUniformArgs);
        hr = device->CreateBuffer(&cbDesc, NULL, &_uniformArgs);
        if (FAILED(hr)) {
            failedStep = "CreateBuffer";
            break;
        }

        success = true;
    } while (false);

    if (!success) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "Error creating D3D11 compute kernel resources "
                   "(%s failed).\n", failedStep);
    }

    computeShaderBuffer->Release();
    device->Release();

    return success;
}
示例#7
0
// Creates a Direct3D 11 device, compiles and creates the compute shader from
// ComputeShader.hlsl, fills the input buffers and runs the shader.
//
// Returns returnDBG(0) on success and returnDBG(-1) when device creation,
// shader compilation or shader creation fails.
int main()
{
    // Create Device
    const D3D_FEATURE_LEVEL lvl[] = { D3D_FEATURE_LEVEL_11_1 };

    UINT createDeviceFlags = 0;
#ifdef _DEBUG
    createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
#endif

    /* ================================== */
    printf("Creating device...");
    ID3D11Device* device = nullptr;
    HRESULT hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createDeviceFlags, lvl, _countof(lvl), D3D11_SDK_VERSION, &device, nullptr, nullptr);
    if (FAILED(hr))
    {
        printf("Failed creating Direct3D 11 device %08X\n", hr);
        return returnDBG(-1);
    }

    /* ================================== */
    printf("Compile Compute Shader...");
    ID3DBlob *csBlob = nullptr;
    hr = CompileComputeShader(L"ComputeShader.hlsl", "main", device, &csBlob);
    if (FAILED(hr))
    {
        device->Release();
        printf("Failed compiling shader %08X\n", hr);
        return returnDBG(-1);
    }

    /* ================================== */
    printf("Create Compute Shader...");
    ID3D11ComputeShader* computeShader = nullptr;
    hr = device->CreateComputeShader(csBlob->GetBufferPointer(), csBlob->GetBufferSize(), nullptr, &computeShader);
    // Check the result immediately: continuing after a failure would run the
    // rest of the program without a shader and dereference a null
    // computeShader during clean up.
    if (FAILED(hr))
    {
        csBlob->Release();
        device->Release();
        printf("Failed creating compute shader %08X\n", hr);
        return returnDBG(-1);
    }

    /* ================================== */
    printf("Creating buffers and filling them with initial data...");
    for (int i = 0; i < NUM_ELEMENTS; ++i)
    {
        g_vBuf0[i].i = i;
        g_vBuf0[i].f = (float)i;

        g_vBuf1[i].i = i;
        g_vBuf1[i].f = (float)i;
    }

    // NOTE(review): the calls below use the globals g_pDevice, g_pContext and
    // g_pCS and the views g_pBuf0SRV/g_pBuf1SRV/g_pBufResultUAV, none of which
    // are set in this function - presumably initialised elsewhere; confirm they
    // are not meant to be the local device/computeShader created above.
    CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), &g_vBuf0[0], &g_pBuf0);
    CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), &g_vBuf1[0], &g_pBuf1);
    CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), nullptr, &g_pBufResult);

    /* ================================== */
    printf("Running Compute Shader...");
    ID3D11ShaderResourceView* aRViews[2] = { g_pBuf0SRV, g_pBuf1SRV };
    RunComputeShader(g_pContext, g_pCS, 2, aRViews, nullptr, nullptr, 0, g_pBufResultUAV, NUM_ELEMENTS, 1, 1);
    printf("done\n");

    // The byte code is no longer needed once the shader object exists.
    csBlob->Release();

    printf("Success\n");

    // Clean up
    computeShader->Release();

    device->Release();

    return returnDBG(0);
}
int _tmain(int /*argc*/, _TCHAR* /*argv[]*/)
{
    // GROUP_SIZE_X defined in kernel.hlsl must match the 
    // groupSize declared here.
    size_t const groupSize = 512;
    size_t const numGroups = 16;
    size_t const dimension = numGroups*groupSize;

    // Create a D3D11 device and immediate context. 
    // TODO: The code below uses the default video adapter, with the
    // default set of feature levels.  Please see the MSDN docs if 
    // you wish to control which adapter and feature level are used.
    D3D_FEATURE_LEVEL featureLevel;
    ID3D11Device* device = nullptr;
    ID3D11DeviceContext* context = nullptr;
    HRESULT hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, 
        NULL, NULL, 0, D3D11_SDK_VERSION, &device, 
        &featureLevel, &context);
    if (FAILED(hr))
    {
        printf("D3D11CreateDevice failed with return code %x\n", hr);
        return hr;
    }

    // Create system memory and fill it with our initial data.  Note that
    // these data structures aren't really necessary , it's just a demonstration
    // of how you can take existing data structures you might have and copy
    // their data to/from GPU computations.
    std::vector<float> x(dimension);
    std::vector<float> y(dimension);
    std::vector<float> z(dimension);
    float const a = 2.0f;
    for (size_t i = 0; i < dimension; ++ i)
    {
        x[i] = static_cast<float>(i);
        y[i] = 100 - static_cast<float>(i);
    }

    // Create structured buffers for the "x" and "y" vectors.
    D3D11_BUFFER_DESC inputBufferDesc;
    inputBufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
    // The buffers are read-only by the GPU, writeable by the CPU.
    // TODO: If you will never again upate the data in a GPU buffer,
    // you might want to look at using a D3D11_SUBRESOURCE_DATA here to
    // provide the initialization data instead of doing the mapping 
    // and copying that happens below.
    inputBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
    inputBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    inputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    inputBufferDesc.StructureByteStride = sizeof(float);
    inputBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* xBuffer = nullptr;
    hr = device->CreateBuffer(&inputBufferDesc, NULL, &xBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for x buffer with return code %x\n", hr);
        return hr;
    }
    // We can re-use inputBufferDesc here because the layout and usage of the x
    // and y buffers is exactly the same.
    ID3D11Buffer* yBuffer = nullptr;
    hr = device->CreateBuffer(&inputBufferDesc, NULL, &yBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for x buffer with return code %x\n", hr);
        return hr;
    }

    // Create shader resource views for the "x" and "y" buffers.
    // TODO: You can optionally provide a D3D11_SHADER_RESOURCE_VIEW_DESC
    // as the second parameter if you need to use only part of the buffer
    // inside the compute shader.
    ID3D11ShaderResourceView* xSRV = nullptr;
    hr = device->CreateShaderResourceView(xBuffer, NULL, &xSRV);
    if (FAILED(hr))
    {
        printf("CreateShaderResourceView failed for x buffer with return code %x\n", hr);
        return hr;
    }

    ID3D11ShaderResourceView* ySRV = nullptr;
    hr = device->CreateShaderResourceView(yBuffer, NULL, &ySRV);
    if (FAILED(hr))
    {
        printf("CreateShaderResourceView failed for y buffer with return code %x\n", hr);
        return hr;
    }

    // Create a structured buffer for the "z" vector.  This buffer needs to be 
    // writeable by the GPU, so we can't create it with CPU read/write access.
    D3D11_BUFFER_DESC outputBufferDesc;
    outputBufferDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS 
        | D3D11_BIND_SHADER_RESOURCE;
    outputBufferDesc.Usage = D3D11_USAGE_DEFAULT;
    outputBufferDesc.CPUAccessFlags = 0;
    outputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    outputBufferDesc.StructureByteStride = sizeof(float);
    outputBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* zBuffer = nullptr;
    hr = device->CreateBuffer(&outputBufferDesc, NULL, &zBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for z buffer with return code %x\n", hr);
        return hr;
    }

    // Create an unordered access view for the "z" vector.  
    D3D11_UNORDERED_ACCESS_VIEW_DESC outputUAVDesc;
    outputUAVDesc.Buffer.FirstElement = 0;        
    outputUAVDesc.Buffer.Flags = 0;            
    outputUAVDesc.Buffer.NumElements = dimension;
    outputUAVDesc.Format = DXGI_FORMAT_UNKNOWN;    
    outputUAVDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;   
    ID3D11UnorderedAccessView* zBufferUAV;
    hr = device->CreateUnorderedAccessView(zBuffer, 
        &outputUAVDesc, &zBufferUAV);
    if (FAILED(hr))
    {
        printf("CreateUnorderedAccessView failed for z buffer with return code %x\n", hr);
        return hr;
    }

    // Create a staging buffer, which will be used to copy back from zBuffer.
    D3D11_BUFFER_DESC stagingBufferDesc;
    stagingBufferDesc.BindFlags = 0;
    stagingBufferDesc.Usage = D3D11_USAGE_STAGING;  
    stagingBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    stagingBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    stagingBufferDesc.StructureByteStride = sizeof(float);
    stagingBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* stagingBuffer;
    hr = device->CreateBuffer(&stagingBufferDesc, NULL, &stagingBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for staging buffer with return code %x\n", hr);
        return hr;
    }

    // Create a constant buffer (this buffer is used to pass the constant 
    // value 'a' to the kernel as cbuffer Constants).
    D3D11_BUFFER_DESC cbDesc;
    cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
    cbDesc.Usage = D3D11_USAGE_DYNAMIC;  
    cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    cbDesc.MiscFlags = 0;
    // Even though the constant buffer only has one float, DX expects
    // ByteWidth to be a multiple of 4 floats (i.e., one 128-bit register).
    cbDesc.ByteWidth = sizeof(float)*4;
    ID3D11Buffer* constantBuffer = nullptr;
    hr = device->CreateBuffer( &cbDesc, NULL, &constantBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for constant buffer with return code %x\n", hr);
        return hr;
    }

    // Map the constant buffer and set the constant value 'a'.
    D3D11_MAPPED_SUBRESOURCE mappedResource;
    context->Map(constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* constants = reinterpret_cast<float*>(mappedResource.pData);
    constants[0] = a;
    constants = nullptr;
    context->Unmap(constantBuffer, 0);

    // Map the x buffer and copy our data into it.
    context->Map(xBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* xvalues = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(xvalues, &x[0], sizeof(float)*x.size());
    xvalues = nullptr;
    context->Unmap(xBuffer, 0);

    // Map the y buffer and copy our data into it.
    context->Map(yBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* yvalues = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(yvalues, &y[0], sizeof(float)*y.size());
    yvalues = nullptr;
    context->Unmap(yBuffer, 0);

    // Compile the compute shader into a blob.
    ID3DBlob* errorBlob = nullptr;
    ID3DBlob* shaderBlob = nullptr;
    hr = D3DX11CompileFromFile(L"kernel.hlsl", NULL, NULL, "saxpy", "cs_4_0",
        D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &shaderBlob, &errorBlob, NULL);
    if (FAILED(hr))
    {
        // Print out the error message if there is one.
        if (errorBlob)
        {
            char const* message = (char*)errorBlob->GetBufferPointer();
            printf("kernel.hlsl failed to compile; error message:\n");
            printf("%s\n", message);
            errorBlob->Release();
        }
        return hr;
    }

    // Create a shader object from the compiled blob.
    ID3D11ComputeShader* computeShader;
    hr = device->CreateComputeShader(shaderBlob->GetBufferPointer(), 
        shaderBlob->GetBufferSize(), NULL, &computeShader);
    if (FAILED(hr))
    {
        printf("CreateComputeShader failed with return code %x\n", hr);
        return hr;
    }

    // Make the shader active.
    context->CSSetShader(computeShader, NULL, 0);

    // Attach the z buffer to the output via its unordered access view.
    UINT initCounts = 0xFFFFFFFF;
    context->CSSetUnorderedAccessViews(0, 1, &zBufferUAV, &initCounts);

    // Attach the input buffers via their shader resource views.
    context->CSSetShaderResources(0, 1, &xSRV);
    context->CSSetShaderResources(1, 1, &ySRV);

    // Attach the constant buffer
    context->CSSetConstantBuffers(0, 1, &constantBuffer);

    // Execute the shader, in 'numGroups' groups of 'groupSize' threads each.
    context->Dispatch(numGroups, 1, 1);

    // Copy the z buffer to the staging buffer so that we can 
    // retrieve the data for accesss by the CPU.
    context->CopyResource(stagingBuffer, zBuffer);

    // Map the staging buffer for reading.
    context->Map(stagingBuffer, 0, D3D11_MAP_READ, 0, &mappedResource);
    float* zData = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(&z[0], zData, sizeof(float)*z.size());
    zData = nullptr;
    context->Unmap(stagingBuffer, 0);

    // Now compare the GPU results against expected values.
    bool resultOK = true;
    for (size_t i = 0; i < x.size(); ++ i)
    {
        // NOTE: This comparison assumes the GPU produces *exactly* the 
        // same result as the CPU.  In general, this will not be the case
        // with floating-point calculations.
        float const expected = a*x[i] + y[i];
        if (z[i] != expected)
        {
            printf("Unexpected result at position %lu: expected %.7e, got %.7e\n",
                i, expected, z[i]);
            resultOK = false;
        }
    }

    if (!resultOK)
    {
        printf("GPU results differed from the CPU results.\n");
        OutputDebugStringA("GPU results differed from the CPU results.\n");
        return 1;
    }

    printf("GPU output matched the CPU results.\n");
    OutputDebugStringA("GPU output matched the CPU results.\n");

    // Disconnect everything from the pipeline.
    ID3D11UnorderedAccessView* nullUAV = nullptr;
    context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, &initCounts);
    ID3D11ShaderResourceView* nullSRV = nullptr;
    context->CSSetShaderResources(0, 1, &nullSRV);
    context->CSSetShaderResources(1, 1, &nullSRV);
    ID3D11Buffer* nullBuffer = nullptr;
    context->CSSetConstantBuffers(0, 1, &nullBuffer);

    // Release resources.  Again, note that none of the error checks above
    // release resources that have been allocated up to this point, so the 
    // sample doesn't clean up after itself correctly unless everything succeeds.
    computeShader->Release();
    shaderBlob->Release();
    constantBuffer->Release();
    stagingBuffer->Release();
    zBufferUAV->Release();
    zBuffer->Release();
    xSRV->Release();
    xBuffer->Release();
    ySRV->Release();
    yBuffer->Release();
    context->Release();
    device->Release();

    return 0;
}