// Creates the terrain shader objects and the terrain input layout and stores
// them in `shaders`.
//
// device  - D3D11 device used for every Create*Shader call.
// shaders - receives the created objects; all six terrain members must be
//           null on entry (asserted).
//
// Every HRESULT returned by the device is handed to DxErrorChecker.
void init(ID3D11Device& device, Shaders& shaders) {
    assert(shaders.mTerrainDS == nullptr);
    assert(shaders.mTerrainVS == nullptr);
    assert(shaders.mTerrainHS == nullptr);
    assert(shaders.mTerrainIL == nullptr);
    assert(shaders.mTerrainPS == nullptr);
    assert(shaders.mTerrainCS == nullptr);

    // Byte code of the stage currently being created; refilled by
    // computeShaderByteCode() before each Create*Shader call.
    // data() is used instead of &v[0] so an empty buffer is not indexed.
    std::vector<char> shaderByteCode;

    // Vertex shader (its byte code is also passed to the input-layout builder).
    computeShaderByteCode(L"HLSL/TerrainVS.cso", shaderByteCode);
    buildShapesVertexLayout(device, shaderByteCode, shaders.mTerrainIL);
    HRESULT result = device.CreateVertexShader(
        shaderByteCode.data(),
        shaderByteCode.size(),
        nullptr,
        &shaders.mTerrainVS);
    DxErrorChecker(result);

    // Pixel shader
    computeShaderByteCode(L"HLSL/TerrainPS.cso", shaderByteCode);
    result = device.CreatePixelShader(
        shaderByteCode.data(),
        shaderByteCode.size(),
        nullptr,
        &shaders.mTerrainPS);
    DxErrorChecker(result);

    // Hull shader
    computeShaderByteCode(L"HLSL/TerrainHS.cso", shaderByteCode);
    result = device.CreateHullShader(
        shaderByteCode.data(),
        shaderByteCode.size(),
        nullptr,
        &shaders.mTerrainHS);
    DxErrorChecker(result);

    // Domain shader
    computeShaderByteCode(L"HLSL/TerrainDS.cso", shaderByteCode);
    result = device.CreateDomainShader(
        shaderByteCode.data(),
        shaderByteCode.size(),
        nullptr,
        &shaders.mTerrainDS);
    DxErrorChecker(result);

    // Compute shader
    computeShaderByteCode(L"HLSL/TerrainCS.cso", shaderByteCode);
    result = device.CreateComputeShader(
        shaderByteCode.data(),
        shaderByteCode.size(),
        nullptr,
        &shaders.mTerrainCS);
    DxErrorChecker(result);
}
// Creates the native D3D11 shader object for every stage that m_desc reports
// as present (vertex, geometry, hull, domain, pixel, compute - in that order).
//
// Returns VP_SUCCESS when every requested stage was created and VP_FAILURE as
// soon as one creation call fails. Stages created before the failing one are
// left in their member pointers.
vpResult vprShaderProgramDX11::init()
{
    vprDeviceDX11* dx11Device = static_cast<vprDeviceDX11*>(m_device);
    ID3D11Device* nativeDevice = dx11Device->getNativeDevice();

    // HRESULTs are tested with FAILED() rather than as booleans so that only
    // genuine error codes (negative values) are reported as VP_FAILURE.
    if (m_desc.hasVertexShader())
    {
        const HRESULT hr = nativeDevice->CreateVertexShader(
            m_desc.m_bytecodes[vprShaderStage::VERTEX_SHADER]->getPointer(),
            m_desc.m_bytecodes[vprShaderStage::VERTEX_SHADER]->getSize(),
            nullptr,
            &m_nativeVertexShader);
        if (FAILED(hr))
        {
            return VP_FAILURE;
        }
    }

    if (m_desc.hasGeometryShader())
    {
        const HRESULT hr = nativeDevice->CreateGeometryShader(
            m_desc.m_bytecodes[vprShaderStage::GEOMETRY_SHADER]->getPointer(),
            m_desc.m_bytecodes[vprShaderStage::GEOMETRY_SHADER]->getSize(),
            nullptr,
            &m_nativeGeometryShader);
        if (FAILED(hr))
        {
            return VP_FAILURE;
        }
    }

    if (m_desc.hasHullShader())
    {
        const HRESULT hr = nativeDevice->CreateHullShader(
            m_desc.m_bytecodes[vprShaderStage::HULL_SHADER]->getPointer(),
            m_desc.m_bytecodes[vprShaderStage::HULL_SHADER]->getSize(),
            nullptr,
            &m_nativeHullShader);
        if (FAILED(hr))
        {
            return VP_FAILURE;
        }
    }

    if (m_desc.hasDomainShader())
    {
        const HRESULT hr = nativeDevice->CreateDomainShader(
            m_desc.m_bytecodes[vprShaderStage::DOMAIN_SHADER]->getPointer(),
            m_desc.m_bytecodes[vprShaderStage::DOMAIN_SHADER]->getSize(),
            nullptr,
            &m_nativeDomainShader);
        if (FAILED(hr))
        {
            return VP_FAILURE;
        }
    }

    if (m_desc.hasPixelShader())
    {
        const HRESULT hr = nativeDevice->CreatePixelShader(
            m_desc.m_bytecodes[vprShaderStage::PIXEL_SHADER]->getPointer(),
            m_desc.m_bytecodes[vprShaderStage::PIXEL_SHADER]->getSize(),
            nullptr,
            &m_nativePixelShader);
        if (FAILED(hr))
        {
            return VP_FAILURE;
        }
    }

    if (m_desc.hasComputeShader())
    {
        const HRESULT hr = nativeDevice->CreateComputeShader(
            m_desc.m_bytecodes[vprShaderStage::COMPUTE_SHADER]->getPointer(),
            m_desc.m_bytecodes[vprShaderStage::COMPUTE_SHADER]->getSize(),
            nullptr,
            &m_nativeComputeShader);
        if (FAILED(hr))
        {
            return VP_FAILURE;
        }
    }

    return VP_SUCCESS;
}
bool Shader::create( GraphicsSystem& graphicsSystem, ShaderType type, const void* pInitData, uint dataSize ) { TIKI_ASSERT( pInitData != nullptr ); TIKI_ASSERT( dataSize > 0u ); m_platformData.pShaderCode = pInitData; m_platformData.shaderCodeLength = dataSize; m_type = type; m_hash = crcBytes( m_platformData.pShaderCode, m_platformData.shaderCodeLength ); ID3D11Device* pDevice = GraphicsSystemPlatform::getDevice( graphicsSystem ); HRESULT result = S_FALSE; switch ( m_type ) { case ShaderType_ComputeShader: result = pDevice->CreateComputeShader( pInitData, dataSize, nullptr, &m_platformData.pComputeShader ); break; case ShaderType_DomainShader: result = pDevice->CreateDomainShader( pInitData, dataSize, nullptr, &m_platformData.pDomainShader ); break; case ShaderType_GeometrieShader: result = pDevice->CreateGeometryShader( pInitData, dataSize, nullptr, &m_platformData.pGeometryShader ); break; case ShaderType_HullShader: result = pDevice->CreateHullShader( pInitData, dataSize, nullptr, &m_platformData.pHullShader ); break; case ShaderType_PixelShader: result = pDevice->CreatePixelShader( pInitData, dataSize, nullptr, &m_platformData.pPixelShader ); break; case ShaderType_VertexShader: result = pDevice->CreateVertexShader( pInitData, dataSize, nullptr, &m_platformData.pVertexShader ); break; default: TIKI_BREAK( "[graphics] ShaderType not supported.\n" ); break; } if ( FAILED( result ) || m_platformData.pShaderObject == nullptr ) { dispose( graphicsSystem ); return false; } return true; }
void AaVoxelScene::initScene(int size) { ID3D11Device* mDevice = mSceneMgr->getRenderSystem()->getDevice(); this->size = size; D3D11_TEXTURE3D_DESC desc; ZeroMemory( &desc, sizeof( desc ) ); desc.Usage = D3D11_USAGE_DEFAULT; desc.Depth = desc.Height = desc.Width = size; desc.MiscFlags = D3D11_RESOURCE_MISC_GENERATE_MIPS; desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; desc.CPUAccessFlags = 0; desc.MipLevels = 0; //FINAL desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;//DXGI_FORMAT_R8G8B8A8_UNORM; createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &finalVoxelTexture, &fVoxelUAV, &fVoxelSRV); //NORMAL desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &voxelNormalTexture, &voxelNormalUAV, &voxelNormalSRV); //PAST BOUNCES desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &bounceVoxelTexture, &bVoxelUAV, &bVoxelSRV); //SHADOW desc.Format = DXGI_FORMAT_R32_FLOAT; createUAVTexture(size, desc, -1, mSceneMgr, mDevice, &voxelShadowTexture, &voxelShadowUAV, &voxelShadowSRV); //CAUSTICS ZeroMemory( &desc, sizeof( desc ) ); desc.Usage = D3D11_USAGE_DEFAULT; desc.Depth = desc.Height = desc.Width = size; desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; desc.CPUAccessFlags = 0; desc.MipLevels = 1; desc.MiscFlags = 0; desc.Format = DXGI_FORMAT_R32_FLOAT; createUAVTexture(size, desc, 1, mSceneMgr, mDevice, &causticsVoxelTexture, &voxelCausticUAV, &voxelCausticSRV); //CS test ID3DBlob* csBuffer = 0; ID3DBlob* errorBuffer = 0; DWORD shaderFlags = D3D10_SHADER_ENABLE_STRICTNESS; #if defined( DEBUG ) || defined( _DEBUG ) shaderFlags |= D3DCOMPILE_DEBUG; #endif std::string str,strTotal; std::ifstream in; in.open("../data/gfx/mipVoxels.cs"); getline(in,str); while ( in ) { strTotal += str+"\n"; getline(in,str); } HRESULT result = D3DCompile(strTotal.c_str(),strTotal.length(),"mipVoxels.cs",0,0,"CSMain" , "cs_5_0" , 
shaderFlags, 0, &csBuffer, &errorBuffer ); if( FAILED( result ) ) { if( errorBuffer != 0 ) { OutputDebugStringA(( char* )errorBuffer->GetBufferPointer( )); std::string errorMessage=( char* )errorBuffer->GetBufferPointer( ); AaLogger::getLogger()->writeMessage("ERROR "+errorMessage); errorBuffer->Release(); } } if( errorBuffer != 0 ) errorBuffer->Release(); result = mDevice->CreateComputeShader( csBuffer->GetBufferPointer( ), csBuffer->GetBufferSize( ), 0, &csMipVoxels); csBuffer->Release(); //mSceneMgr->getMaterialLoader()->addTextureResource(cVoxelSRV,"voxelScene"); //mSceneMgr->getMaterialLoader()->addUAV(cVoxelUAV,"voxelScene"); mSceneMgr->getMaterialLoader()->addTextureResource(bVoxelSRV,"previousBounces"); mSceneMgr->getMaterialLoader()->addUAV(bVoxelUAV,"previousBounces"); mSceneMgr->getMaterialLoader()->addTextureResource(fVoxelSRV,"fVoxelScene"); mSceneMgr->getMaterialLoader()->addUAV(fVoxelUAV,"fVoxelScene"); mSceneMgr->getMaterialLoader()->addTextureResource(voxelShadowSRV,"voxelShadowScene"); mSceneMgr->getMaterialLoader()->addUAV(voxelShadowUAV,"voxelShadowScene"); mSceneMgr->getMaterialLoader()->addTextureResource(voxelCausticSRV,"voxelCausticScene"); mSceneMgr->getMaterialLoader()->addUAV(voxelCausticUAV,"voxelCausticScene"); mSceneMgr->getMaterialLoader()->addTextureResource(voxelNormalSRV,"voxelNormalScene"); mSceneMgr->getMaterialLoader()->addUAV(voxelNormalUAV,"voxelNormalScene"); }
//---------------------------------------------------------------------------------------------------- bool EEBlurC::InitializeBlurC() { if (!s_isBlurCInitialized) { HRESULT result; ID3D11Device* device = EECore::s_EECore->GetDevice(); ID3D11DeviceContext* deviceContext = EECore::s_EECore->GetDeviceContext(); // BlurBuffer D3D11_BUFFER_DESC bufferDesc; bufferDesc.ByteWidth = sizeof(EEBlurBufferDesc); bufferDesc.Usage = D3D11_USAGE_DYNAMIC; bufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; bufferDesc.MiscFlags = 0; bufferDesc.StructureByteStride = 0; result = device->CreateBuffer(&bufferDesc, NULL, &s_blurBuffer); if (FAILED(result)) return false; ID3D10Blob *errorMessage = nullptr; ID3D10Blob *computeShaderBuffer = nullptr; // CS result = D3DX11CompileFromFileW(L"EEShader\\EEBlurShader.hlsl", NULL, NULL, "BlurHorzCS", "cs_5_0", D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &computeShaderBuffer, &errorMessage, NULL); if (FAILED(result)) { if (errorMessage) MessageBoxA(NULL, (char*)errorMessage->GetBufferPointer(), "Compile Shader Error!", MB_OK); SAFE_RELEASE(errorMessage); SAFE_RELEASE(computeShaderBuffer); return false; } result = device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), computeShaderBuffer->GetBufferSize(), NULL, &s_blurHorzCS); if (FAILED(result)) { SAFE_RELEASE(errorMessage); SAFE_RELEASE(computeShaderBuffer); return false; } result = D3DX11CompileFromFileW(L"EEShader\\EEBlurShader.hlsl", NULL, NULL, "BlurVertCS", "cs_5_0", D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &computeShaderBuffer, &errorMessage, NULL); if (FAILED(result)) { if (errorMessage) MessageBoxA(NULL, (char*)errorMessage->GetBufferPointer(), "Compile Shader Error!", MB_OK); SAFE_RELEASE(errorMessage); SAFE_RELEASE(computeShaderBuffer); return false; } result = device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), computeShaderBuffer->GetBufferSize(), NULL, &s_blurVertCS); if (FAILED(result)) { 
SAFE_RELEASE(errorMessage); SAFE_RELEASE(computeShaderBuffer); return false; } s_isBlurCInitialized = true; } return true; }
bool D3D11ComputeEvaluator::Compile(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, ID3D11DeviceContext *deviceContext) { if (srcDesc.length > dstDesc.length) { Far::Error(Far::FAR_RUNTIME_ERROR, "srcDesc length must be less than or equal to " "dstDesc length.\n"); return false; } DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS; #if defined(D3D10_SHADER_RESOURCES_MAY_ALIAS) dwShaderFlags |= D3D10_SHADER_RESOURCES_MAY_ALIAS; #endif #ifdef _DEBUG dwShaderFlags |= D3DCOMPILE_DEBUG; #endif std::ostringstream ss; ss << srcDesc.length; std::string lengthValue(ss.str()); ss.str(""); ss << srcDesc.stride; std::string srcStrideValue(ss.str()); ss.str(""); ss << dstDesc.stride; std::string dstStrideValue(ss.str()); ss.str(""); ss << _workGroupSize; std::string workgroupSizeValue(ss.str()); ss.str(""); D3D_SHADER_MACRO defines[] = { "LENGTH", lengthValue.c_str(), "SRC_STRIDE", srcStrideValue.c_str(), "DST_STRIDE", dstStrideValue.c_str(), "WORK_GROUP_SIZE", workgroupSizeValue.c_str(), 0, 0 }; ID3DBlob * computeShaderBuffer = NULL; ID3DBlob * errorBuffer = NULL; HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource), NULL, &defines[0], NULL, "cs_main", "cs_5_0", dwShaderFlags, 0, &computeShaderBuffer, &errorBuffer); if (FAILED(hr)) { if (errorBuffer != NULL) { Far::Error(Far::FAR_RUNTIME_ERROR, "Error compiling HLSL shader: %s\n", (CHAR*)errorBuffer->GetBufferPointer()); errorBuffer->Release(); return false; } } ID3D11Device *device = NULL; deviceContext->GetDevice(&device); assert(device); device->CreateClassLinkage(&_classLinkage); assert(_classLinkage); device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), computeShaderBuffer->GetBufferSize(), _classLinkage, &_computeShader); assert(_computeShader); ID3D11ShaderReflection *reflector; D3DReflect(computeShaderBuffer->GetBufferPointer(), computeShaderBuffer->GetBufferSize(), IID_ID3D11ShaderReflection, (void**) &reflector); assert(reflector); assert(reflector->GetNumInterfaceSlots() == 
1); reflector->Release(); computeShaderBuffer->Release(); _classLinkage->GetClassInstance("singleBufferCompute", 0, &_singleBufferKernel); assert(_singleBufferKernel); _classLinkage->GetClassInstance("separateBufferCompute", 0, &_separateBufferKernel); assert(_separateBufferKernel); D3D11_BUFFER_DESC cbDesc; ZeroMemory(&cbDesc, sizeof(cbDesc)); cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; cbDesc.MiscFlags = 0; cbDesc.ByteWidth = sizeof(KernelUniformArgs); device->CreateBuffer(&cbDesc, NULL, &_uniformArgs); return true; }
int main() { // Create Device const D3D_FEATURE_LEVEL lvl[] = { D3D_FEATURE_LEVEL_11_1 }; UINT createDeviceFlags = 0; #ifdef _DEBUG createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG; #endif /* ================================== */ printf("Creating device..."); ID3D11Device* device = nullptr; HRESULT hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createDeviceFlags, lvl, _countof(lvl), D3D11_SDK_VERSION, &device, nullptr, nullptr); if (FAILED(hr)) { printf("Failed creating Direct3D 11 device %08X\n", hr); return returnDBG(-1); } /* ================================== */ printf("Compile Compute Shader..."); ID3DBlob *csBlob = nullptr; hr = CompileComputeShader(L"ComputeShader.hlsl", "main", device, &csBlob); if (FAILED(hr)) { device->Release(); printf("Failed compiling shader %08X\n", hr); return returnDBG(-1); } /* ================================== */ printf("Create Compute Shader..."); ID3D11ComputeShader* computeShader = nullptr; hr = device->CreateComputeShader(csBlob->GetBufferPointer(), csBlob->GetBufferSize(), nullptr, &computeShader); /* ================================== */ printf("Creating buffers and filling them with initial data..."); for (int i = 0; i < NUM_ELEMENTS; ++i) { g_vBuf0[i].i = i; g_vBuf0[i].f = (float)i; g_vBuf1[i].i = i; g_vBuf1[i].f = (float)i; } CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), &g_vBuf0[0], &g_pBuf0); CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), &g_vBuf1[0], &g_pBuf1); CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), nullptr, &g_pBufResult); /* ================================== */ printf("Running Compute Shader..."); ID3D11ShaderResourceView* aRViews[2] = { g_pBuf0SRV, g_pBuf1SRV }; RunComputeShader(g_pContext, g_pCS, 2, aRViews, nullptr, nullptr, 0, g_pBufResultUAV, NUM_ELEMENTS, 1, 1); printf("done\n"); csBlob->Release(); if (FAILED(hr)) { device->Release(); } printf("Success\n"); // Clean up computeShader->Release(); device->Release(); return returnDBG(0); }
int _tmain(int /*argc*/, _TCHAR* /*argv[]*/) { // GROUP_SIZE_X defined in kernel.hlsl must match the // groupSize declared here. size_t const groupSize = 512; size_t const numGroups = 16; size_t const dimension = numGroups*groupSize; // Create a D3D11 device and immediate context. // TODO: The code below uses the default video adapter, with the // default set of feature levels. Please see the MSDN docs if // you wish to control which adapter and feature level are used. D3D_FEATURE_LEVEL featureLevel; ID3D11Device* device = nullptr; ID3D11DeviceContext* context = nullptr; HRESULT hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, NULL, NULL, 0, D3D11_SDK_VERSION, &device, &featureLevel, &context); if (FAILED(hr)) { printf("D3D11CreateDevice failed with return code %x\n", hr); return hr; } // Create system memory and fill it with our initial data. Note that // these data structures aren't really necessary , it's just a demonstration // of how you can take existing data structures you might have and copy // their data to/from GPU computations. std::vector<float> x(dimension); std::vector<float> y(dimension); std::vector<float> z(dimension); float const a = 2.0f; for (size_t i = 0; i < dimension; ++ i) { x[i] = static_cast<float>(i); y[i] = 100 - static_cast<float>(i); } // Create structured buffers for the "x" and "y" vectors. D3D11_BUFFER_DESC inputBufferDesc; inputBufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; // The buffers are read-only by the GPU, writeable by the CPU. // TODO: If you will never again upate the data in a GPU buffer, // you might want to look at using a D3D11_SUBRESOURCE_DATA here to // provide the initialization data instead of doing the mapping // and copying that happens below. 
inputBufferDesc.Usage = D3D11_USAGE_DYNAMIC; inputBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; inputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; inputBufferDesc.StructureByteStride = sizeof(float); inputBufferDesc.ByteWidth = sizeof(float) * dimension; ID3D11Buffer* xBuffer = nullptr; hr = device->CreateBuffer(&inputBufferDesc, NULL, &xBuffer); if (FAILED(hr)) { printf("CreateBuffer failed for x buffer with return code %x\n", hr); return hr; } // We can re-use inputBufferDesc here because the layout and usage of the x // and y buffers is exactly the same. ID3D11Buffer* yBuffer = nullptr; hr = device->CreateBuffer(&inputBufferDesc, NULL, &yBuffer); if (FAILED(hr)) { printf("CreateBuffer failed for x buffer with return code %x\n", hr); return hr; } // Create shader resource views for the "x" and "y" buffers. // TODO: You can optionally provide a D3D11_SHADER_RESOURCE_VIEW_DESC // as the second parameter if you need to use only part of the buffer // inside the compute shader. ID3D11ShaderResourceView* xSRV = nullptr; hr = device->CreateShaderResourceView(xBuffer, NULL, &xSRV); if (FAILED(hr)) { printf("CreateShaderResourceView failed for x buffer with return code %x\n", hr); return hr; } ID3D11ShaderResourceView* ySRV = nullptr; hr = device->CreateShaderResourceView(yBuffer, NULL, &ySRV); if (FAILED(hr)) { printf("CreateShaderResourceView failed for y buffer with return code %x\n", hr); return hr; } // Create a structured buffer for the "z" vector. This buffer needs to be // writeable by the GPU, so we can't create it with CPU read/write access. 
D3D11_BUFFER_DESC outputBufferDesc; outputBufferDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; outputBufferDesc.Usage = D3D11_USAGE_DEFAULT; outputBufferDesc.CPUAccessFlags = 0; outputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; outputBufferDesc.StructureByteStride = sizeof(float); outputBufferDesc.ByteWidth = sizeof(float) * dimension; ID3D11Buffer* zBuffer = nullptr; hr = device->CreateBuffer(&outputBufferDesc, NULL, &zBuffer); if (FAILED(hr)) { printf("CreateBuffer failed for z buffer with return code %x\n", hr); return hr; } // Create an unordered access view for the "z" vector. D3D11_UNORDERED_ACCESS_VIEW_DESC outputUAVDesc; outputUAVDesc.Buffer.FirstElement = 0; outputUAVDesc.Buffer.Flags = 0; outputUAVDesc.Buffer.NumElements = dimension; outputUAVDesc.Format = DXGI_FORMAT_UNKNOWN; outputUAVDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; ID3D11UnorderedAccessView* zBufferUAV; hr = device->CreateUnorderedAccessView(zBuffer, &outputUAVDesc, &zBufferUAV); if (FAILED(hr)) { printf("CreateUnorderedAccessView failed for z buffer with return code %x\n", hr); return hr; } // Create a staging buffer, which will be used to copy back from zBuffer. D3D11_BUFFER_DESC stagingBufferDesc; stagingBufferDesc.BindFlags = 0; stagingBufferDesc.Usage = D3D11_USAGE_STAGING; stagingBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; stagingBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; stagingBufferDesc.StructureByteStride = sizeof(float); stagingBufferDesc.ByteWidth = sizeof(float) * dimension; ID3D11Buffer* stagingBuffer; hr = device->CreateBuffer(&stagingBufferDesc, NULL, &stagingBuffer); if (FAILED(hr)) { printf("CreateBuffer failed for staging buffer with return code %x\n", hr); return hr; } // Create a constant buffer (this buffer is used to pass the constant // value 'a' to the kernel as cbuffer Constants). 
D3D11_BUFFER_DESC cbDesc; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; cbDesc.MiscFlags = 0; // Even though the constant buffer only has one float, DX expects // ByteWidth to be a multiple of 4 floats (i.e., one 128-bit register). cbDesc.ByteWidth = sizeof(float)*4; ID3D11Buffer* constantBuffer = nullptr; hr = device->CreateBuffer( &cbDesc, NULL, &constantBuffer); if (FAILED(hr)) { printf("CreateBuffer failed for constant buffer with return code %x\n", hr); return hr; } // Map the constant buffer and set the constant value 'a'. D3D11_MAPPED_SUBRESOURCE mappedResource; context->Map(constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); float* constants = reinterpret_cast<float*>(mappedResource.pData); constants[0] = a; constants = nullptr; context->Unmap(constantBuffer, 0); // Map the x buffer and copy our data into it. context->Map(xBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); float* xvalues = reinterpret_cast<float*>(mappedResource.pData); memcpy(xvalues, &x[0], sizeof(float)*x.size()); xvalues = nullptr; context->Unmap(xBuffer, 0); // Map the y buffer and copy our data into it. context->Map(yBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); float* yvalues = reinterpret_cast<float*>(mappedResource.pData); memcpy(yvalues, &y[0], sizeof(float)*y.size()); yvalues = nullptr; context->Unmap(yBuffer, 0); // Compile the compute shader into a blob. ID3DBlob* errorBlob = nullptr; ID3DBlob* shaderBlob = nullptr; hr = D3DX11CompileFromFile(L"kernel.hlsl", NULL, NULL, "saxpy", "cs_4_0", D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &shaderBlob, &errorBlob, NULL); if (FAILED(hr)) { // Print out the error message if there is one. 
if (errorBlob) { char const* message = (char*)errorBlob->GetBufferPointer(); printf("kernel.hlsl failed to compile; error message:\n"); printf("%s\n", message); errorBlob->Release(); } return hr; } // Create a shader object from the compiled blob. ID3D11ComputeShader* computeShader; hr = device->CreateComputeShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), NULL, &computeShader); if (FAILED(hr)) { printf("CreateComputeShader failed with return code %x\n", hr); return hr; } // Make the shader active. context->CSSetShader(computeShader, NULL, 0); // Attach the z buffer to the output via its unordered access view. UINT initCounts = 0xFFFFFFFF; context->CSSetUnorderedAccessViews(0, 1, &zBufferUAV, &initCounts); // Attach the input buffers via their shader resource views. context->CSSetShaderResources(0, 1, &xSRV); context->CSSetShaderResources(1, 1, &ySRV); // Attach the constant buffer context->CSSetConstantBuffers(0, 1, &constantBuffer); // Execute the shader, in 'numGroups' groups of 'groupSize' threads each. context->Dispatch(numGroups, 1, 1); // Copy the z buffer to the staging buffer so that we can // retrieve the data for accesss by the CPU. context->CopyResource(stagingBuffer, zBuffer); // Map the staging buffer for reading. context->Map(stagingBuffer, 0, D3D11_MAP_READ, 0, &mappedResource); float* zData = reinterpret_cast<float*>(mappedResource.pData); memcpy(&z[0], zData, sizeof(float)*z.size()); zData = nullptr; context->Unmap(stagingBuffer, 0); // Now compare the GPU results against expected values. bool resultOK = true; for (size_t i = 0; i < x.size(); ++ i) { // NOTE: This comparison assumes the GPU produces *exactly* the // same result as the CPU. In general, this will not be the case // with floating-point calculations. 
float const expected = a*x[i] + y[i]; if (z[i] != expected) { printf("Unexpected result at position %lu: expected %.7e, got %.7e\n", i, expected, z[i]); resultOK = false; } } if (!resultOK) { printf("GPU results differed from the CPU results.\n"); OutputDebugStringA("GPU results differed from the CPU results.\n"); return 1; } printf("GPU output matched the CPU results.\n"); OutputDebugStringA("GPU output matched the CPU results.\n"); // Disconnect everything from the pipeline. ID3D11UnorderedAccessView* nullUAV = nullptr; context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, &initCounts); ID3D11ShaderResourceView* nullSRV = nullptr; context->CSSetShaderResources(0, 1, &nullSRV); context->CSSetShaderResources(1, 1, &nullSRV); ID3D11Buffer* nullBuffer = nullptr; context->CSSetConstantBuffers(0, 1, &nullBuffer); // Release resources. Again, note that none of the error checks above // release resources that have been allocated up to this point, so the // sample doesn't clean up after itself correctly unless everything succeeds. computeShader->Release(); shaderBlob->Release(); constantBuffer->Release(); stagingBuffer->Release(); zBufferUAV->Release(); zBuffer->Release(); xSRV->Release(); xBuffer->Release(); ySRV->Release(); yBuffer->Release(); context->Release(); device->Release(); return 0; }