Exemplo n.º 1
0
int main()
{
    // Create Device
    const D3D_FEATURE_LEVEL lvl[] = { D3D_FEATURE_LEVEL_11_1 };

    UINT createDeviceFlags = 0;
#ifdef _DEBUG
    createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
#endif

    /* ================================== */
    printf("Creating device...");
    ID3D11Device* device = nullptr;
    HRESULT hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createDeviceFlags, lvl, _countof(lvl), D3D11_SDK_VERSION, &device, nullptr, nullptr);
    if (FAILED(hr))
    {
        printf("Failed creating Direct3D 11 device %08X\n", hr);
        return returnDBG(-1);
    }

    /* ================================== */
    printf("Compile Compute Shader...");
    ID3DBlob *csBlob = nullptr;
    hr = CompileComputeShader(L"ComputeShader.hlsl", "main", device, &csBlob);
    if (FAILED(hr))
    {
        device->Release();
        printf("Failed compiling shader %08X\n", hr);
        return returnDBG(-1);
    }

    /* ================================== */
    printf("Create Compute Shader...");
    ID3D11ComputeShader* computeShader = nullptr;
    hr = device->CreateComputeShader(csBlob->GetBufferPointer(), csBlob->GetBufferSize(), nullptr, &computeShader);
            
    /* ================================== */
    printf("Creating buffers and filling them with initial data...");
    for (int i = 0; i < NUM_ELEMENTS; ++i)
    {
        g_vBuf0[i].i = i;
        g_vBuf0[i].f = (float)i;

        g_vBuf1[i].i = i;
        g_vBuf1[i].f = (float)i;
    }

    CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), &g_vBuf0[0], &g_pBuf0);
    CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), &g_vBuf1[0], &g_pBuf1);
    CreateRawBuffer(g_pDevice, NUM_ELEMENTS * sizeof(BufType), nullptr, &g_pBufResult);

    /* ================================== */
    printf("Running Compute Shader...");
    ID3D11ShaderResourceView* aRViews[2] = { g_pBuf0SRV, g_pBuf1SRV };
    RunComputeShader(g_pContext, g_pCS, 2, aRViews, nullptr, nullptr, 0, g_pBufResultUAV, NUM_ELEMENTS, 1, 1);
    printf("done\n");
    
    
    
    csBlob->Release();

    if (FAILED(hr))
    {
        device->Release();
    }

    printf("Success\n");



    // Clean up
    computeShader->Release();

    device->Release();

    return returnDBG(0);
}
int _tmain(int /*argc*/, _TCHAR* /*argv[]*/)
{
    // GROUP_SIZE_X defined in kernel.hlsl must match the 
    // groupSize declared here.
    size_t const groupSize = 512;
    size_t const numGroups = 16;
    size_t const dimension = numGroups*groupSize;

    // Create a D3D11 device and immediate context. 
    // TODO: The code below uses the default video adapter, with the
    // default set of feature levels.  Please see the MSDN docs if 
    // you wish to control which adapter and feature level are used.
    D3D_FEATURE_LEVEL featureLevel;
    ID3D11Device* device = nullptr;
    ID3D11DeviceContext* context = nullptr;
    HRESULT hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, 
        NULL, NULL, 0, D3D11_SDK_VERSION, &device, 
        &featureLevel, &context);
    if (FAILED(hr))
    {
        printf("D3D11CreateDevice failed with return code %x\n", hr);
        return hr;
    }

    // Create system memory and fill it with our initial data.  Note that
    // these data structures aren't really necessary , it's just a demonstration
    // of how you can take existing data structures you might have and copy
    // their data to/from GPU computations.
    std::vector<float> x(dimension);
    std::vector<float> y(dimension);
    std::vector<float> z(dimension);
    float const a = 2.0f;
    for (size_t i = 0; i < dimension; ++ i)
    {
        x[i] = static_cast<float>(i);
        y[i] = 100 - static_cast<float>(i);
    }

    // Create structured buffers for the "x" and "y" vectors.
    D3D11_BUFFER_DESC inputBufferDesc;
    inputBufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
    // The buffers are read-only by the GPU, writeable by the CPU.
    // TODO: If you will never again upate the data in a GPU buffer,
    // you might want to look at using a D3D11_SUBRESOURCE_DATA here to
    // provide the initialization data instead of doing the mapping 
    // and copying that happens below.
    inputBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
    inputBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    inputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    inputBufferDesc.StructureByteStride = sizeof(float);
    inputBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* xBuffer = nullptr;
    hr = device->CreateBuffer(&inputBufferDesc, NULL, &xBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for x buffer with return code %x\n", hr);
        return hr;
    }
    // We can re-use inputBufferDesc here because the layout and usage of the x
    // and y buffers is exactly the same.
    ID3D11Buffer* yBuffer = nullptr;
    hr = device->CreateBuffer(&inputBufferDesc, NULL, &yBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for x buffer with return code %x\n", hr);
        return hr;
    }

    // Create shader resource views for the "x" and "y" buffers.
    // TODO: You can optionally provide a D3D11_SHADER_RESOURCE_VIEW_DESC
    // as the second parameter if you need to use only part of the buffer
    // inside the compute shader.
    ID3D11ShaderResourceView* xSRV = nullptr;
    hr = device->CreateShaderResourceView(xBuffer, NULL, &xSRV);
    if (FAILED(hr))
    {
        printf("CreateShaderResourceView failed for x buffer with return code %x\n", hr);
        return hr;
    }

    ID3D11ShaderResourceView* ySRV = nullptr;
    hr = device->CreateShaderResourceView(yBuffer, NULL, &ySRV);
    if (FAILED(hr))
    {
        printf("CreateShaderResourceView failed for y buffer with return code %x\n", hr);
        return hr;
    }

    // Create a structured buffer for the "z" vector.  This buffer needs to be 
    // writeable by the GPU, so we can't create it with CPU read/write access.
    D3D11_BUFFER_DESC outputBufferDesc;
    outputBufferDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS 
        | D3D11_BIND_SHADER_RESOURCE;
    outputBufferDesc.Usage = D3D11_USAGE_DEFAULT;
    outputBufferDesc.CPUAccessFlags = 0;
    outputBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    outputBufferDesc.StructureByteStride = sizeof(float);
    outputBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* zBuffer = nullptr;
    hr = device->CreateBuffer(&outputBufferDesc, NULL, &zBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for z buffer with return code %x\n", hr);
        return hr;
    }

    // Create an unordered access view for the "z" vector.  
    D3D11_UNORDERED_ACCESS_VIEW_DESC outputUAVDesc;
    outputUAVDesc.Buffer.FirstElement = 0;        
    outputUAVDesc.Buffer.Flags = 0;            
    outputUAVDesc.Buffer.NumElements = dimension;
    outputUAVDesc.Format = DXGI_FORMAT_UNKNOWN;    
    outputUAVDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;   
    ID3D11UnorderedAccessView* zBufferUAV;
    hr = device->CreateUnorderedAccessView(zBuffer, 
        &outputUAVDesc, &zBufferUAV);
    if (FAILED(hr))
    {
        printf("CreateUnorderedAccessView failed for z buffer with return code %x\n", hr);
        return hr;
    }

    // Create a staging buffer, which will be used to copy back from zBuffer.
    D3D11_BUFFER_DESC stagingBufferDesc;
    stagingBufferDesc.BindFlags = 0;
    stagingBufferDesc.Usage = D3D11_USAGE_STAGING;  
    stagingBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    stagingBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    stagingBufferDesc.StructureByteStride = sizeof(float);
    stagingBufferDesc.ByteWidth = sizeof(float) * dimension;
    ID3D11Buffer* stagingBuffer;
    hr = device->CreateBuffer(&stagingBufferDesc, NULL, &stagingBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for staging buffer with return code %x\n", hr);
        return hr;
    }

    // Create a constant buffer (this buffer is used to pass the constant 
    // value 'a' to the kernel as cbuffer Constants).
    D3D11_BUFFER_DESC cbDesc;
    cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
    cbDesc.Usage = D3D11_USAGE_DYNAMIC;  
    cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    cbDesc.MiscFlags = 0;
    // Even though the constant buffer only has one float, DX expects
    // ByteWidth to be a multiple of 4 floats (i.e., one 128-bit register).
    cbDesc.ByteWidth = sizeof(float)*4;
    ID3D11Buffer* constantBuffer = nullptr;
    hr = device->CreateBuffer( &cbDesc, NULL, &constantBuffer);
    if (FAILED(hr))
    {
        printf("CreateBuffer failed for constant buffer with return code %x\n", hr);
        return hr;
    }

    // Map the constant buffer and set the constant value 'a'.
    D3D11_MAPPED_SUBRESOURCE mappedResource;
    context->Map(constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* constants = reinterpret_cast<float*>(mappedResource.pData);
    constants[0] = a;
    constants = nullptr;
    context->Unmap(constantBuffer, 0);

    // Map the x buffer and copy our data into it.
    context->Map(xBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* xvalues = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(xvalues, &x[0], sizeof(float)*x.size());
    xvalues = nullptr;
    context->Unmap(xBuffer, 0);

    // Map the y buffer and copy our data into it.
    context->Map(yBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource);
    float* yvalues = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(yvalues, &y[0], sizeof(float)*y.size());
    yvalues = nullptr;
    context->Unmap(yBuffer, 0);

    // Compile the compute shader into a blob.
    ID3DBlob* errorBlob = nullptr;
    ID3DBlob* shaderBlob = nullptr;
    hr = D3DX11CompileFromFile(L"kernel.hlsl", NULL, NULL, "saxpy", "cs_4_0",
        D3D10_SHADER_ENABLE_STRICTNESS, 0, NULL, &shaderBlob, &errorBlob, NULL);
    if (FAILED(hr))
    {
        // Print out the error message if there is one.
        if (errorBlob)
        {
            char const* message = (char*)errorBlob->GetBufferPointer();
            printf("kernel.hlsl failed to compile; error message:\n");
            printf("%s\n", message);
            errorBlob->Release();
        }
        return hr;
    }

    // Create a shader object from the compiled blob.
    ID3D11ComputeShader* computeShader;
    hr = device->CreateComputeShader(shaderBlob->GetBufferPointer(), 
        shaderBlob->GetBufferSize(), NULL, &computeShader);
    if (FAILED(hr))
    {
        printf("CreateComputeShader failed with return code %x\n", hr);
        return hr;
    }

    // Make the shader active.
    context->CSSetShader(computeShader, NULL, 0);

    // Attach the z buffer to the output via its unordered access view.
    UINT initCounts = 0xFFFFFFFF;
    context->CSSetUnorderedAccessViews(0, 1, &zBufferUAV, &initCounts);

    // Attach the input buffers via their shader resource views.
    context->CSSetShaderResources(0, 1, &xSRV);
    context->CSSetShaderResources(1, 1, &ySRV);

    // Attach the constant buffer
    context->CSSetConstantBuffers(0, 1, &constantBuffer);

    // Execute the shader, in 'numGroups' groups of 'groupSize' threads each.
    context->Dispatch(numGroups, 1, 1);

    // Copy the z buffer to the staging buffer so that we can 
    // retrieve the data for accesss by the CPU.
    context->CopyResource(stagingBuffer, zBuffer);

    // Map the staging buffer for reading.
    context->Map(stagingBuffer, 0, D3D11_MAP_READ, 0, &mappedResource);
    float* zData = reinterpret_cast<float*>(mappedResource.pData);
    memcpy(&z[0], zData, sizeof(float)*z.size());
    zData = nullptr;
    context->Unmap(stagingBuffer, 0);

    // Now compare the GPU results against expected values.
    bool resultOK = true;
    for (size_t i = 0; i < x.size(); ++ i)
    {
        // NOTE: This comparison assumes the GPU produces *exactly* the 
        // same result as the CPU.  In general, this will not be the case
        // with floating-point calculations.
        float const expected = a*x[i] + y[i];
        if (z[i] != expected)
        {
            printf("Unexpected result at position %lu: expected %.7e, got %.7e\n",
                i, expected, z[i]);
            resultOK = false;
        }
    }

    if (!resultOK)
    {
        printf("GPU results differed from the CPU results.\n");
        OutputDebugStringA("GPU results differed from the CPU results.\n");
        return 1;
    }

    printf("GPU output matched the CPU results.\n");
    OutputDebugStringA("GPU output matched the CPU results.\n");

    // Disconnect everything from the pipeline.
    ID3D11UnorderedAccessView* nullUAV = nullptr;
    context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, &initCounts);
    ID3D11ShaderResourceView* nullSRV = nullptr;
    context->CSSetShaderResources(0, 1, &nullSRV);
    context->CSSetShaderResources(1, 1, &nullSRV);
    ID3D11Buffer* nullBuffer = nullptr;
    context->CSSetConstantBuffers(0, 1, &nullBuffer);

    // Release resources.  Again, note that none of the error checks above
    // release resources that have been allocated up to this point, so the 
    // sample doesn't clean up after itself correctly unless everything succeeds.
    computeShader->Release();
    shaderBlob->Release();
    constantBuffer->Release();
    stagingBuffer->Release();
    zBufferUAV->Release();
    zBuffer->Release();
    xSRV->Release();
    xBuffer->Release();
    ySRV->Release();
    yBuffer->Release();
    context->Release();
    device->Release();

    return 0;
}