// Reports whether the attribute's source buffer can be bound directly as a
// vertex buffer instead of being copied/converted into an internal one.
//
// Returns false when the attribute has no buffer, when the buffer's storage
// does not support direct binding, when the renderer reports that the format
// needs a CPU conversion, or when the stride/offset do not satisfy the
// alignment rule described below.
bool VertexBufferInterface::directStoragePossible(const gl::VertexAttribute &attrib,
                                                  const gl::VertexAttribCurrentValueData &currentValue) const
{
    gl::Buffer *buffer = attrib.buffer.get();
    BufferD3D *storage = buffer ? BufferD3D::makeBufferD3D(buffer->getImplementation()) : NULL;

    if (!storage || !storage->supportsDirectBinding())
    {
        return false;
    }

    // Alignment restrictions: In D3D, vertex data must be aligned to
    // the format stride, or to a 4-byte boundary, whichever is smaller.
    // (Undocumented, and experimentally confirmed)
    size_t alignment = 4;
    bool requiresConversion = false;

    if (attrib.type != GL_FLOAT)
    {
        gl::VertexFormat vertexFormat(attrib, currentValue.Type);

        // Initialised so that a failed query cannot leave garbage behind.
        unsigned int outputElementSize = 0;
        getVertexBuffer()->getSpaceRequired(attrib, 1, 0, &outputElementSize);

        // A zero element size would make the modulo below divide by zero;
        // without a usable size, fall back to the non-direct path.
        if (outputElementSize == 0)
        {
            return false;
        }
        alignment = std::min<size_t>(outputElementSize, 4);

        requiresConversion = (mRenderer->getVertexConversionType(vertexFormat) & VERTEX_CONVERT_CPU) != 0;
    }

    bool isAligned = (static_cast<size_t>(ComputeVertexAttributeStride(attrib)) % alignment == 0) &&
                     (static_cast<size_t>(attrib.offset) % alignment == 0);

    return !requiresConversion && isAligned;
}
gl::Error VertexBuffer11::storeVertexAttributes(const gl::VertexAttribute &attrib, const gl::VertexAttribCurrentValueData ¤tValue, GLint start, GLsizei count, GLsizei instances, unsigned int offset) { if (!mBuffer) { return gl::Error(GL_OUT_OF_MEMORY, "Internal vertex buffer is not initialized."); } gl::Buffer *buffer = attrib.buffer.get(); int inputStride = ComputeVertexAttributeStride(attrib); ID3D11DeviceContext *dxContext = mRenderer->getDeviceContext(); D3D11_MAPPED_SUBRESOURCE mappedResource; HRESULT result = dxContext->Map(mBuffer, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &mappedResource); if (FAILED(result)) { return gl::Error(GL_OUT_OF_MEMORY, "Failed to map internal vertex buffer, HRESULT: 0x%08x.", result); } uint8_t *output = reinterpret_cast<uint8_t*>(mappedResource.pData) + offset; const uint8_t *input = NULL; if (attrib.enabled) { if (buffer) { BufferD3D *storage = BufferD3D::makeFromBuffer(buffer); gl::Error error = storage->getData(&input); if (error.isError()) { return error; } input += static_cast<int>(attrib.offset); } else { input = static_cast<const uint8_t*>(attrib.pointer); } } else { input = reinterpret_cast<const uint8_t*>(currentValue.FloatValues); } if (instances == 0 || attrib.divisor == 0) { input += inputStride * start; } gl::VertexFormat vertexFormat(attrib, currentValue.Type); const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormat); ASSERT(vertexFormatInfo.copyFunction != NULL); vertexFormatInfo.copyFunction(input, inputStride, count, output); dxContext->Unmap(mBuffer, 0); return gl::Error(GL_NO_ERROR); }
// Builds the resources used for debug drawing: compiles the shader program,
// looks up its two matrix uniforms, and configures the vertex buffer with a
// layout of a 4-float "position" followed by a 4-byte "color0".
DebugDraw::DebugDraw()
{
    // Vertex stage: transforms position.xyz by mat_projview * mat_model and
    // forwards color0 divided by 255.
    const char *vertexSource = ""
        "#version 430\n"
        "uniform mat4 mat_projview;\n"
        "uniform mat4 mat_model;\n"
        "in vec4 position;\n"
        "in vec4 color0;\n"
        "out vec4 vertex_color;\n"
        "void main(void) {\n"
        " mat4 mat_mv = mat_projview * mat_model;\n"
        " gl_Position = mat_mv * vec4(position.xyz, 1.0);\n"
        " vertex_color = color0 / 255.0;\n"
        "}\n";

    // Fragment stage: outputs the interpolated vertex colour unchanged.
    const char *fragmentSource = ""
        "#version 430\n"
        "precision highp float;\n"
        "in vec4 vertex_color;\n"
        "out vec4 out_Color;\n"
        "void main(void) {\n"
        " out_Color = vertex_color;\n"
        "}\n";

    // Shader setup
    ShaderDef shaderDef;
    bool ok = wrappers::opengl::CreateShaderFromProgram(shaderDef, vertexSource, fragmentSource);
    CHECK_M(ok, "Unable to compile DebugDraw shader");

    ok = m_shader.loadFromDef(shaderDef);
    CHECK_M(ok, "Unable to create DebugDraw shader from program");

    // The uniforms are looked up between activate() and deactivate().
    ok = m_shader.activate();
    CHECK_M(ok, "Unable to activate DebugDraw shader");

    ok = m_shader.getUniform("mat_projview", m_pProjViewMat);
    CHECK_M(ok, "Unable to locate mat_projview uniform");

    ok = m_shader.getUniform("mat_model", m_pModelMat);
    CHECK_M(ok, "Unable to locate mat_model uniform");

    ok = m_shader.deactivate();
    ASSERT(ok);

    // Vertex buffer setup: element names match the shader inputs above.
    VertexFormatDef::Builder formatBuilder;

    formatBuilder.add_elem(VertexFormatDefElem::Builder()
                               .set_name("position")
                               .set_count(4)
                               .set_size(sizeof(float))
                               .set_pad(0)
                               .set_type(GL_FLOAT)
                               .build());

    formatBuilder.add_elem(VertexFormatDefElem::Builder()
                               .set_name("color0")
                               .set_count(4)
                               .set_size(sizeof(u8))
                               .set_pad(0)
                               .set_type(GL_UNSIGNED_BYTE)
                               .build());

    VertexFormatDesc vertexFormat(&m_shader, formatBuilder.build());
    ok = m_vbo.setFormat(vertexFormat);
    CHECK_M(ok, "Unable to create DebugDraw vertex buffer.");
}
bool VertexBuffer11::storeVertexAttributes(const gl::VertexAttribute &attrib, const gl::VertexAttribCurrentValueData ¤tValue, GLint start, GLsizei count, GLsizei instances, unsigned int offset) { if (mBuffer) { gl::Buffer *buffer = attrib.buffer.get(); int inputStride = ComputeVertexAttributeStride(attrib); ID3D11DeviceContext *dxContext = mRenderer->getDeviceContext(); D3D11_MAPPED_SUBRESOURCE mappedResource; HRESULT result = dxContext->Map(mBuffer, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &mappedResource); if (FAILED(result)) { ERR("Vertex buffer map failed with error 0x%08x", result); return false; } uint8_t* output = reinterpret_cast<uint8_t*>(mappedResource.pData) + offset; const uint8_t *input = NULL; if (attrib.enabled) { if (buffer) { Buffer11 *storage = Buffer11::makeBuffer11(buffer->getImplementation()); input = static_cast<const uint8_t*>(storage->getData()) + static_cast<int>(attrib.offset); } else { input = static_cast<const uint8_t*>(attrib.pointer); } } else { input = reinterpret_cast<const uint8_t*>(currentValue.FloatValues); } if (instances == 0 || attrib.divisor == 0) { input += inputStride * start; } gl::VertexFormat vertexFormat(attrib, currentValue.Type); const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormat); ASSERT(vertexFormatInfo.copyFunction != NULL); vertexFormatInfo.copyFunction(input, inputStride, count, output); dxContext->Unmap(mBuffer, 0); return true; } else { ERR("Vertex buffer not initialized."); return false; } }
gl::Error VertexBuffer11::getSpaceRequired(const gl::VertexAttribute &attrib, GLsizei count, GLsizei instances, unsigned int *outSpaceRequired) const { unsigned int elementCount = 0; if (attrib.enabled) { if (instances == 0 || attrib.divisor == 0) { elementCount = count; } else { // Round up to divisor, if possible elementCount = UnsignedCeilDivide(static_cast<unsigned int>(instances), attrib.divisor); } gl::VertexFormat vertexFormat(attrib); const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormat); const d3d11::DXGIFormat &dxgiFormatInfo = d3d11::GetDXGIFormatInfo(vertexFormatInfo.nativeFormat); unsigned int elementSize = dxgiFormatInfo.pixelBytes; if (elementSize <= std::numeric_limits<unsigned int>::max() / elementCount) { if (outSpaceRequired) { *outSpaceRequired = elementSize * elementCount; } return gl::Error(GL_NO_ERROR); } else { return gl::Error(GL_OUT_OF_MEMORY, "New vertex buffer size would result in an overflow."); } } else { const unsigned int elementSize = 4; if (outSpaceRequired) { *outSpaceRequired = elementSize * 4; } return gl::Error(GL_NO_ERROR); } }
// Computes the number of bytes needed to store an attribute in a D3D11 vertex
// buffer and writes it to *outSpaceRequired (when non-null).
//
// Enabled attribute: element size of the native DXGI format multiplied by the
// element count, where the count is `count` for non-instanced data
// (instances == 0 or divisor == 0) and ceil(instances / divisor) otherwise.
// Disabled attribute: a fixed 16 bytes (4 * 4).
//
// Returns false if the byte size would overflow unsigned int, true otherwise.
bool VertexBuffer11::getSpaceRequired(const gl::VertexAttribute &attrib, GLsizei count, GLsizei instances,
                                      unsigned int *outSpaceRequired) const
{
    if (!attrib.enabled)
    {
        const unsigned int elementSize = 4;
        if (outSpaceRequired)
        {
            *outSpaceRequired = elementSize * 4;
        }
        return true;
    }

    unsigned int elementCount = 0;
    if (instances == 0 || attrib.divisor == 0)
    {
        elementCount = count;
    }
    else
    {
        // Round up to divisor, if possible
        elementCount = rx::UnsignedCeilDivide(static_cast<unsigned int>(instances), attrib.divisor);
    }

    gl::VertexFormat vertexFormat(attrib);
    const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormat);
    const d3d11::DXGIFormat &dxgiFormatInfo = d3d11::GetDXGIFormatInfo(vertexFormatInfo.nativeFormat);
    unsigned int elementSize = dxgiFormatInfo.pixelBytes;

    // elementCount is zero when count == 0; skip the division in that case
    // (it would divide by zero) - an empty range simply needs no space.
    if (elementCount != 0 && elementSize > std::numeric_limits<unsigned int>::max() / elementCount)
    {
        return false;
    }

    if (outSpaceRequired)
    {
        *outSpaceRequired = elementSize * elementCount;
    }
    return true;
}
gl::Error VertexBuffer9::spaceRequired(const gl::VertexAttribute &attrib, std::size_t count, GLsizei instances, unsigned int *outSpaceRequired) const { gl::VertexFormat vertexFormat(attrib, GL_FLOAT); const d3d9::VertexFormat &d3d9VertexInfo = d3d9::GetVertexFormatInfo(mRenderer->getCapsDeclTypes(), vertexFormat); if (attrib.enabled) { unsigned int elementCount = 0; if (instances == 0 || attrib.divisor == 0) { elementCount = count; } else { // Round up to divisor, if possible elementCount = UnsignedCeilDivide(static_cast<unsigned int>(instances), attrib.divisor); } if (d3d9VertexInfo.outputElementSize <= std::numeric_limits<unsigned int>::max() / elementCount) { if (outSpaceRequired) { *outSpaceRequired = d3d9VertexInfo.outputElementSize * elementCount; } return gl::Error(GL_NO_ERROR); } else { return gl::Error(GL_OUT_OF_MEMORY, "New vertex buffer size would result in an overflow."); } } else { const unsigned int elementSize = 4; if (outSpaceRequired) { *outSpaceRequired = elementSize * 4; } return gl::Error(GL_NO_ERROR); } }
/**
 * Creates a sprite batch that draws with the given texture.
 *
 * @param texture The texture to sample; must not be NULL.
 * @param effect Optional custom effect. When NULL, the shared static sprite
 *      effect is used (created from SPRITE_VSH/SPRITE_FSH on first use,
 *      add-ref'd on later uses).
 * @param initialCapacity Initial capacity passed to the mesh batch; 0 selects
 *      SPRITE_BATCH_DEFAULT_SIZE.
 *
 * @return The new batch, or NULL if the default effect cannot be loaded or the
 *      effect has no GL_SAMPLER_2D uniform.
 */
SpriteBatch* SpriteBatch::create(Texture* texture, Effect* effect, unsigned int initialCapacity)
{
    GP_ASSERT(texture != NULL);

    const bool usesCustomEffect = (effect != NULL);
    if (!usesCustomEffect)
    {
        if (__spriteEffect != NULL)
        {
            // The shared effect already exists: take another reference to it.
            effect = __spriteEffect;
            __spriteEffect->addRef();
        }
        else
        {
            // Create our static sprite effect.
            __spriteEffect = Effect::createFromFile(SPRITE_VSH, SPRITE_FSH);
            if (__spriteEffect == NULL)
            {
                GP_ERROR("Unable to load sprite effect.");
                return NULL;
            }
            effect = __spriteEffect;
        }
    }

    // Search for the first sampler uniform in the effect.
    Uniform* firstSampler = NULL;
    const unsigned int uniformCount = effect->getUniformCount();
    for (unsigned int index = 0; index < uniformCount; ++index)
    {
        Uniform* candidate = effect->getUniform(index);
        if (candidate && candidate->getType() == GL_SAMPLER_2D)
        {
            firstSampler = candidate;
            break;
        }
    }

    if (firstSampler == NULL)
    {
        GP_ERROR("No uniform of type GL_SAMPLER_2D found in sprite effect.");
        // NOTE(review): this also releases a caller-supplied effect, for which
        // no reference was added above - confirm that is intended.
        SAFE_RELEASE(effect);
        return NULL;
    }

    // Wrap the effect in a material
    Material* material = Material::create(effect); // +ref effect

    // Set initial material state
    material->getStateBlock()->setBlend(true);
    material->getStateBlock()->setBlendSrc(RenderState::BLEND_SRC_ALPHA);
    material->getStateBlock()->setBlendDst(RenderState::BLEND_ONE_MINUS_SRC_ALPHA);
    //material->getStateBlock()->setDepthFunction(RenderState::DEPTH_LEQUAL);

    // Bind the texture to the material as a sampler
    Texture::Sampler* sampler = Texture::Sampler::create(texture); // +ref texture
    material->getParameter(firstSampler->getName())->setValue(sampler);

    // Define the vertex format for the batch
    VertexFormat::Element vertexElements[] =
    {
        VertexFormat::Element(VertexFormat::POSITION, 3),
        VertexFormat::Element(VertexFormat::TEXCOORD0, 2),
        VertexFormat::Element(VertexFormat::COLOR, 4)
    };
    VertexFormat vertexFormat(vertexElements, 3);

    // Create the mesh batch
    const unsigned int capacity = initialCapacity > 0 ? initialCapacity : SPRITE_BATCH_DEFAULT_SIZE;
    MeshBatch* meshBatch = MeshBatch::create(vertexFormat, Mesh::TRIANGLE_STRIP, material, true, capacity);
    material->release(); // don't call SAFE_RELEASE since material is used below

    // Create the batch
    SpriteBatch* spriteBatch = new SpriteBatch();
    spriteBatch->_sampler = sampler;
    spriteBatch->_customEffect = usesCustomEffect;
    spriteBatch->_batch = meshBatch;
    spriteBatch->_textureWidthRatio = 1.0f / (float)texture->getWidth();
    spriteBatch->_textureHeightRatio = 1.0f / (float)texture->getHeight();

    // Bind an ortho projection to the material by default (user can override with setProjectionMatrix)
    Game* game = Game::getInstance();
    Matrix::createOrthographicOffCenter(0, game->getViewport().width, game->getViewport().height, 0, 0, 1,
                                        &spriteBatch->_projectionMatrix);
    material->getParameter("u_projectionMatrix")->bindValue(spriteBatch, &SpriteBatch::getProjectionMatrix);

    return spriteBatch;
}
gl::Error InputLayoutCache::applyVertexBuffers(TranslatedAttribute attributes[gl::MAX_VERTEX_ATTRIBS], GLenum mode, gl::Program *program) { ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program); int sortedSemanticIndices[gl::MAX_VERTEX_ATTRIBS]; programD3D->sortAttributesByLayout(attributes, sortedSemanticIndices); bool programUsesInstancedPointSprites = programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation(); bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS); if (!mDevice || !mDeviceContext) { return gl::Error(GL_OUT_OF_MEMORY, "Internal input layout cache is not initialized."); } InputLayoutKey ilKey = { 0 }; static const char* semanticName = "TEXCOORD"; unsigned int firstIndexedElement = gl::MAX_VERTEX_ATTRIBS; unsigned int firstInstancedElement = gl::MAX_VERTEX_ATTRIBS; unsigned int nextAvailableInputSlot = 0; for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { if (attributes[i].active) { D3D11_INPUT_CLASSIFICATION inputClass = attributes[i].divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA; // If rendering points and instanced pointsprite emulation is being used, the inputClass is required to be configured as per instance data inputClass = instancedPointSpritesActive ? 
D3D11_INPUT_PER_INSTANCE_DATA : inputClass; gl::VertexFormat vertexFormat(*attributes[i].attribute, attributes[i].currentValueType); const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormat, mFeatureLevel); // Record the type of the associated vertex shader vector in our key // This will prevent mismatched vertex shaders from using the same input layout GLint attributeSize; program->getActiveAttribute(ilKey.elementCount, 0, NULL, &attributeSize, &ilKey.elements[ilKey.elementCount].glslElementType, NULL); ilKey.elements[ilKey.elementCount].desc.SemanticName = semanticName; ilKey.elements[ilKey.elementCount].desc.SemanticIndex = sortedSemanticIndices[i]; ilKey.elements[ilKey.elementCount].desc.Format = vertexFormatInfo.nativeFormat; ilKey.elements[ilKey.elementCount].desc.InputSlot = i; ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = 0; ilKey.elements[ilKey.elementCount].desc.InputSlotClass = inputClass; ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = instancedPointSpritesActive ? 1 : attributes[i].divisor; if (inputClass == D3D11_INPUT_PER_VERTEX_DATA && firstIndexedElement == gl::MAX_VERTEX_ATTRIBS) { firstIndexedElement = ilKey.elementCount; } else if (inputClass == D3D11_INPUT_PER_INSTANCE_DATA && firstInstancedElement == gl::MAX_VERTEX_ATTRIBS) { firstInstancedElement = ilKey.elementCount; } ilKey.elementCount++; nextAvailableInputSlot = i + 1; } } // Instanced PointSprite emulation requires additional entries in the // inputlayout to support the vertices that make up the pointsprite quad. 
// We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the input layout must match the shader if (programUsesInstancedPointSprites) { ilKey.elements[ilKey.elementCount].desc.SemanticName = "SPRITEPOSITION"; ilKey.elements[ilKey.elementCount].desc.SemanticIndex = 0; ilKey.elements[ilKey.elementCount].desc.Format = DXGI_FORMAT_R32G32B32_FLOAT; ilKey.elements[ilKey.elementCount].desc.InputSlot = nextAvailableInputSlot; ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = 0; ilKey.elements[ilKey.elementCount].desc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = 0; // The new elements are D3D11_INPUT_PER_VERTEX_DATA data so the indexed element // tracking must be applied. This ensures that the instancing specific // buffer swapping logic continues to work. if (firstIndexedElement == gl::MAX_VERTEX_ATTRIBS) { firstIndexedElement = ilKey.elementCount; } ilKey.elementCount++; ilKey.elements[ilKey.elementCount].desc.SemanticName = "SPRITETEXCOORD"; ilKey.elements[ilKey.elementCount].desc.SemanticIndex = 0; ilKey.elements[ilKey.elementCount].desc.Format = DXGI_FORMAT_R32G32_FLOAT; ilKey.elements[ilKey.elementCount].desc.InputSlot = nextAvailableInputSlot; ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = sizeof(float) * 3; ilKey.elements[ilKey.elementCount].desc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = 0; ilKey.elementCount++; } // On 9_3, we must ensure that slot 0 contains non-instanced data. // If slot 0 currently contains instanced data then we swap it with a non-instanced element. // Note that instancing is only available on 9_3 via ANGLE_instanced_arrays, since 9_3 doesn't support OpenGL ES 3.0. // As per the spec for ANGLE_instanced_arrays, not all attributes can be instanced simultaneously, so a non-instanced element must exist. 
ASSERT(!(mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && firstIndexedElement == gl::MAX_VERTEX_ATTRIBS)); bool moveFirstIndexedIntoSlotZero = mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && firstInstancedElement == 0 && firstIndexedElement != gl::MAX_VERTEX_ATTRIBS; if (moveFirstIndexedIntoSlotZero) { ilKey.elements[firstInstancedElement].desc.InputSlot = ilKey.elements[firstIndexedElement].desc.InputSlot; ilKey.elements[firstIndexedElement].desc.InputSlot = 0; // Instanced PointSprite emulation uses multiple layout entries across a single vertex buffer. // If an index swap is performed, we need to ensure that all elements get the proper InputSlot. if (programUsesInstancedPointSprites) { ilKey.elements[firstIndexedElement + 1].desc.InputSlot = 0; } } ID3D11InputLayout *inputLayout = NULL; InputLayoutMap::iterator keyIter = mInputLayoutMap.find(ilKey); if (keyIter != mInputLayoutMap.end()) { inputLayout = keyIter->second.inputLayout; keyIter->second.lastUsedTime = mCounter++; } else { gl::VertexFormat shaderInputLayout[gl::MAX_VERTEX_ATTRIBS]; GetInputLayout(attributes, shaderInputLayout); ShaderExecutableD3D *shader = NULL; gl::Error error = programD3D->getVertexExecutableForInputLayout(shaderInputLayout, &shader, nullptr); if (error.isError()) { return error; } ShaderExecutableD3D *shader11 = ShaderExecutable11::makeShaderExecutable11(shader); D3D11_INPUT_ELEMENT_DESC descs[gl::MAX_VERTEX_ATTRIBS]; for (unsigned int j = 0; j < ilKey.elementCount; ++j) { descs[j] = ilKey.elements[j].desc; } HRESULT result = mDevice->CreateInputLayout(descs, ilKey.elementCount, shader11->getFunction(), shader11->getLength(), &inputLayout); if (FAILED(result)) { return gl::Error(GL_OUT_OF_MEMORY, "Failed to create internal input layout, HRESULT: 0x%08x", result); } if (mInputLayoutMap.size() >= kMaxInputLayouts) { TRACE("Overflowed the limit of %u input layouts, removing the least recently used " "to make room.", kMaxInputLayouts); InputLayoutMap::iterator leastRecentlyUsed = 
mInputLayoutMap.begin(); for (InputLayoutMap::iterator i = mInputLayoutMap.begin(); i != mInputLayoutMap.end(); i++) { if (i->second.lastUsedTime < leastRecentlyUsed->second.lastUsedTime) { leastRecentlyUsed = i; } } SafeRelease(leastRecentlyUsed->second.inputLayout); mInputLayoutMap.erase(leastRecentlyUsed); } InputLayoutCounterPair inputCounterPair; inputCounterPair.inputLayout = inputLayout; inputCounterPair.lastUsedTime = mCounter++; mInputLayoutMap.insert(std::make_pair(ilKey, inputCounterPair)); } if (inputLayout != mCurrentIL) { mDeviceContext->IASetInputLayout(inputLayout); mCurrentIL = inputLayout; } bool dirtyBuffers = false; size_t minDiff = gl::MAX_VERTEX_ATTRIBS; size_t maxDiff = 0; unsigned int nextAvailableIndex = 0; for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { ID3D11Buffer *buffer = NULL; if (attributes[i].active) { VertexBuffer11 *vertexBuffer = VertexBuffer11::makeVertexBuffer11(attributes[i].vertexBuffer); Buffer11 *bufferStorage = attributes[i].storage ? Buffer11::makeBuffer11(attributes[i].storage) : NULL; buffer = bufferStorage ? bufferStorage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK) : vertexBuffer->getBuffer(); } UINT vertexStride = attributes[i].stride; UINT vertexOffset = attributes[i].offset; if (buffer != mCurrentBuffers[i] || vertexStride != mCurrentVertexStrides[i] || vertexOffset != mCurrentVertexOffsets[i]) { dirtyBuffers = true; minDiff = std::min(minDiff, static_cast<size_t>(i)); maxDiff = std::max(maxDiff, static_cast<size_t>(i)); mCurrentBuffers[i] = buffer; mCurrentVertexStrides[i] = vertexStride; mCurrentVertexOffsets[i] = vertexOffset; // If a non null ID3D11Buffer is being assigned to mCurrentBuffers, // then the next available index needs to be tracked to ensure // that any instanced pointsprite emulation buffers will be properly packed. if (buffer) { nextAvailableIndex = i + 1; } } } // Instanced PointSprite emulation requires two additional ID3D11Buffers. 
// A vertex buffer needs to be created and added to the list of current buffers, // strides and offsets collections. This buffer contains the vertices for a single // PointSprite quad. // An index buffer also needs to be created and applied because rendering instanced // data on D3D11 FL9_3 requires DrawIndexedInstanced() to be used. if (instancedPointSpritesActive) { HRESULT result = S_OK; const UINT pointSpriteVertexStride = sizeof(float) * 5; if (!mPointSpriteVertexBuffer) { static const float pointSpriteVertices[] = { // Position // TexCoord -1.0f, -1.0f, 0.0f, 0.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, -1.0f, 0.0f, 1.0f, 1.0f, -1.0f, -1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 0.0f, }; D3D11_SUBRESOURCE_DATA vertexBufferData = { pointSpriteVertices, 0, 0 }; D3D11_BUFFER_DESC vertexBufferDesc; vertexBufferDesc.ByteWidth = sizeof(pointSpriteVertices); vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER; vertexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE; vertexBufferDesc.CPUAccessFlags = 0; vertexBufferDesc.MiscFlags = 0; vertexBufferDesc.StructureByteStride = 0; result = mDevice->CreateBuffer(&vertexBufferDesc, &vertexBufferData, &mPointSpriteVertexBuffer); if (FAILED(result)) { return gl::Error(GL_OUT_OF_MEMORY, "Failed to create instanced pointsprite emulation vertex buffer, HRESULT: 0x%08x", result); } } mCurrentBuffers[nextAvailableIndex] = mPointSpriteVertexBuffer; mCurrentVertexStrides[nextAvailableIndex] = pointSpriteVertexStride; mCurrentVertexOffsets[nextAvailableIndex] = 0; if (!mPointSpriteIndexBuffer) { // Create an index buffer and set it for pointsprite rendering static const unsigned short pointSpriteIndices[] = { 0, 1, 2, 3, 4, 5, }; D3D11_SUBRESOURCE_DATA indexBufferData = { pointSpriteIndices, 0, 0 }; D3D11_BUFFER_DESC indexBufferDesc; indexBufferDesc.ByteWidth = sizeof(pointSpriteIndices); indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER; indexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE; 
indexBufferDesc.CPUAccessFlags = 0; indexBufferDesc.MiscFlags = 0; indexBufferDesc.StructureByteStride = 0; result = mDevice->CreateBuffer(&indexBufferDesc, &indexBufferData, &mPointSpriteIndexBuffer); if (FAILED(result)) { SafeRelease(mPointSpriteVertexBuffer); return gl::Error(GL_OUT_OF_MEMORY, "Failed to create instanced pointsprite emulation index buffer, HRESULT: 0x%08x", result); } } // The index buffer is applied here because Instanced PointSprite emulation uses // the a non-indexed rendering path in ANGLE (DrawArrays). This means that applyIndexBuffer() // on the renderer will not be called and setting this buffer here ensures that the rendering // path will contain the correct index buffers. mDeviceContext->IASetIndexBuffer(mPointSpriteIndexBuffer, DXGI_FORMAT_R16_UINT, 0); } if (moveFirstIndexedIntoSlotZero) { // In this case, we swapped the slots of the first instanced element and the first indexed element, to ensure // that the first slot contains non-instanced data (required by Feature Level 9_3). // We must also swap the corresponding buffers sent to IASetVertexBuffers so that the correct data is sent to each slot. std::swap(mCurrentBuffers[firstIndexedElement], mCurrentBuffers[firstInstancedElement]); std::swap(mCurrentVertexStrides[firstIndexedElement], mCurrentVertexStrides[firstInstancedElement]); std::swap(mCurrentVertexOffsets[firstIndexedElement], mCurrentVertexOffsets[firstInstancedElement]); } if (dirtyBuffers) { ASSERT(minDiff <= maxDiff && maxDiff < gl::MAX_VERTEX_ATTRIBS); mDeviceContext->IASetVertexBuffers(minDiff, maxDiff - minDiff + 1, mCurrentBuffers + minDiff, mCurrentVertexStrides + minDiff, mCurrentVertexOffsets + minDiff); } return gl::Error(GL_NO_ERROR); }
// Binds the stream sources for all active attributes, configures stream
// frequencies for instanced draws, and sets a D3D9 vertex declaration that
// matches the attributes (reusing a cached declaration when one matches,
// otherwise replacing the least recently used cache entry).
//
// *repeatDraw is set to 1, or to `instances` when an instanced draw was
// requested but none of the active attributes is instanced.
//
// Returns GL_INVALID_OPERATION if instances > 0 and no active attribute has a
// zero divisor, GL_OUT_OF_MEMORY if a new vertex declaration cannot be
// created, and GL_NO_ERROR otherwise.
GLenum VertexDeclarationCache::applyDeclaration(IDirect3DDevice9 *device, TranslatedAttribute attributes[],
                                                gl::ProgramBinary *programBinary, GLsizei instances, GLsizei *repeatDraw)
{
    *repeatDraw = 1;

    int indexedAttribute = gl::MAX_VERTEX_ATTRIBS;
    int instancedAttribute = gl::MAX_VERTEX_ATTRIBS;

    if (instances > 0)
    {
        // Find an indexed attribute to be mapped to D3D stream 0
        for (int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++)
        {
            if (attributes[i].active)
            {
                if (indexedAttribute == gl::MAX_VERTEX_ATTRIBS && attributes[i].divisor == 0)
                {
                    indexedAttribute = i;
                }
                else if (instancedAttribute == gl::MAX_VERTEX_ATTRIBS && attributes[i].divisor != 0)
                {
                    instancedAttribute = i;
                }
                if (indexedAttribute != gl::MAX_VERTEX_ATTRIBS && instancedAttribute != gl::MAX_VERTEX_ATTRIBS)
                    break;   // Found both an indexed and instanced attribute
            }
        }

        if (indexedAttribute == gl::MAX_VERTEX_ATTRIBS)
        {
            return GL_INVALID_OPERATION;
        }
    }

    D3DVERTEXELEMENT9 elements[gl::MAX_VERTEX_ATTRIBS + 1];
    D3DVERTEXELEMENT9 *element = &elements[0];

    for (int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++)
    {
        if (attributes[i].active)
        {
            // Directly binding the storage buffer is not supported for d3d9
            ASSERT(attributes[i].storage == NULL);

            int stream = i;

            if (instances > 0)
            {
                // Due to a bug on ATI cards we can't enable instancing when none of the attributes are instanced.
                if (instancedAttribute == gl::MAX_VERTEX_ATTRIBS)
                {
                    *repeatDraw = instances;
                }
                else
                {
                    // Swap the indexed attribute with attribute 0 so that the
                    // indexed data ends up on stream 0.
                    if (i == indexedAttribute)
                    {
                        stream = 0;
                    }
                    else if (i == 0)
                    {
                        stream = indexedAttribute;
                    }

                    UINT frequency = 1;

                    if (attributes[i].divisor == 0)
                    {
                        frequency = D3DSTREAMSOURCE_INDEXEDDATA | instances;
                    }
                    else
                    {
                        frequency = D3DSTREAMSOURCE_INSTANCEDATA | attributes[i].divisor;
                    }

                    device->SetStreamSourceFreq(stream, frequency);
                    mInstancingEnabled = true;
                }
            }

            VertexBuffer9 *vertexBuffer = VertexBuffer9::makeVertexBuffer9(attributes[i].vertexBuffer);

            if (mAppliedVBs[stream].serial != attributes[i].serial ||
                mAppliedVBs[stream].stride != attributes[i].stride ||
                mAppliedVBs[stream].offset != attributes[i].offset)
            {
                device->SetStreamSource(stream, vertexBuffer->getBuffer(), attributes[i].offset, attributes[i].stride);
                mAppliedVBs[stream].serial = attributes[i].serial;
                mAppliedVBs[stream].stride = attributes[i].stride;
                mAppliedVBs[stream].offset = attributes[i].offset;
            }

            gl::VertexFormat vertexFormat(*attributes[i].attribute, GL_FLOAT);

            element->Stream = stream;
            element->Offset = 0;
            element->Type = d3d9::GetNativeVertexFormat(vertexFormat);
            element->Method = D3DDECLMETHOD_DEFAULT;
            element->Usage = D3DDECLUSAGE_TEXCOORD;
            element->UsageIndex = programBinary->getSemanticIndex(i);
            element++;
        }
    }

    if (instances == 0 || instancedAttribute == gl::MAX_VERTEX_ATTRIBS)
    {
        if (mInstancingEnabled)
        {
            for (int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++)
            {
                device->SetStreamSourceFreq(i, 1);
            }

            mInstancingEnabled = false;
        }
    }

    static const D3DVERTEXELEMENT9 end = D3DDECL_END();
    *(element++) = end;

    // Reuse a cached declaration whose element list matches exactly.
    for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++)
    {
        VertexDeclCacheEntry *entry = &mVertexDeclCache[i];
        if (memcmp(entry->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9)) == 0 && entry->vertexDeclaration)
        {
            entry->lruCount = ++mMaxLru;
            if (entry->vertexDeclaration != mLastSetVDecl)
            {
                device->SetVertexDeclaration(entry->vertexDeclaration);
                mLastSetVDecl = entry->vertexDeclaration;
            }

            return GL_NO_ERROR;
        }
    }

    // No match: replace the least recently used cache entry.
    VertexDeclCacheEntry *lastCache = mVertexDeclCache;

    for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++)
    {
        if (mVertexDeclCache[i].lruCount < lastCache->lruCount)
        {
            lastCache = &mVertexDeclCache[i];
        }
    }

    if (lastCache->vertexDeclaration != NULL)
    {
        SafeRelease(lastCache->vertexDeclaration);
        // mLastSetVDecl is set to the replacement, so we don't have to worry
        // about it.
    }

    memcpy(lastCache->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9));

    if (FAILED(device->CreateVertexDeclaration(elements, &lastCache->vertexDeclaration)))
    {
        // Creation failed: keep the entry empty so the cache lookup above
        // never matches it, and forget the last-set declaration because it
        // may have been the one released just above.
        lastCache->vertexDeclaration = NULL;
        mLastSetVDecl = NULL;
        return GL_OUT_OF_MEMORY;
    }

    device->SetVertexDeclaration(lastCache->vertexDeclaration);
    mLastSetVDecl = lastCache->vertexDeclaration;
    lastCache->lruCount = ++mMaxLru;

    return GL_NO_ERROR;
}
gl::Error InputLayoutCache::applyVertexBuffers(TranslatedAttribute attributes[gl::MAX_VERTEX_ATTRIBS], gl::ProgramBinary *programBinary) { int sortedSemanticIndices[gl::MAX_VERTEX_ATTRIBS]; programBinary->sortAttributesByLayout(attributes, sortedSemanticIndices); if (!mDevice || !mDeviceContext) { return gl::Error(GL_OUT_OF_MEMORY, "Internal input layout cache is not initialized."); } InputLayoutKey ilKey = { 0 }; static const char* semanticName = "TEXCOORD"; for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { if (attributes[i].active) { D3D11_INPUT_CLASSIFICATION inputClass = attributes[i].divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA; gl::VertexFormat vertexFormat(*attributes[i].attribute, attributes[i].currentValueType); const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormat); // Record the type of the associated vertex shader vector in our key // This will prevent mismatched vertex shaders from using the same input layout GLint attributeSize; programBinary->getActiveAttribute(sortedSemanticIndices[i], 0, NULL, &attributeSize, &ilKey.elements[ilKey.elementCount].glslElementType, NULL); ilKey.elements[ilKey.elementCount].desc.SemanticName = semanticName; ilKey.elements[ilKey.elementCount].desc.SemanticIndex = i; ilKey.elements[ilKey.elementCount].desc.Format = vertexFormatInfo.nativeFormat; ilKey.elements[ilKey.elementCount].desc.InputSlot = i; ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = 0; ilKey.elements[ilKey.elementCount].desc.InputSlotClass = inputClass; ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = attributes[i].divisor; ilKey.elementCount++; } } ID3D11InputLayout *inputLayout = NULL; InputLayoutMap::iterator keyIter = mInputLayoutMap.find(ilKey); if (keyIter != mInputLayoutMap.end()) { inputLayout = keyIter->second.inputLayout; keyIter->second.lastUsedTime = mCounter++; } else { gl::VertexFormat shaderInputLayout[gl::MAX_VERTEX_ATTRIBS]; 
GetInputLayout(attributes, shaderInputLayout); ProgramD3D *programD3D = ProgramD3D::makeProgramD3D(programBinary->getImplementation()); ShaderExecutable *shader = NULL; gl::Error error = programD3D->getVertexExecutableForInputLayout(shaderInputLayout, &shader); if (error.isError()) { return error; } ShaderExecutable *shader11 = ShaderExecutable11::makeShaderExecutable11(shader); D3D11_INPUT_ELEMENT_DESC descs[gl::MAX_VERTEX_ATTRIBS]; for (unsigned int j = 0; j < ilKey.elementCount; ++j) { descs[j] = ilKey.elements[j].desc; } HRESULT result = mDevice->CreateInputLayout(descs, ilKey.elementCount, shader11->getFunction(), shader11->getLength(), &inputLayout); if (FAILED(result)) { return gl::Error(GL_OUT_OF_MEMORY, "Failed to create internal input layout, HRESULT: 0x%08x", result); } if (mInputLayoutMap.size() >= kMaxInputLayouts) { TRACE("Overflowed the limit of %u input layouts, removing the least recently used " "to make room.", kMaxInputLayouts); InputLayoutMap::iterator leastRecentlyUsed = mInputLayoutMap.begin(); for (InputLayoutMap::iterator i = mInputLayoutMap.begin(); i != mInputLayoutMap.end(); i++) { if (i->second.lastUsedTime < leastRecentlyUsed->second.lastUsedTime) { leastRecentlyUsed = i; } } SafeRelease(leastRecentlyUsed->second.inputLayout); mInputLayoutMap.erase(leastRecentlyUsed); } InputLayoutCounterPair inputCounterPair; inputCounterPair.inputLayout = inputLayout; inputCounterPair.lastUsedTime = mCounter++; mInputLayoutMap.insert(std::make_pair(ilKey, inputCounterPair)); } if (inputLayout != mCurrentIL) { mDeviceContext->IASetInputLayout(inputLayout); mCurrentIL = inputLayout; } bool dirtyBuffers = false; size_t minDiff = gl::MAX_VERTEX_ATTRIBS; size_t maxDiff = 0; for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { ID3D11Buffer *buffer = NULL; if (attributes[i].active) { VertexBuffer11 *vertexBuffer = VertexBuffer11::makeVertexBuffer11(attributes[i].vertexBuffer); Buffer11 *bufferStorage = attributes[i].storage ? 
Buffer11::makeBuffer11(attributes[i].storage) : NULL; buffer = bufferStorage ? bufferStorage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK) : vertexBuffer->getBuffer(); } UINT vertexStride = attributes[i].stride; UINT vertexOffset = attributes[i].offset; if (buffer != mCurrentBuffers[i] || vertexStride != mCurrentVertexStrides[i] || vertexOffset != mCurrentVertexOffsets[i]) { dirtyBuffers = true; minDiff = std::min(minDiff, static_cast<size_t>(i)); maxDiff = std::max(maxDiff, static_cast<size_t>(i)); mCurrentBuffers[i] = buffer; mCurrentVertexStrides[i] = vertexStride; mCurrentVertexOffsets[i] = vertexOffset; } } if (dirtyBuffers) { ASSERT(minDiff <= maxDiff && maxDiff < gl::MAX_VERTEX_ATTRIBS); mDeviceContext->IASetVertexBuffers(minDiff, maxDiff - minDiff + 1, mCurrentBuffers + minDiff, mCurrentVertexStrides + minDiff, mCurrentVertexOffsets + minDiff); } return gl::Error(GL_NO_ERROR); }
// Writes the index list for consecutive quads into `indices`: for every group of
// four vertices (v, v+1, v+2, v+3) it emits the two triangles (v, v+1, v+2) and
// (v, v+2, v+3). `indices` must have room for (vertexCount / 4) * 6 entries.
// The loop counter is a std::size_t so it cannot wrap, whatever IndexType is.
template <typename IndexType>
static void fillQuadIndices(IndexType* indices, std::size_t vertexCount)
{
	IndexType* index = indices;
	for (std::size_t first = 0; first < vertexCount; first += 4)
	{
		*(index++) = static_cast<IndexType>(first);
		*(index++) = static_cast<IndexType>(first + 1);
		*(index++) = static_cast<IndexType>(first + 2);
		*(index++) = static_cast<IndexType>(first);
		*(index++) = static_cast<IndexType>(first + 2);
		*(index++) = static_cast<IndexType>(first + 3);
	}
}

// Creates the vertex and index buffers for a batch of up to `maxOverlayCount`
// overlays. Each overlay occupies 4 vertices and 6 indices; the index buffer is
// filled once here and uses 16-bit indices unless more than 65536 vertices are
// needed.
//
// graphicsContext - context used to create both buffers.
// maxOverlayCount - capacity of the batch, in overlays.
OverlayBatch::OverlayBatch(GraphicsContext* graphicsContext, std::size_t maxOverlayCount):
	maxOverlayCount(maxOverlayCount),
	overlayCount(0),
	vertexBuffer(nullptr),
	indexBuffer(nullptr),
	image(nullptr)
{
	std::size_t vertexCount = maxOverlayCount * 4;
	std::size_t indexCount = maxOverlayCount * 6;

	// Create vertex buffer
	VertexAttribute attributes[] =
	{
		VertexAttribute(VertexAttributeFormat::VECTOR_3, 0),
		VertexAttribute(VertexAttributeFormat::VECTOR_2, 1),
		VertexAttribute(VertexAttributeFormat::VECTOR_4, 5)
	};
	VertexFormat vertexFormat(0, attributes, 3);
	vertexBuffer = graphicsContext->createVertexBuffer(vertexFormat, vertexCount, BufferUsage::DYNAMIC_DRAW);

	// Create index buffer
	IndexFormat indexFormat = (vertexCount > 65536) ? IndexFormat::UINT32 : IndexFormat::UINT16;
	indexBuffer = graphicsContext->createIndexBuffer(indexFormat, indexCount, BufferUsage::STATIC_DRAW);

	// Calculate indices
	// NOTE(review): the scratch arrays below are variable-length arrays (a compiler
	// extension) living on the stack; consider heap storage for large batches.
	switch (indexFormat)
	{
		case IndexFormat::UINT16:
		{
			std::uint16_t indices[indexCount];

			// The largest vertex index here is 65535, which still fits in 16 bits.
			fillQuadIndices(indices, vertexCount);

			indexBuffer->setData(indices);
			break;
		}

		case IndexFormat::UINT32:
		{
			std::uint32_t indices[indexCount];

			fillQuadIndices(indices, vertexCount);

			indexBuffer->setData(indices);
			break;
		}
	}
}
gl::Error VertexBuffer9::storeVertexAttributes(const gl::VertexAttribute &attrib, const gl::VertexAttribCurrentValueData ¤tValue, GLint start, GLsizei count, GLsizei instances, unsigned int offset) { if (!mVertexBuffer) { return gl::Error(GL_OUT_OF_MEMORY, "Internal vertex buffer is not initialized."); } gl::Buffer *buffer = attrib.buffer.get(); int inputStride = gl::ComputeVertexAttributeStride(attrib); int elementSize = gl::ComputeVertexAttributeTypeSize(attrib); DWORD lockFlags = mDynamicUsage ? D3DLOCK_NOOVERWRITE : 0; uint8_t *mapPtr = NULL; unsigned int mapSize; gl::Error error = spaceRequired(attrib, count, instances, &mapSize); if (error.isError()) { return error; } HRESULT result = mVertexBuffer->Lock(offset, mapSize, reinterpret_cast<void**>(&mapPtr), lockFlags); if (FAILED(result)) { return gl::Error(GL_OUT_OF_MEMORY, "Failed to lock internal vertex buffer, HRESULT: 0x%08x.", result); } const uint8_t *input = NULL; if (attrib.enabled) { if (buffer) { BufferD3D *storage = BufferD3D::makeFromBuffer(buffer); ASSERT(storage); gl::Error error = storage->getData(&input); if (error.isError()) { return error; } input += static_cast<int>(attrib.offset); } else { input = static_cast<const uint8_t*>(attrib.pointer); } } else { input = reinterpret_cast<const uint8_t*>(currentValue.FloatValues); } if (instances == 0 || attrib.divisor == 0) { input += inputStride * start; } gl::VertexFormat vertexFormat(attrib, currentValue.Type); const d3d9::VertexFormat &d3dVertexInfo = d3d9::GetVertexFormatInfo(mRenderer->getCapsDeclTypes(), vertexFormat); bool needsConversion = (d3dVertexInfo.conversionType & VERTEX_CONVERT_CPU) > 0; if (!needsConversion && inputStride == elementSize) { size_t copySize = static_cast<size_t>(count) * static_cast<size_t>(inputStride); memcpy(mapPtr, input, copySize); } else { d3dVertexInfo.copyFunction(input, inputStride, count, mapPtr); } mVertexBuffer->Unlock(); return gl::Error(GL_NO_ERROR); }
gl::Error VertexDeclarationCache::applyDeclaration(IDirect3DDevice9 *device, TranslatedAttribute attributes[], gl::ProgramBinary *programBinary, GLsizei instances, GLsizei *repeatDraw) { *repeatDraw = 1; int indexedAttribute = gl::MAX_VERTEX_ATTRIBS; int instancedAttribute = gl::MAX_VERTEX_ATTRIBS; if (instances == 0) { for (int i = 0; i < gl::MAX_VERTEX_ATTRIBS; ++i) { if (attributes[i].divisor != 0) { // If a divisor is set, it still applies even if an instanced draw was not used, so treat // as a single-instance draw. instances = 1; break; } } } if (instances > 0) { // Find an indexed attribute to be mapped to D3D stream 0 for (int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { if (attributes[i].active) { if (indexedAttribute == gl::MAX_VERTEX_ATTRIBS && attributes[i].divisor == 0) { indexedAttribute = i; } else if (instancedAttribute == gl::MAX_VERTEX_ATTRIBS && attributes[i].divisor != 0) { instancedAttribute = i; } if (indexedAttribute != gl::MAX_VERTEX_ATTRIBS && instancedAttribute != gl::MAX_VERTEX_ATTRIBS) break; // Found both an indexed and instanced attribute } } // The validation layer checks that there is at least one active attribute with a zero divisor as per // the GL_ANGLE_instanced_arrays spec. ASSERT(indexedAttribute != gl::MAX_VERTEX_ATTRIBS); } D3DCAPS9 caps; device->GetDeviceCaps(&caps); D3DVERTEXELEMENT9 elements[gl::MAX_VERTEX_ATTRIBS + 1]; D3DVERTEXELEMENT9 *element = &elements[0]; for (int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { if (attributes[i].active) { // Directly binding the storage buffer is not supported for d3d9 ASSERT(attributes[i].storage == NULL); int stream = i; if (instances > 0) { // Due to a bug on ATI cards we can't enable instancing when none of the attributes are instanced. 
if (instancedAttribute == gl::MAX_VERTEX_ATTRIBS) { *repeatDraw = instances; } else { if (i == indexedAttribute) { stream = 0; } else if (i == 0) { stream = indexedAttribute; } UINT frequency = 1; if (attributes[i].divisor == 0) { frequency = D3DSTREAMSOURCE_INDEXEDDATA | instances; } else { frequency = D3DSTREAMSOURCE_INSTANCEDATA | attributes[i].divisor; } device->SetStreamSourceFreq(stream, frequency); mInstancingEnabled = true; } } VertexBuffer9 *vertexBuffer = VertexBuffer9::makeVertexBuffer9(attributes[i].vertexBuffer); if (mAppliedVBs[stream].serial != attributes[i].serial || mAppliedVBs[stream].stride != attributes[i].stride || mAppliedVBs[stream].offset != attributes[i].offset) { device->SetStreamSource(stream, vertexBuffer->getBuffer(), attributes[i].offset, attributes[i].stride); mAppliedVBs[stream].serial = attributes[i].serial; mAppliedVBs[stream].stride = attributes[i].stride; mAppliedVBs[stream].offset = attributes[i].offset; } gl::VertexFormat vertexFormat(*attributes[i].attribute, GL_FLOAT); const d3d9::VertexFormat &d3d9VertexInfo = d3d9::GetVertexFormatInfo(caps.DeclTypes, vertexFormat); element->Stream = stream; element->Offset = 0; element->Type = d3d9VertexInfo.nativeFormat; element->Method = D3DDECLMETHOD_DEFAULT; element->Usage = D3DDECLUSAGE_TEXCOORD; element->UsageIndex = programBinary->getSemanticIndex(i); element++; } } if (instances == 0 || instancedAttribute == gl::MAX_VERTEX_ATTRIBS) { if (mInstancingEnabled) { for (int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { device->SetStreamSourceFreq(i, 1); } mInstancingEnabled = false; } } static const D3DVERTEXELEMENT9 end = D3DDECL_END(); *(element++) = end; for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++) { VertexDeclCacheEntry *entry = &mVertexDeclCache[i]; if (memcmp(entry->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9)) == 0 && entry->vertexDeclaration) { entry->lruCount = ++mMaxLru; if(entry->vertexDeclaration != mLastSetVDecl) { 
device->SetVertexDeclaration(entry->vertexDeclaration); mLastSetVDecl = entry->vertexDeclaration; } return gl::Error(GL_NO_ERROR); } } VertexDeclCacheEntry *lastCache = mVertexDeclCache; for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++) { if (mVertexDeclCache[i].lruCount < lastCache->lruCount) { lastCache = &mVertexDeclCache[i]; } } if (lastCache->vertexDeclaration != NULL) { SafeRelease(lastCache->vertexDeclaration); // mLastSetVDecl is set to the replacement, so we don't have to worry // about it. } memcpy(lastCache->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9)); HRESULT result = device->CreateVertexDeclaration(elements, &lastCache->vertexDeclaration); if (FAILED(result)) { return gl::Error(GL_OUT_OF_MEMORY, "Failed to create internal vertex declaration, result: 0x%X.", result); } device->SetVertexDeclaration(lastCache->vertexDeclaration); mLastSetVDecl = lastCache->vertexDeclaration; lastCache->lruCount = ++mMaxLru; return gl::Error(GL_NO_ERROR); }