void PGRAPH::Begin(Primitive primitive) { // Set surface setSurface(); // Set viewport gfx::Viewport viewportRect = { viewport.x, viewport.y, viewport.width, viewport.height, 0.0f, 1.0f }; gfx::Rectangle scissorRect = { scissor.x, scissor.y, scissor.width, scissor.height }; cmdBuffer->cmdSetViewports(1, &viewportRect); cmdBuffer->cmdSetScissors(1, &scissorRect); // Hashing auto vpData = &vpe.data[vpe.start]; auto vpHash = HashVertexProgram(vpData); auto fpData = memory->ptr<rsx_fp_instruction_t>((fp_location ? rsx->get_ea(0x0) : 0xC0000000) + fp_offset); auto fpHash = HashFragmentProgram(fpData); auto pipelineHash = hashStruct(pipeline) ^ vpHash ^ fpHash; if (cachePipeline.find(pipelineHash) == cachePipeline.end()) { const auto& p = pipeline; if (cacheVP.find(vpHash) == cacheVP.end()) { auto vp = std::make_unique<RSXVertexProgram>(); vp->decompile(vpData); vp->compile(graphics.get()); cacheVP[vpHash] = std::move(vp); } if (cacheFP.find(fpHash) == cacheFP.end()) { auto fp = std::make_unique<RSXFragmentProgram>(); fp->decompile(fpData); fp->compile(graphics.get()); cacheFP[fpHash] = std::move(fp); } gfx::PipelineDesc pipelineDesc = {}; pipelineDesc.formatDSV = convertFormat(surface.depthFormat); pipelineDesc.numCBVs = 2; pipelineDesc.numSRVs = RSX_MAX_TEXTURES; pipelineDesc.vs = cacheVP[vpHash]->shader; pipelineDesc.ps = cacheFP[fpHash]->shader; pipelineDesc.rsState.fillMode = gfx::FILL_MODE_SOLID; pipelineDesc.rsState.cullMode = p.cull_face_enable ? convertCullMode(p.cull_mode) : gfx::CULL_MODE_NONE; pipelineDesc.rsState.frontCounterClockwise = convertFrontFace(p.front_face); pipelineDesc.rsState.depthEnable = p.depth_test_enable; pipelineDesc.rsState.depthWriteMask = p.depth_mask ? gfx::DEPTH_WRITE_MASK_ALL : gfx::DEPTH_WRITE_MASK_ZERO; pipelineDesc.rsState.depthFunc = convertCompareFunc(p.depth_func); pipelineDesc.rsState.stencilEnable = p.stencil_test_enable; pipelineDesc.rsState.stencilReadMask = p.stencil_func_mask; pipelineDesc.rsState.stencilWriteMask = p.stencil_mask; pipelineDesc.rsState.frontFace.stencilOpFail = convertStencilOp(p.stencil_op_fail); pipelineDesc.rsState.frontFace.stencilOpZFail = convertStencilOp(p.stencil_op_zfail); pipelineDesc.rsState.frontFace.stencilOpPass = convertStencilOp(p.stencil_op_zpass); pipelineDesc.rsState.frontFace.stencilFunc = convertCompareFunc(p.stencil_func); if (p.two_sided_stencil_test_enable) { pipelineDesc.rsState.backFace.stencilOpFail = convertStencilOp(p.stencil_op_fail); pipelineDesc.rsState.backFace.stencilOpZFail = convertStencilOp(p.stencil_op_zfail); pipelineDesc.rsState.backFace.stencilOpPass = convertStencilOp(p.stencil_op_zpass); pipelineDesc.rsState.backFace.stencilFunc = convertCompareFunc(p.stencil_func); } else { pipelineDesc.rsState.backFace.stencilOpFail = convertStencilOp(p.back_stencil_op_fail); pipelineDesc.rsState.backFace.stencilOpZFail = convertStencilOp(p.back_stencil_op_zfail); pipelineDesc.rsState.backFace.stencilOpPass = convertStencilOp(p.back_stencil_op_zpass); pipelineDesc.rsState.backFace.stencilFunc = convertCompareFunc(p.back_stencil_func); } pipelineDesc.cbState.colorTarget[0].enableBlend = p.blend_enable; pipelineDesc.cbState.colorTarget[0].enableLogicOp = p.logic_op_enable; pipelineDesc.cbState.colorTarget[0].blendOp = convertBlendOp(p.blend_equation_rgb); pipelineDesc.cbState.colorTarget[0].blendOpAlpha = convertBlendOp(p.blend_equation_alpha); pipelineDesc.cbState.colorTarget[0].srcBlend = convertBlend(p.blend_sfactor_rgb); pipelineDesc.cbState.colorTarget[0].destBlend = convertBlend(p.blend_dfactor_rgb); pipelineDesc.cbState.colorTarget[0].srcBlendAlpha = convertBlend(p.blend_sfactor_alpha); pipelineDesc.cbState.colorTarget[0].destBlendAlpha = convertBlend(p.blend_dfactor_alpha); pipelineDesc.cbState.colorTarget[0].colorWriteMask = convertColorMask(p.color_mask); pipelineDesc.cbState.colorTarget[0].logicOp = convertLogicOp(p.logic_op); pipelineDesc.iaState.topology = convertPrimitiveTopology(primitive); for (U32 index = 0; index < RSX_MAX_VERTEX_INPUTS; index++) { const auto& attr = vpe.attr[index]; if (!attr.size) { continue; } gfx::Format format = convertVertexFormat(attr.type, attr.size); U32 stride = attr.stride; pipelineDesc.iaState.inputLayout.push_back({ index, format, index, 0, stride, 0, gfx::INPUT_CLASSIFICATION_PER_VERTEX, 0 } ); } for (U32 i = 0; i < RSX_MAX_TEXTURES; i++) { gfx::Sampler sampler = {}; sampler.filter = gfx::FILTER_MIN_MAG_MIP_LINEAR; sampler.addressU = gfx::TEXTURE_ADDRESS_MIRROR; sampler.addressV = gfx::TEXTURE_ADDRESS_MIRROR; sampler.addressW = gfx::TEXTURE_ADDRESS_MIRROR; pipelineDesc.samplers.push_back(sampler); } cachePipeline[pipelineHash] = std::unique_ptr<gfx::Pipeline>(graphics->createPipeline(pipelineDesc)); } heapResources->reset(); heapResources->pushVertexBuffer(vpeConstantMemory); heapResources->pushVertexBuffer(vtxTransform); // Upload VPE constants if necessary void* constantsPtr = vpeConstantMemory->map(); memcpy(constantsPtr, &vpe.constant, sizeof(vpe.constant)); vpeConstantMemory->unmap(); // Upload vertex transform matrix if necessary if (vertex_transform_dirty) { V128* transformPtr = reinterpret_cast<V128*>(vtxTransform->map()); memset(transformPtr, 0, 4 * sizeof(V128)); F32 half_cliph = surface.width / 2.0f; F32 half_clipv = surface.height / 2.0f; transformPtr[0].f32[0] = (viewport_scale.f32[0] / half_cliph); transformPtr[1].f32[1] = (viewport_scale.f32[1] / half_clipv); transformPtr[2].f32[2] = (viewport_scale.f32[2]); transformPtr[0].f32[3] = (viewport_offset.f32[0] - half_cliph) / half_cliph; transformPtr[1].f32[3] = (viewport_offset.f32[1] - half_clipv) / half_clipv; transformPtr[2].f32[3] = (viewport_offset.f32[2]); transformPtr[3].f32[3] = 1.0f; vtxTransform->unmap(); } // Set textures for (U32 i = 0; i < RSX_MAX_TEXTURES; i++) { const auto& tex = texture[i]; // Dummy texture if (!tex.enable) { gfx::TextureDesc texDesc = {}; texDesc.width = 2; texDesc.height = 2; texDesc.format = gfx::FORMAT_R8G8B8A8_UNORM; texDesc.mipmapLevels = 1; texDesc.swizzle = TEXTURE_SWIZZLE_ENCODE( gfx::TEXTURE_SWIZZLE_VALUE_0, gfx::TEXTURE_SWIZZLE_VALUE_0, gfx::TEXTURE_SWIZZLE_VALUE_0, gfx::TEXTURE_SWIZZLE_VALUE_0 ); gfx::Texture* texDescriptor = graphics->createTexture(texDesc); heapResources->pushTexture(texDescriptor); } // Upload real texture else { auto texFormat = static_cast<TextureFormat>(tex.format & ~RSX_TEXTURE_LN & ~RSX_TEXTURE_UN); gfx::TextureDesc texDesc = {}; texDesc.data = memory->ptr<Byte>((tex.location ? rsx->get_ea(0x0) : 0xC0000000) + tex.offset); texDesc.size = tex.width * tex.height; texDesc.width = tex.width; texDesc.height = tex.height; texDesc.format = convertTextureFormat(texFormat); texDesc.mipmapLevels = tex.mipmap; texDesc.swizzle = convertTextureSwizzle(texFormat); switch (texFormat) { case RSX_TEXTURE_B8: texDesc.size *= 1; break; case RSX_TEXTURE_A1R5G5B5: texDesc.size *= 2; break; case RSX_TEXTURE_A4R4G4B4: texDesc.size *= 2; break; case RSX_TEXTURE_R5G6B5: texDesc.size *= 2; break; case RSX_TEXTURE_A8R8G8B8: texDesc.size *= 4; break; default: assert_always("Unimplemented"); } gfx::Texture* texDescriptor = graphics->createTexture(texDesc); heapResources->pushTexture(texDescriptor); } } cmdBuffer->cmdBindPipeline(cachePipeline[pipelineHash].get()); cmdBuffer->cmdSetHeaps({ heapResources }); cmdBuffer->cmdSetDescriptor(0, heapResources, 0); cmdBuffer->cmdSetDescriptor(1, heapResources, 2); cmdBuffer->cmdSetPrimitiveTopology(convertPrimitiveTopology(primitive)); }
void PGRAPH_OpenGL::DrawArrays(u32 first, u32 count) { // State glBlendFuncSeparate(blend_sfactor_rgb, blend_dfactor_rgb, blend_sfactor_alpha, blend_dfactor_alpha); // Surface if (surface.dirty) { surface.dirty = false; switch (surface.colorTarget) { case RSX_SURFACE_TARGET_NONE: break; case RSX_SURFACE_TARGET_0: SetColorTarget(surface.colorOffset[0], 0); break; case RSX_SURFACE_TARGET_1: SetColorTarget(surface.colorOffset[1], 1); break; case RSX_SURFACE_TARGET_MRT1: SetColorTarget(surface.colorOffset[0], 0); SetColorTarget(surface.colorOffset[1], 1); break; case RSX_SURFACE_TARGET_MRT2: SetColorTarget(surface.colorOffset[0], 0); SetColorTarget(surface.colorOffset[1], 1); SetColorTarget(surface.colorOffset[2], 2); break; case RSX_SURFACE_TARGET_MRT3: SetColorTarget(surface.colorOffset[0], 0); SetColorTarget(surface.colorOffset[1], 1); SetColorTarget(surface.colorOffset[2], 2); SetColorTarget(surface.colorOffset[3], 3); break; } SetDepthTarget(surface.depthOffset); GLuint fbStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (fbStatus != GL_FRAMEBUFFER_COMPLETE) { nucleus.log.error(LOG_GPU, "PGRAPH_OpenGL::DrawArrays: Framebuffer is not complete (0x%X)", fbStatus); } } // Viewport if (viewport.dirty) { viewport.dirty = false; glViewport(viewport.x, viewport.y, viewport.width, viewport.height); } // Shaders auto vp_data = &vpe.data[vpe.start]; auto vp_hash = HashVertexProgram(vp_data); if (cache_vp.find(vp_hash) == cache_vp.end()) { OpenGLVertexProgram vp; vp.decompile(vp_data); vp.compile(); cache_vp[vp_hash] = vp; } auto fp_data = nucleus.memory.ptr<rsx_fp_instruction_t>((fp_location ? nucleus.rsx.get_ea(0x0) : 0xC0000000) + fp_offset); auto fp_hash = HashFragmentProgram(fp_data); if (cache_fp.find(fp_hash) == cache_fp.end()) { OpenGLFragmentProgram fp; fp.decompile(fp_data); fp.compile(); cache_fp[fp_hash] = fp; } // Link, validate and use program GLuint id = glCreateProgram(); glAttachShader(id, cache_vp[vp_hash].id); glAttachShader(id, cache_fp[fp_hash].id); glLinkProgram(id); GLint status; glGetProgramiv(id, GL_LINK_STATUS, &status); if (status != GL_TRUE) { nucleus.log.error(LOG_GPU, "PGRAPH_OpenGL::DrawArrays: Can't link program"); } glGetProgramiv(id, GL_VALIDATE_STATUS, &status); if (status != GL_TRUE) { nucleus.log.error(LOG_GPU, "PGRAPH_OpenGL::DrawArrays: Can't validate program"); } glUseProgram(id); // Upload VP constants for (u32 i = 0; i < 468; i++) { auto& constant = vpe.constant[i]; if (constant.dirty) { GLint loc = glGetUniformLocation(id, format("c[%d]", i).c_str()); glUniform4f(loc, constant.x, constant.y, constant.z, constant.w); constant.dirty = false; } } // Bind textures for (u32 i = 0; i < RSX_MAX_TEXTURES; i++) { const auto& tex = texture[i]; if (tex.enable) { GLuint tid; glActiveTexture(GL_TEXTURE0 + i); glGenTextures(1, &tid); glBindTexture(GL_TEXTURE_2D, tid); // Init texture void* texaddr = nucleus.memory.ptr<void>((tex.location ? nucleus.rsx.get_ea(0x0) : 0xC0000000) + tex.offset); switch (tex.format & ~RSX_TEXTURE_LN & ~RSX_TEXTURE_UN) { case RSX_TEXTURE_B8: glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width, tex.height, 0, GL_BLUE, GL_UNSIGNED_BYTE, texaddr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_BLUE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_BLUE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, GL_BLUE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_BLUE); break; case RSX_TEXTURE_A8R8G8B8: glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.width, tex.height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, texaddr); break; default: nucleus.log.error(LOG_GPU, "Unsupported texture format (%d)", tex.format); } glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); } } GLenum mode = vertex_primitive - 1; glDrawArrays(mode, first, count); checkRendererError("DrawArrays"); }