bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) { if (!useBufferedRendering_ || updateVRAM_) { return false; } // Skip checking if there's no framebuffers in that area. if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) { return false; } VirtualFramebuffer *dstBuffer = 0; VirtualFramebuffer *srcBuffer = 0; int srcWidth = width; int srcHeight = height; int dstWidth = width; int dstHeight = height; FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); if (dstBuffer && srcBuffer) { if (srcBuffer == dstBuffer) { if (srcX != dstX || srcY != dstY) { WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); if (g_Config.bBlockTransferGPU) { FlushBeforeCopy(); BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); RebindFramebuffer(); SetColorUpdated(dstBuffer, skipDrawReason); return true; } } else { // Ignore, nothing to do. Tales of Phantasia X does this by accident. if (g_Config.bBlockTransferGPU) { return true; } } } else { WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); // Just do the blit! if (g_Config.bBlockTransferGPU) { FlushBeforeCopy(); BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); RebindFramebuffer(); SetColorUpdated(dstBuffer, skipDrawReason); return true; // No need to actually do the memory copy behind, probably. } } return false; } else if (dstBuffer) { // Here we should just draw the pixels into the buffer. Copy first. 
return false; } else if (srcBuffer) { WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr); FlushBeforeCopy(); if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2; const float srcXFactor = (float)bpp / srcBpp; const bool tooTall = srcY + srcHeight > srcBuffer->bufferHeight; if (srcHeight <= 0 || (tooTall && srcY != 0)) { WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); } else { if (tooTall) WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x dangerous, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); ReadFramebufferToMemory(srcBuffer, true, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcWidth * srcXFactor), srcHeight); } } return false; // Let the bit copy happen } else { return false; } }
// Uploads stencil data stored in the pixels at `addr` into the stencil buffer of the
// framebuffer registered at that address.
//
// addr     - address of the source pixel data; also used to look up the target framebuffer.
// size     - not used by this implementation.
// skipZero - when true, a source containing no stencil bits is treated as "nothing to do".
//
// Returns true if the stencil buffer was written (cleared or uploaded), false if no
// matching framebuffer/memory was found, the format has no stencil (565), or the
// upload was skipped because of skipZero.
bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
	if (!MayIntersectFramebuffer(addr)) {
		return false;
	}

	// If several framebuffers match the address, the last one in the list wins.
	VirtualFramebuffer *dstBuffer = nullptr;
	for (VirtualFramebuffer *candidate : vfbs_) {
		if (MaskedEqual(candidate->fb_address, addr)) {
			dstBuffer = candidate;
		}
	}
	if (!dstBuffer) {
		return false;
	}

	const u8 *src = Memory::GetPointer(addr);
	if (!src) {
		return false;
	}

	// `values` is the number of distinct stencil values the format can encode,
	// `usedBits` the set of stencil bits actually present in the source data.
	int values = 0;
	u8 usedBits = 0;
	const auto pixelCount = dstBuffer->fb_stride * dstBuffer->bufferHeight;
	switch (dstBuffer->format) {
	case GE_FORMAT_565:
		// No alpha/stencil bits in this format, so an upload makes little sense.
		return false;
	case GE_FORMAT_5551:
		values = 2;
		usedBits = StencilBits5551(src, pixelCount);
		break;
	case GE_FORMAT_4444:
		values = 16;
		usedBits = StencilBits4444(src, pixelCount);
		break;
	case GE_FORMAT_8888:
		values = 256;
		usedBits = StencilBits8888(src, pixelCount);
		break;
	case GE_FORMAT_INVALID:
		// Should never happen.
		break;
	}

	if (usedBits == 0) {
		if (skipZero) {
			// Typical for freshly created buffers: stencil is already 0, so we're done.
			return false;
		}

		// All zero: a plain clear does the job, no need for the shader.
		if (dstBuffer->fbo) {
			draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR });
		}
		render_->Clear(0, 0, 0, GL_STENCIL_BUFFER_BIT | GL_COLOR_BUFFER_BIT, 0x8, 0, 0, 0, 0);
		gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
		return true;
	}

	// Lazily build the upload program the first time it is needed.
	if (!stencilUploadProgram_) {
		// Note: nothing in this function writes to errorString, so it is logged empty.
		std::string errorString;
		static std::string vs_code, fs_code;
		vs_code = ApplyGLSLPrelude(stencil_vs, GL_VERTEX_SHADER);
		fs_code = ApplyGLSLPrelude(stencil_fs, GL_FRAGMENT_SHADER);

		GLRShader *vertexShader = render_->CreateShader(GL_VERTEX_SHADER, vs_code, "stencil");
		GLRShader *fragmentShader = render_->CreateShader(GL_FRAGMENT_SHADER, fs_code, "stencil");
		std::vector<GLRShader *> shaders;
		shaders.push_back(vertexShader);
		shaders.push_back(fragmentShader);

		std::vector<GLRProgram::UniformLocQuery> queries;
		queries.push_back({ &u_stencilUploadTex, "tex" });
		queries.push_back({ &u_stencilValue, "u_stencilValue" });

		std::vector<GLRProgram::Initializer> inits;
		inits.push_back({ &u_stencilUploadTex, 0, 0 });

		stencilUploadProgram_ = render_->CreateProgram(shaders, {}, queries, inits, false);
		for (GLRShader *shader : shaders) {
			render_->DeleteShader(shader);
		}
		if (!stencilUploadProgram_) {
			ERROR_LOG_REPORT(G3D, "Failed to compile stencilUploadProgram! This shouldn't happen.\n%s", errorString.c_str());
		}
	}

	shaderManagerGL_->DirtyLastShader();

	// The fragment shader (with discard) is slow. The source is 1x, so when blits are
	// available and the target is scaled, stencil into a 1x temp buffer and stretch-blit
	// the result across afterwards.
	const bool blitSupported = gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT);
	const bool useBlit = blitSupported && dstBuffer->bufferWidth != dstBuffer->renderWidth && dstBuffer->fbo;

	u16 w = useBlit ? dstBuffer->bufferWidth : dstBuffer->renderWidth;
	u16 h = useBlit ? dstBuffer->bufferHeight : dstBuffer->renderHeight;

	Draw::Framebuffer *blitFBO = nullptr;
	if (useBlit) {
		blitFBO = GetTempFBO(w, h, Draw::FBO_8888);
		draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE });
	} else if (dstBuffer->fbo) {
		draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR });
	}
	render_->SetViewport({ 0, 0, (float)w, (float)h, 0.0f, 1.0f });

	float u1 = 1.0f;
	float v1 = 1.0f;
	MakePixelTexture(src, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->bufferWidth, dstBuffer->bufferHeight, u1, v1);
	textureCacheGL_->ForgetLastTexture();

	// Ordering matters here: the program has to be bound after the render pass has
	// started, and the color mask has to be set after the clear.
	render_->SetDepth(false, false, GL_ALWAYS);
	render_->Clear(0, 0, 0, GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, 0x8, 0, 0, 0, 0);
	render_->SetStencilFunc(GL_TRUE, GL_ALWAYS, 0xFF, 0xFF);
	render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
	render_->BindProgram(stencilUploadProgram_);
	render_->SetNoBlendAndMask(0x8);

	// One draw per stencil bit that actually occurs in the source data.
	for (int bit = 1; bit < values; bit <<= 1) {
		if ((usedBits & bit) == 0) {
			// This bit is zero everywhere already, nothing to draw.
			continue;
		}

		int stencilMask;
		float valueScale;
		if (dstBuffer->format == GE_FORMAT_4444) {
			stencilMask = (bit << 4) | bit;
			valueScale = 16.0f / 255.0f;
		} else if (dstBuffer->format == GE_FORMAT_5551) {
			stencilMask = 0xFF;
			valueScale = 128.0f / 255.0f;
		} else {
			stencilMask = bit;
			valueScale = 1.0f / 255.0f;
		}
		render_->SetStencilOp(stencilMask, GL_REPLACE, GL_REPLACE, GL_REPLACE);
		render_->SetUniformF1(&u_stencilValue, bit * valueScale);

		DrawActiveTexture(0, 0, dstBuffer->width, dstBuffer->height, dstBuffer->bufferWidth, dstBuffer->bufferHeight, 0.0f, 0.0f, u1, v1, ROTATION_LOCKED_HORIZONTAL, DRAWTEX_NEAREST | DRAWTEX_KEEP_STENCIL_ALPHA);
	}

	if (useBlit) {
		// Stretch the 1x stencil result onto the real (scaled) framebuffer.
		draw_->BlitFramebuffer(blitFBO, 0, 0, w, h, dstBuffer->fbo, 0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight, Draw::FB_STENCIL_BIT, Draw::FB_BLIT_NEAREST);
	}

	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
	RebindFramebuffer();
	return true;
}
bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset, u32 skipDrawReason) { if (updateVRAM_ || size == 0) { return false; } dst &= 0x3FFFFFFF; src &= 0x3FFFFFFF; VirtualFramebuffer *dstBuffer = 0; VirtualFramebuffer *srcBuffer = 0; u32 dstY = (u32)-1; u32 dstH = 0; u32 srcY = (u32)-1; u32 srcH = 0; for (size_t i = 0; i < vfbs_.size(); ++i) { VirtualFramebuffer *vfb = vfbs_[i]; if (vfb->fb_stride == 0) { continue; } const u32 vfb_address = (0x04000000 | vfb->fb_address) & 0x3FFFFFFF; const u32 vfb_size = FramebufferByteSize(vfb); const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2; const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp; const int vfb_byteWidth = vfb->width * vfb_bpp; if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) { const u32 offset = dst - vfb_address; const u32 yOffset = offset / vfb_byteStride; if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) { dstBuffer = vfb; dstY = yOffset; dstH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); } } if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) { const u32 offset = src - vfb_address; const u32 yOffset = offset / vfb_byteStride; if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) { srcBuffer = vfb; srcY = yOffset; srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height); } else if ((offset % vfb_byteStride) == 0 && size == vfb->fb_stride && yOffset < srcY) { // Valkyrie Profile reads 512 bytes at a time, rather than 2048. So, let's whitelist fb_stride also. srcBuffer = vfb; srcY = yOffset; srcH = 1; } } } if (srcBuffer && srcY == 0 && srcH == srcBuffer->height && !dstBuffer) { // MotoGP workaround - it copies a framebuffer to memory and then displays it. 
// TODO: It's rare anyway, but the game could modify the RAM and then we'd display the wrong thing. // Unfortunately, that would force 1x render resolution. if (Memory::IsRAMAddress(dst)) { knownFramebufferRAMCopies_.insert(std::pair<u32, u32>(src, dst)); } } if (!useBufferedRendering_) { // If we're copying into a recently used display buf, it's probably destined for the screen. if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) { return false; } } if (dstBuffer && srcBuffer && !isMemset) { if (srcBuffer == dstBuffer) { WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); } else { WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); // Just do the blit! if (g_Config.bBlockTransferGPU) { BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); SetColorUpdated(dstBuffer, skipDrawReason); RebindFramebuffer(); } } return false; } else if (dstBuffer) { WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst); if (g_Config.bBlockTransferGPU) { FlushBeforeCopy(); const u8 *srcBase = Memory::GetPointerUnchecked(src); DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); SetColorUpdated(dstBuffer, skipDrawReason); RebindFramebuffer(); // This is a memcpy, let's still copy just in case. return false; } return false; } else if (srcBuffer) { WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst); FlushBeforeCopy(); if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) { WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); } else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { ReadFramebufferToMemory(srcBuffer, true, 0, srcY, srcBuffer->width, srcH); } return false; } else { return false; } }