// Post-transfer hook for block transfers.
//
// Two things happen here:
//  1. In non-buffered mode, a 512-stride, 272-line transfer that lands exactly on
//     the current or previous display address is presented directly to the output.
//  2. If the destination resolves to a tracked framebuffer and the source does not
//     (an "upload"), the source pixels are drawn into that framebuffer, provided
//     g_Config.bBlockTransferGPU is set.
void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
	// Some games never draw their video frames and instead copy them straight into
	// the backbuffer. Catch that here and let the framebuffer manager show the pixels.
	const u32 prevDisplayAddr = PrevDisplayFramebufAddr();
	const u32 curDisplayAddr = DisplayFramebufAddr();
	const bool hitsBackBuffer = prevDisplayAddr != 0 && dstBasePtr == prevDisplayAddr;
	const bool hitsDisplayBuffer = curDisplayAddr != 0 && dstBasePtr == curDisplayAddr;

	// TODO: Is this not handled by upload?  Should we check !dstBuffer to avoid a double copy?
	if ((hitsBackBuffer || hitsDisplayBuffer) && dstStride == 512 && height == 272 && !useBufferedRendering_) {
		FlushBeforeCopy();
		DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, 512, false);
	}

	// Nothing more to do when neither end can touch a known framebuffer.
	if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
		return;
	}

	VirtualFramebuffer *dstVfb = nullptr;
	VirtualFramebuffer *srcVfb = nullptr;
	int srcWidth = width;
	int srcHeight = height;
	int dstWidth = width;
	int dstHeight = height;
	FindTransferFramebuffers(dstVfb, srcVfb, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);

	// In non-buffered mode only the framebuffer currently being rendered is of interest.
	if (!useBufferedRendering_ && currentRenderVfb_ != dstVfb) {
		return;
	}

	// Only the upload case (tracked destination, untracked source) is handled here.
	if (!dstVfb || srcVfb) {
		return;
	}

	WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
	if (!g_Config.bBlockTransferGPU) {
		return;
	}

	FlushBeforeCopy();

	const u8 *srcPixels = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
	// Horizontal values are rescaled by the ratio of the transfer's bpp to the
	// destination format's bytes per pixel.
	const int dstBytesPerPixel = dstVfb->format == GE_FORMAT_8888 ? 4 : 2;
	const float xScale = static_cast<float>(bpp) / dstBytesPerPixel;

	if (dstWidth > dstVfb->width || dstHeight > dstVfb->height) {
		// The transfer is larger than the framebuffer, which is a clear size hint: grow it.
		// Valkyrie Profile does this when uploading video at the ending.
		ResizeFramebufFBO(dstVfb, dstWidth, dstHeight, false, true);
	}

	DrawPixels(dstVfb, static_cast<int>(dstX * xScale), dstY, srcPixels, dstVfb->format, static_cast<int>(srcStride * xScale), static_cast<int>(dstWidth * xScale), dstHeight);
	SetColorUpdated(dstVfb, skipDrawReason);
	RebindFramebuffer();
}
bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp) { if (!useBufferedRendering_ || updateVRAM_) { return false; } // Skip checking if there's no framebuffers in that area. if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) { return false; } VirtualFramebuffer *dstBuffer = 0; VirtualFramebuffer *srcBuffer = 0; int srcWidth = width; int srcHeight = height; int dstWidth = width; int dstHeight = height; FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp); if (dstBuffer && srcBuffer) { if (srcBuffer == dstBuffer) { if (srcX != dstX || srcY != dstY) { WARN_LOG_ONCE(dstsrc, G3D, "Intra-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); if (g_Config.bBlockTransferGPU) { FlushBeforeCopy(); BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); RebindFramebuffer(); SetColorUpdated(dstBuffer); return true; } } else { // Ignore, nothing to do. Tales of Phantasia X does this by accident. if (g_Config.bBlockTransferGPU) { return true; } } } else { WARN_LOG_ONCE(dstnotsrc, G3D, "Inter-buffer block transfer %08x -> %08x", srcBasePtr, dstBasePtr); // Just do the blit! if (g_Config.bBlockTransferGPU) { FlushBeforeCopy(); BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp); RebindFramebuffer(); SetColorUpdated(dstBuffer); return true; // No need to actually do the memory copy behind, probably. } } return false; } else if (dstBuffer) { // Here we should just draw the pixels into the buffer. Copy first. 
return false; } else if (srcBuffer) { WARN_LOG_ONCE(btd, G3D, "Block transfer download %08x -> %08x", srcBasePtr, dstBasePtr); FlushBeforeCopy(); if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) { const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2; const float srcXFactor = (float)bpp / srcBpp; const bool tooTall = srcY + srcHeight > srcBuffer->bufferHeight; if (srcHeight <= 0 || (tooTall && srcY != 0)) { WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); } else { if (tooTall) WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x dangerous, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight); ReadFramebufferToMemory(srcBuffer, true, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcWidth * srcXFactor), srcHeight); } } return false; // Let the bit copy happen } else { return false; } }
// Uploads stencil data from the memory at `addr` into the framebuffer whose
// address matches it.
//
// Returns false when no framebuffer matches, the memory is not readable, the
// format is 565, or the data is all-zero and `skipZero` is set. Returns true
// once the stencil has been cleared or drawn.
bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZero) {
	if (!MayIntersectFramebuffer(addr)) {
		return false;
	}

	// Scan every framebuffer; if several match, the last one in the list is used.
	VirtualFramebuffer *target = nullptr;
	for (VirtualFramebuffer *candidate : vfbs_) {
		if (MaskedEqual(candidate->fb_address, addr)) {
			target = candidate;
		}
	}
	if (!target) {
		return false;
	}

	int values = 0;
	u8 usedBits = 0;

	const u8 *src = Memory::GetPointer(addr);
	if (!src) {
		return false;
	}

	// Work out which stencil bits actually occur in the source data, and how many
	// distinct stencil values the format can express.
	switch (target->format) {
	case GE_FORMAT_565:
		// Well, this doesn't make much sense.
		return false;
	case GE_FORMAT_5551:
		usedBits = StencilBits5551(src, target->fb_stride * target->bufferHeight);
		values = 2;
		break;
	case GE_FORMAT_4444:
		usedBits = StencilBits4444(src, target->fb_stride * target->bufferHeight);
		values = 16;
		break;
	case GE_FORMAT_8888:
		usedBits = StencilBits8888(src, target->fb_stride * target->bufferHeight);
		values = 256;
		break;
	case GE_FORMAT_INVALID:
		// Impossible.
		break;
	}

	if (usedBits == 0) {
		if (skipZero) {
			// Common when creating buffers, it's already 0.  We're done.
			return false;
		}

		// All zero: a plain clear does the job, no need for the shader.
		if (target->fbo) {
			draw_->BindFramebufferAsRenderTarget(target->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR });
		}
		render_->Clear(0, 0, 0, GL_STENCIL_BUFFER_BIT | GL_COLOR_BUFFER_BIT, 0x8, 0, 0, 0, 0);
		gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
		return true;
	}

	// Create the upload program the first time it is needed.
	if (!stencilUploadProgram_) {
		std::string errorString;
		static std::string vs_code, fs_code;
		vs_code = ApplyGLSLPrelude(stencil_vs, GL_VERTEX_SHADER);
		fs_code = ApplyGLSLPrelude(stencil_fs, GL_FRAGMENT_SHADER);

		std::vector<GLRShader *> shaders;
		shaders.push_back(render_->CreateShader(GL_VERTEX_SHADER, vs_code, "stencil"));
		shaders.push_back(render_->CreateShader(GL_FRAGMENT_SHADER, fs_code, "stencil"));

		std::vector<GLRProgram::UniformLocQuery> queries;
		queries.push_back({ &u_stencilUploadTex, "tex" });
		queries.push_back({ &u_stencilValue, "u_stencilValue" });

		std::vector<GLRProgram::Initializer> inits;
		inits.push_back({ &u_stencilUploadTex, 0, 0 });

		stencilUploadProgram_ = render_->CreateProgram(shaders, {}, queries, inits, false);
		for (GLRShader *shader : shaders) {
			render_->DeleteShader(shader);
		}
		if (!stencilUploadProgram_) {
			ERROR_LOG_REPORT(G3D, "Failed to compile stencilUploadProgram! This shouldn't happen.\n%s", errorString.c_str());
		}
	}

	shaderManagerGL_->DirtyLastShader();

	// Our fragment shader (and discard) is slow.  Since the source is 1x, we can stencil to 1x.
	// Then after we're done, we'll just blit it across and stretch it there.
	// That only applies when blits are supported, the buffer is actually scaled, and it has an FBO.
	const bool useBlit = gstate_c.Supports(GPU_SUPPORTS_ARB_FRAMEBUFFER_BLIT | GPU_SUPPORTS_NV_FRAMEBUFFER_BLIT)
		&& target->bufferWidth != target->renderWidth
		&& target->fbo;

	u16 w;
	u16 h;
	if (useBlit) {
		w = target->bufferWidth;
		h = target->bufferHeight;
	} else {
		w = target->renderWidth;
		h = target->renderHeight;
	}

	Draw::Framebuffer *blitFBO = nullptr;
	if (useBlit) {
		blitFBO = GetTempFBO(w, h, Draw::FBO_8888);
		draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE });
	} else if (target->fbo) {
		draw_->BindFramebufferAsRenderTarget(target->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR });
	}
	render_->SetViewport({ 0, 0, (float)w, (float)h, 0.0f, 1.0f });

	float u1 = 1.0f;
	float v1 = 1.0f;
	MakePixelTexture(src, target->format, target->fb_stride, target->bufferWidth, target->bufferHeight, u1, v1);
	textureCacheGL_->ForgetLastTexture();

	// We must bind the program after starting the render pass, and set the color mask after clearing.
	render_->SetDepth(false, false, GL_ALWAYS);
	render_->Clear(0, 0, 0, GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, 0x8, 0, 0, 0, 0);
	render_->SetStencilFunc(GL_TRUE, GL_ALWAYS, 0xFF, 0xFF);
	render_->SetRaster(false, GL_CCW, GL_FRONT, GL_FALSE);
	render_->BindProgram(stencilUploadProgram_);
	render_->SetNoBlendAndMask(0x8);

	// One draw per stencil bit that is present in the source data.
	for (int bit = 1; bit < values; bit <<= 1) {
		if ((usedBits & bit) == 0) {
			// It's already zero, let's skip it.
			continue;
		}

		// Pick the stencil write mask and the shader's comparison value for this format.
		int writeMask;
		float stencilValue;
		switch (target->format) {
		case GE_FORMAT_4444:
			writeMask = (bit << 4) | bit;
			stencilValue = bit * (16.0f / 255.0f);
			break;
		case GE_FORMAT_5551:
			writeMask = 0xFF;
			stencilValue = bit * (128.0f / 255.0f);
			break;
		default:
			writeMask = bit;
			stencilValue = bit * (1.0f / 255.0f);
			break;
		}
		render_->SetStencilOp(writeMask, GL_REPLACE, GL_REPLACE, GL_REPLACE);
		render_->SetUniformF1(&u_stencilValue, stencilValue);

		DrawActiveTexture(0, 0, target->width, target->height, target->bufferWidth, target->bufferHeight, 0.0f, 0.0f, u1, v1, ROTATION_LOCKED_HORIZONTAL, DRAWTEX_NEAREST | DRAWTEX_KEEP_STENCIL_ALPHA);
	}

	if (useBlit) {
		// Stretch the 1x stencil result over the full-size render target.
		draw_->BlitFramebuffer(blitFBO, 0, 0, w, h, target->fbo, 0, 0, target->renderWidth, target->renderHeight, Draw::FB_STENCIL_BIT, Draw::FB_BLIT_NEAREST);
	}

	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
	RebindFramebuffer();
	return true;
}