void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {0, 5, 6, 8, 9}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); GL_POP(); }
void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); int w = std::max<int>(cr.width(), 1); int h = std::max<int>(cr.height(), 1); if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h)) { return; } } GL_PUSH("Present"); ClearRenderTarget(m_backbuffer, 0); if(m_current) { static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE, ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER, ShaderConvert_COMPLEX_FILTER}; // FIXME Present(m_current, m_backbuffer, GSVector4(r), s_shader[shader]); } Flip(); GL_POP(); }
void GSDeviceOGL::CreateTextureFX() { m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer)); m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer)); // warning 1 sampler by image unit. So you cannot reuse m_ps_ss... m_palette_ss = CreateSampler(false, false, false); glBindSampler(1, m_palette_ss); // Pre compile all Geometry & Vertex Shader // It might cost a seconds at startup but it would reduce benchmark pollution GL_PUSH("Compile GS"); for (uint32 key = 0; key < countof(m_gs); key++) { GSSelector sel(key); if (sel.point == sel.sprite) m_gs[key] = 0; else m_gs[key] = CompileGS(GSSelector(key)); } GL_POP(); GL_PUSH("Compile VS"); for (uint32 key = 0; key < countof(m_vs); key++) { // wildhack is only useful if both TME and FST are enabled. VSSelector sel(key); if (sel.wildhack && (!sel.tme || !sel.fst)) m_vs[key] = 0; else m_vs[key] = CompileVS(sel, !GLLoader::found_GL_ARB_clip_control); } GL_POP(); // Enable all bits for stencil operations. Technically 1 bit is // enough but buffer is polluted with noise. Clear will be limited // to the mask. glStencilMask(0xFF); for (uint32 key = 0; key < countof(m_om_dss); key++) { m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); } // Help to debug FS in apitrace m_apitrace = CompilePS(PSSelector()); }
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); GL_PUSH("Upload Texture %d", m_texture_id); m_dirty = true; m_clean = false; glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); char* src = (char*)data; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; char* map = PboPool::Map(map_size); #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // Note: row_byte != pitch for (int h = 0; h < r.height(); h++) { memcpy(map, src, row_byte); map += row_byte; src += pitch; } PboPool::Unmap(); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset()); // FIXME OGL4: investigate, only 1 unpack buffer always bound PboPool::UnbindPbo(); PboPool::EndTransfer(); GL_POP(); return true; // For reference, standard upload without pbo (Used to crash on FGLRX) #if 0 // pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); // FIXME useful? glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior return true; #endif }
// Uploads the vertex and pixel shader constant buffers.
//
// Each buffer is first handed to its cache; the upload to the uniform buffer
// only happens when the cache's Update() returns true (presumably meaning the
// content differs from the cached copy -- confirm in the cache class).
void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb)
{
	GL_PUSH("UBO");

	const bool upload_vs = m_vs_cb_cache.Update(vs_cb);
	if(upload_vs)
		m_vs_cb->upload(vs_cb);

	const bool upload_ps = m_ps_cb_cache.Update(ps_cb);
	if(upload_ps)
		m_ps_cb->upload(ps_cb);

	GL_POP();
}
void GSTextureOGL::Unmap() { if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { PboPool::Unmap(); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type, (const void*)PboPool::Offset()); // FIXME OGL4: investigate, only 1 unpack buffer always bound PboPool::UnbindPbo(); PboPool::EndTransfer(); GL_POP(); // PUSH is in Map } }
void GSDeviceOGL::CreateTextureFX() { GL_PUSH("CreateTextureFX"); m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer)); m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer)); // warning 1 sampler by image unit. So you cannot reuse m_ps_ss... m_palette_ss = CreateSampler(false, false, false); gl_BindSampler(1, m_palette_ss); // Pre compile all Geometry & Vertex Shader // It might cost a seconds at startup but it would reduce benchmark pollution m_gs = CompileGS(); int logz = theApp.GetConfig("logz", 1); // Don't do it in debug build, so it can still be tested #ifndef _DEBUG if (GLLoader::found_GL_ARB_clip_control && logz) { fprintf(stderr, "Your driver supports advance depth. Logz will be disabled\n"); logz = 0; } else if (!GLLoader::found_GL_ARB_clip_control && !logz) { fprintf(stderr, "Your driver DOESN'T support advance depth (GL_ARB_clip_control)\n It is higly recommmended to enable logz\n"); } #endif for (uint32 key = 0; key < VSSelector::size(); key++) { // wildhack is only useful if both TME and FST are enabled. VSSelector sel(key); if (sel.wildhack && (!sel.tme || !sel.fst)) m_vs[key] = 0; else m_vs[key] = CompileVS(sel, logz); } // Enable all bits for stencil operations. Technically 1 bit is // enough but buffer is polluted with noise. Clear will be limited // to the mask. glStencilMask(0xFF); for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); // Help to debug FS in apitrace m_apitrace = CompilePS(PSSelector()); GL_POP(); }
void GSShaderOGL::UseProgram() { GL_PUSH("Use Program And Uniform"); if (GLState::dirty_prog) { if (!GLLoader::found_GL_ARB_separate_shader_objects) { GLState::dirty_ressources = true; hash_map<uint64, GLuint >::iterator it; // Note: shader are integer lookup pointer. They start from 1 and incr // every time you create a new shader OR a new program. // Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128. GS has only 2 programs // We migth be able to pack the value in a 32bits int // I would need to check the behavior on Nvidia (pause/resume). uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps; it = m_single_prog.find(sel); if (it == m_single_prog.end()) { GLState::program = LinkNewProgram(); m_single_prog[sel] = GLState::program; ValidateProgram(GLState::program); gl_UseProgram(GLState::program); } else { GLuint prog = it->second; if (prog != GLState::program) { GLState::program = prog; gl_UseProgram(GLState::program); } } } else { ValidatePipeline(m_pipeline); } } SetupRessources(); GLState::dirty_prog = false; GL_POP(); }
// Reads a rectangle of a cached target back into the emulated GS local memory.
//
// t - target to read. Nothing happens while it still has pending dirty
//     rectangles, or when its pixel format is not one of the handled
//     colour/depth formats.
// r - rectangle to read. It is multiplied by the texture scale and divided by
//     the texture size to build the source rectangle given to CopyOffscreen.
//     A rectangle without width or without height is ignored.
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
	// A rectangle with a zero width OR a zero height covers no pixel: there is
	// nothing to transfer, and a zero-sized offscreen copy must not be requested.
	if (!t->m_dirty.empty() || r.width() == 0 || r.height() == 0)
		return;

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	// Offscreen texture format and conversion shader used for the copy.
	GLuint fmt;
	int ps_shader;

	switch (TEX0.PSM)
	{
		case PSM_PSMCT32:
		case PSM_PSMCT24:
			fmt = GL_RGBA8;
			ps_shader = 0;
			break;

		case PSM_PSMCT16:
		case PSM_PSMCT16S:
			fmt = GL_R16UI;
			ps_shader = 1;
			break;

		case PSM_PSMZ32:
		case PSM_PSMZ24:
			fmt = GL_R32UI;
			ps_shader = 10;
			break;

		case PSM_PSMZ16:
		case PSM_PSMZ16S:
			fmt = GL_R16UI;
			ps_shader = 10;
			break;

		default:
			// Unsupported format: silently ignored.
			return;
	}

	// Yes lots of logging, but I'm not confident with this code
	GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM);

	GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d",
			t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());

	GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

	if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader))
	{
		GSTexture::GSMap m;

		if(offscreen->Map(m))
		{
			// TODO: block level write

			GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

			// Depth formats are written with the writer of the same bit width
			// as the matching colour format.
			switch(TEX0.PSM)
			{
				case PSM_PSMCT32:
				case PSM_PSMZ32:
					m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
					break;

				case PSM_PSMCT24:
				case PSM_PSMZ24:
					m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
					break;

				case PSM_PSMCT16:
				case PSM_PSMCT16S:
				case PSM_PSMZ16:
				case PSM_PSMZ16S:
					m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
					break;

				default:
					// Already filtered out by the first switch.
					ASSERT(0);
			}

			offscreen->Unmap();
		}

		// FIXME invalidate data
		m_renderer->m_dev->Recycle(offscreen);
	}

	GL_POP();
}
// Issues one hardware draw for the vertices currently queued in m_vertex.
//
// Sequence, in order:
//  1. Skip the draw when the device is lost or the frame is flagged as bad.
//  2. Look up the render target and the depth target in the texture cache,
//     unless no_rt / no_ds decide that one of them is not needed.
//  3. When texturing is enabled (PRIM->TME), look up the source texture and
//     detect the "texture shuffle" case.
//  4. Optionally dump registers / textures / targets (s_dump).
//  5. Run the per-game "m_oi" hack, which can cancel the draw.
//  6. Temporarily patch TEST / FRAME / ZBUF in the context (alpha test
//     simplification), apply the upscaling sprite hacks, call DrawPrims() and
//     restore the saved registers.
//  7. Mark the drawn area valid on the targets and invalidate overlapping
//     data in the texture cache.
//  8. Run the "m_oo" hack, dump the targets again, optionally read back the RT.
//
// s_n is the dump counter. On the complete path it advances by 3 per draw
// (2 before DrawPrims, 1 after) when s_dump is set; when s_dump is not set it
// advances by the same amounts only if ENABLE_OGL_DEBUG is defined.
// NOTE(review): the two early returns taken when a target or the source
// texture lookup fails do not advance s_n, unlike the other early returns --
// confirm whether that is intended.
void GSRendererHW::Draw()
{
	if(m_dev->IsLost() || GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw))
	{
		GL_INS("Warning skipping a draw call (%d)", s_n);
		s_n += 3; // Keep it sync with SW renderer
		return;
	}

	GL_PUSH("HW Draw %d", s_n);

	GSDrawingEnvironment& env = m_env;
	GSDrawingContext* context = m_context;

	// It is allowed to use the depth and rt at the same location. However at least 1 must
	// be disabled.
	// 1/ GoW uses a Cd blending on a 24 bits buffer (no alpha)
	// 2/ SuperMan really draws (0,0,0,0) color and a (0) 32-bits depth
	// 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth
	// Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0
	const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1));
	const bool no_ds = !no_rt && (
			// Depth is always pass (no read) and write are discarded (tekken 5). (Note: DATE is currently implemented with a stencil buffer)
			(context->ZBUF.ZMSK && m_context->TEST.ZTST == ZTST_ALWAYS && !m_context->TEST.DATE) ||
			// Depth will be written through the RT
			(context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && !context->ZBUF.ZMSK && !context->FRAME.FBMSK && context->TEST.ZTE)
			);

	// The same TEX0 register is filled twice: first to describe the frame
	// buffer, then to describe the depth buffer.
	GIFRegTEX0 TEX0;

	TEX0.TBP0 = context->FRAME.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->FRAME.PSM;

	GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
	GSTexture* rt_tex = rt ? rt->m_texture : NULL;

	// Note: the depth buffer lookup uses the frame buffer width (FBW).
	TEX0.TBP0 = context->ZBUF.Block();
	TEX0.TBW = context->FRAME.FBW;
	TEX0.PSM = context->ZBUF.PSM;

	GSTextureCache::Target* ds = no_ds ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
	GSTexture* ds_tex = ds ? ds->m_texture : NULL;

	// A target that is needed but could not be obtained is a fatal condition
	// for this draw.
	if(!(rt || no_rt) || !(ds || no_ds))
	{
		GL_POP();
		ASSERT(0);
		return;
	}

	GSTextureCache::Source* tex = NULL;
	m_texture_shuffle = false;

	if(PRIM->TME)
	{
		/*

		// m_tc->LookupSource will mess with the palette, should not, but we do this after, until it is sorted out

		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}

		*/

		GSVector4i r;

		GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());

		tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);

		// No source texture: the draw is dropped.
		if(!tex)
		{
			GL_POP();
			return;
		}

		// FIXME: Could be removed on openGL
		if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
		{
			m_mem.m_clut.Read32(context->TEX0, env.TEXA);
		}

		// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
		// Initially code also tested the RT but it gives too much false-positive
		//
		// Both input and output are 16 bits and texture was initially 32 bits!
		m_texture_shuffle = (context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt;

		// Texture shuffle is not yet supported with strange clamp mode
		ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3));
	}

	if (rt)
	{
		// Be sure texture shuffle detection is properly propagated
		// Otherwise set or clear the flag (Code in texture cache only set the flag)
		// Note: it is important to clear the flag when RT is used as a real 16 bits target.
		rt->m_32_bits_fmt = m_texture_shuffle || !(context->FRAME.PSM & 0x2);
	}

	// Pre-draw dump. s_n is incremented twice in this block, so the texture
	// files and the "rt0"/"rz0" files carry consecutive numbers.
	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if (s_n >= s_saven)
		{
			// Dump Register state
			s = format("%05d_context.txt", s_n);

			m_env.Dump(root_hw+s);
			m_context->Dump(root_hw+s);
		}

		if(s_savet && s_n >= s_saven && tex)
		{
			s = format("%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
				s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
				(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
				(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
				(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);

			tex->m_texture->Save(root_hw+s, true);

			if(tex->m_palette)
			{
				s = format("%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);

				tex->m_palette->Save(root_hw+s, true);
			}
		}

		s_n++;

		if(s_save && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			if (rt)
				rt->m_texture->Save(root_hw+s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			if (ds_tex)
				ds_tex->Save(root_hw+s);
		}

		s_n++;

	// With ENABLE_OGL_DEBUG the counter keeps advancing even when nothing is dumped.
#ifdef ENABLE_OGL_DEBUG
	} else {
		s_n += 2;
#endif
	}

	// Per-game hack hook (pointer to member). Returning false cancels the draw.
	if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, tex))
	{
		s_n += 1; // keep counter sync
		GL_POP();
		return;
	}

	// skip alpha test if possible

	// Saved copies of the registers that are patched below; they are written
	// back to the context right after DrawPrims().
	GIFRegTEST TEST = context->TEST;
	GIFRegFRAME FRAME = context->FRAME;
	GIFRegZBUF ZBUF = context->ZBUF;

	// fm / zm: frame buffer and depth write masks. zm is all ones when depth
	// writes are masked or the depth test is disabled, 0 otherwise.
	uint32 fm = context->FRAME.FBMSK;
	uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;

	if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
	{
		// TryAlphaTest receives fm / zm and may update them (presumably taken
		// by reference -- confirm in GSRenderer).
		if(GSRenderer::TryAlphaTest(fm, zm))
		{
			context->TEST.ATST = ATST_ALWAYS;
		}
	}

	context->FRAME.FBMSK = fm;
	context->ZBUF.ZMSK = zm != 0;

	// A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite
	if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS))
	{
		size_t count = m_vertex.next;
		GSVertex* v = &m_vertex.buff[0];

		// Hack to avoid vertical black line in various games (ace combat/tekken)
		if (m_userhacks_align_sprite_X)
		{
			// Note for performance reason I do the check only once on the first
			// primitive
			int win_position = v[1].XYZ.X - context->XYOFFSET.OFX;
			const bool unaligned_position = ((win_position & 0xF) == 8);
			const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful
			const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X);

			if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST))
			{
				// Normaly vertex are aligned on full pixels and texture in half
				// pixels. Let's extend the coverage of an half-pixel to avoid
				// hole after upscaling
				// Only the second vertex of each pair (v[i+1]) is moved.
				for(size_t i = 0; i < count; i += 2)
				{
					v[i+1].XYZ.X += 8;

					// I really don't know if it is a good idea. Neither what to do for !PRIM->FST
					if (unaligned_texture)
						v[i+1].U += 8;
				}
			}
		}

		if (PRIM->FST)
		{
			// Setting 1 only rounds when sampling is not linear; a setting
			// above 1 always rounds.
			if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear()))
			{
				if (m_vt.IsLinear())
					RoundSpriteOffset<true>();
				else
					RoundSpriteOffset<false>();
			}
		}
		else
		{
			; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior)
		}
	}

	//

	DrawPrims(rt_tex, ds_tex, tex);

	//

	// Undo the temporary register patches made before the draw.
	context->TEST = TEST;
	context->FRAME = FRAME;
	context->ZBUF = ZBUF;

	//

	// Area touched by the draw: vertex bounding box clipped by the scissor.
	GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));

	// Something was written to the colour buffer (mask is not all ones).
	if(fm != 0xffffffff && rt)
	{
		rt->m_valid = rt->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.fb, r, false);

		m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block());
	}

	// Something was written to the depth buffer (mask is not all ones).
	if(zm != 0xffffffff && ds)
	{
		ds->m_valid = ds->m_valid.runion(r);

		m_tc->InvalidateVideoMem(context->offset.zb, r, false);

		m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block());
	}

	//

	// Per-game hack hook called after the draw.
	if(m_hacks.m_oo)
	{
		(this->*m_hacks.m_oo)();
	}

	// Post-draw dump ("rt1"/"rz1"). Dumping stops once more than s_savel
	// counter steps have elapsed since s_saven.
	if(s_dump)
	{
		uint64 frame = m_perfmon.GetFrame();

		string s;

		if(s_save && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);

			if (rt)
				rt->m_texture->Save(root_hw+s);
		}

		if(s_savez && s_n >= s_saven)
		{
			s = format("%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);

			if (ds_tex)
				ds_tex->Save(root_hw+s);
		}

		s_n++;

		if(s_savel > 0 && (s_n - s_saven) > s_savel)
		{
			s_dump = 0;
		}

	// With ENABLE_OGL_DEBUG the counter keeps advancing even when nothing is dumped.
#ifdef ENABLE_OGL_DEBUG
	} else {
		s_n += 1;
#endif
	}

	// Debug option: read the drawn area of the RT back after every draw.
#ifdef DISABLE_HW_TEXTURE_CACHE
	if (rt)
		m_tc->Read(rt, r);
#endif

	GL_POP();
}
// Reads a rectangle of a cached render target back into the emulated GS
// local memory.
//
// t - target to read. It must be a render target (asserted). Nothing happens
//     when its format is not a 32/24/16 bits colour format or while it still
//     has pending dirty rectangles.
// r - rectangle to read. It is multiplied by the texture scale and divided by
//     the texture size to build the source rectangle given to CopyOffscreen.
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
	if(t->m_type != RenderTarget)
	{
		ASSERT(0);
		return;
	}

	const GIFRegTEX0& TEX0 = t->m_TEX0;

	const bool is_16_bits = (TEX0.PSM == PSM_PSMCT16) || (TEX0.PSM == PSM_PSMCT16S);
	const bool is_color = is_16_bits || (TEX0.PSM == PSM_PSMCT32) || (TEX0.PSM == PSM_PSMCT24);

	// Unsupported formats are silently ignored (no assert on purpose).
	if(!is_color || !t->m_dirty.empty())
	{
		return;
	}

	GL_PUSH("Texture Cache Read");

	// 16 bits formats are copied as integers, the others as RGBA8.
	// (The DX renderer equivalent is DXGI_FORMAT_R16_UINT / DXGI_FORMAT_R8G8B8A8_UNORM.)
	GLuint format = is_16_bits ? GL_R16UI : GL_RGBA8;

	GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

	GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), format);

	if(!offscreen)
	{
		GL_POP();
		return;
	}

	GSTexture::GSMap m;

	if(offscreen->Map(m))
	{
		// TODO: block level write

		GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);

		// Only the four colour formats can reach this point.
		if(is_16_bits)
		{
			m_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
		}
		else if(TEX0.PSM == PSM_PSMCT24)
		{
			m_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
		}
		else
		{
			m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
		}

		offscreen->Unmap();
	}

	// FIXME invalidate data
	m_renderer->m_dev->Recycle(offscreen);

	GL_POP();
}
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); // Default upload path for the texture is the Map/Unmap // This path is mostly used for palette. But also for texture that could // overflow the pbo buffer // Data upload is rather small typically 64B or 1024B. So don't bother with PBO // and directly send the data to the GL synchronously m_clean = false; uint32 row_byte = r.width() << m_int_shift; uint32 map_size = r.height() * row_byte; #ifdef ENABLE_OGL_DEBUG_MEM_BW g_real_texture_upload_byte += map_size; #endif #if 0 if (r.height() == 1) { // Palette data. Transfer is small either 64B or 1024B. // Sometimes it is faster, sometimes slower. glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); return true; } #endif GL_PUSH("Upload Texture %d", m_texture_id); // The easy solution without PBO #if 0 // Likely a bad texture glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior #endif // The complex solution with PBO #if 1 char* src = (char*)data; char* map = PboPool::Map(map_size); // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // Note: row_byte != pitch for (int h = 0; h < r.height(); h++) { memcpy(map, src, row_byte); map += row_byte; src += pitch; } PboPool::Unmap(); glTextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset()); // FIXME OGL4: investigate, only 1 unpack buffer always bound PboPool::UnbindPbo(); PboPool::EndTransfer(); #endif GL_POP(); return true; }