/* Break the COW tie to the pbo. Both the pbo and the region end up * with a copy of the data. */ void intel_region_cow(struct intel_context *intel, struct intel_region *region) { struct intel_buffer_object *pbo = region->pbo; GLboolean was_locked = intel->locked; if (intel == NULL) return; intel_region_release_pbo(intel, region); assert(region->cpp * region->pitch * region->height == pbo->Base.Size); DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ was_locked = intel->locked; if (!was_locked) LOCK_HARDWARE(intel); intelEmitCopyBlit(intel, region->cpp, region->pitch, region->buffer, 0, region->tiling, region->pitch, pbo->buffer, 0, region->tiling, 0, 0, 0, 0, region->pitch, region->height, GL_COPY); if (!was_locked) UNLOCK_HARDWARE(intel); }
/**
 * Copy a rectangular sub-region from one region to another.
 *
 * Need better logic about when to push buffers into AGP - will currently
 * do so whenever possible.
 *
 * When the destination is tied to a pbo, the tie is dropped first: with a
 * plain release if the rectangle starts at (0, 0) and matches the
 * destination's pitch and height, otherwise through intel_region_cow().
 *
 * \param intel       driver context; NULL makes the call a no-op
 * \param dst         destination region
 * \param dst_offset  offset handed to the blit for the destination buffer
 * \param dstx, dsty  destination rectangle origin
 * \param src         source region; must have the same cpp as dst
 * \param src_offset  offset handed to the blit for the source buffer
 * \param srcx, srcy  source rectangle origin
 * \param width, height  rectangle size
 */
void
intel_region_copy(struct intel_context *intel,
                  struct intel_region *dst,
                  GLuint dst_offset,
                  GLuint dstx, GLuint dsty,
                  struct intel_region *src,
                  GLuint src_offset,
                  GLuint srcx, GLuint srcy, GLuint width, GLuint height)
{
   DBG("%s\n", __FUNCTION__);

   if (!intel)
      return;

   if (dst->pbo != NULL) {
      const int overwrites_everything =
         (dstx == 0 && dsty == 0 &&
          width == dst->pitch && height == dst->height);

      if (!overwrites_everything)
         intel_region_cow(intel, dst);
      else
         intel_region_release_pbo(intel, dst);
   }

   assert(src->cpp == dst->cpp);

   intelEmitCopyBlit(intel,
                     dst->cpp,
                     src->pitch, src->buffer, src_offset, src->tiling,
                     dst->pitch, dst->buffer, dst_offset, dst->tiling,
                     srcx, srcy,
                     dstx, dsty,
                     width, height,
                     GL_COPY);
}
/* Break the COW tie to the pbo. Both the pbo and the region end up * with a copy of the data. */ void intel_region_cow(struct intel_context *intel, struct intel_region *region) { struct intel_buffer_object *pbo = region->pbo; GLboolean ok; intel_region_release_pbo(intel, region); assert(region->cpp * region->pitch * region->height == pbo->Base.Size); _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ LOCK_HARDWARE(intel); ok = intelEmitCopyBlit(intel, region->cpp, region->pitch, pbo->buffer, 0, region->tiling, region->pitch, region->buffer, 0, region->tiling, 0, 0, 0, 0, region->pitch, region->height, GL_COPY); assert(ok); UNLOCK_HARDWARE(intel); }
/**
 * Copy \p size bytes between two buffer objects with the blitter.
 *
 * We don't have a memmove-type blit like some other hardware, so the copy
 * is done as one rectangular blit covering most of the range, followed by
 * a single-scanline blit for whatever is left over.
 *
 * A failed blit is reported through _mesa_problem(); the function itself
 * returns nothing.
 */
void
intel_emit_linear_blit(struct intel_context *intel,
                       drm_intel_bo *dst_bo,
                       unsigned int dst_offset,
                       drm_intel_bo *src_bo,
                       unsigned int src_offset,
                       unsigned int size)
{
   struct gl_context *ctx = &intel->ctx;
   GLuint row_bytes, rows, copied;

   /* The pitch handed to the GPU has to be DWORD aligned, and the width
    * should match the pitch.  The width limit is (1 << 15) - 1, which
    * rounds down to (1 << 15) - 4.
    */
   row_bytes = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
   if (row_bytes == 0)
      rows = 1;
   else
      rows = size / row_bytes;

   if (!intelEmitCopyBlit(intel, 1,
                          row_bytes, src_bo, src_offset, I915_TILING_NONE,
                          row_bytes, dst_bo, dst_offset, I915_TILING_NONE,
                          0, 0, /* src x/y */
                          0, 0, /* dst x/y */
                          row_bytes, rows, /* w, h */
                          GL_COPY))
      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", row_bytes, rows);

   /* Step past the part the rectangular blit covered. */
   copied = row_bytes * rows;
   src_offset += copied;
   dst_offset += copied;
   size -= copied;

   assert (size < (1 << 15));

   if (size == 0)
      return;

   /* Remainder: a single scanline, with the pitch padded up to a DWORD. */
   row_bytes = ALIGN(size, 4);

   if (!intelEmitCopyBlit(intel, 1,
                          row_bytes, src_bo, src_offset, I915_TILING_NONE,
                          row_bytes, dst_bo, dst_offset, I915_TILING_NONE,
                          0, 0, /* src x/y */
                          0, 0, /* dst x/y */
                          size, 1, /* w, h */
                          GL_COPY))
      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
}
/**
 * Try to upload a texture image from a pixel buffer object with the
 * blitter.
 *
 * XXX: Do this for TexSubImage also.
 *
 * \param intel       driver context
 * \param intelImage  destination texture image; its miptree supplies the
 *                    destination offset, pitch and cpp
 * \param unpack      unpack state; supplies the buffer object and the row
 *                    length
 * \param width, height  size of the rectangle to blit
 * \param pixels      used as an offset for the source buffer
 * \return GL_FALSE when there is no pbo, when image transfer state is set,
 *         or when SkipPixels/SkipRows are non-zero; GL_TRUE once the blit
 *         has been emitted and the batch flushed.
 *
 * internalFormat, format and type are not examined here.
 */
static GLboolean
try_pbo_upload(struct intel_context *intel,
               struct intel_texture_image *intelImage,
               const struct gl_pixelstore_attrib *unpack,
               GLint internalFormat,
               GLint width, GLint height,
               GLenum format, GLenum type, const void *pixels)
{
   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
   GLuint src_offset, src_stride;
   GLuint dst_offset, dst_stride;

   if (!pbo ||
       intel->ctx._ImageTransferState ||
       unpack->SkipPixels || unpack->SkipRows) {
      _mesa_printf("%s: failure 1\n", __FUNCTION__);
      return GL_FALSE;
   }

   /* Go through unsigned long so the pointer is not cast directly to an
    * integer of a different size.
    * note: potential 64-bit ptr to 32-bit int cast
    */
   src_offset = (GLuint) (unsigned long) pixels;

   /* A RowLength of zero or less means rows are exactly `width` wide. */
   if (unpack->RowLength > 0)
      src_stride = unpack->RowLength;
   else
      src_stride = width;

   dst_offset = intel_miptree_image_offset(intelImage->mt,
                                           intelImage->face,
                                           intelImage->level);

   dst_stride = intelImage->mt->pitch;

   intelFlush(&intel->ctx);
   LOCK_HARDWARE(intel);
   {
      struct _DriBufferObject *src_buffer =
         intel_bufferobj_buffer(intel, pbo, INTEL_READ);
      struct _DriBufferObject *dst_buffer =
         intel_region_buffer(intel->intelScreen, intelImage->mt->region,
                             INTEL_WRITE_FULL);

      intelEmitCopyBlit(intel,
                        intelImage->mt->cpp,
                        src_stride, src_buffer, src_offset,
                        dst_stride, dst_buffer, dst_offset,
                        0, 0, 0, 0, width, height,
                        GL_COPY);

      intel_batchbuffer_flush(intel->batch);
   }
   UNLOCK_HARDWARE(intel);

   return GL_TRUE;
}
/**
 * Copy \p size bytes between two buffer objects with the blitter.
 *
 * We don't have a memmove-type blit like some other hardware, so we'll do
 * a rectangular blit covering a large space, then emit a 1-scanline blit
 * at the end to cover the rest if we need to.
 *
 * \param intel       driver context
 * \param dst_bo      destination buffer object
 * \param dst_offset  byte offset into dst_bo
 * \param src_bo      source buffer object
 * \param src_offset  byte offset into src_bo
 * \param size        number of bytes to copy; zero is a no-op
 *
 * Both blits are asserted to succeed.  The bytes left for the trailing
 * scanline are asserted to be a multiple of 4.
 */
void
intel_emit_linear_blit(struct intel_context *intel,
                       drm_intel_bo *dst_bo,
                       unsigned int dst_offset,
                       drm_intel_bo *src_bo,
                       unsigned int src_offset,
                       unsigned int size)
{
   GLuint pitch, height;
   GLboolean ok;

   /* Nothing to copy.  Returning here also keeps the size / pitch
    * division below from dividing by zero, since pitch would be 0.
    */
   if (size == 0)
      return;

   /* The pitch given to the GPU must be DWORD aligned, and
    * we want width to match pitch. Max width is (1 << 15) - 1,
    * rounding that down to the nearest DWORD is (1 << 15) - 4.
    *
    * NOTE(review): for size < (1 << 15) - 4 the pitch is simply `size`,
    * which is only DWORD aligned if the caller passes an aligned size --
    * confirm callers guarantee that.
    */
   pitch = MIN2(size, (1 << 15) - 4);
   height = size / pitch;
   ok = intelEmitCopyBlit(intel, 1,
                          pitch, src_bo, src_offset, I915_TILING_NONE,
                          pitch, dst_bo, dst_offset, I915_TILING_NONE,
                          0, 0, /* src x/y */
                          0, 0, /* dst x/y */
                          pitch, height, /* w, h */
                          GL_COPY);
   assert(ok);

   /* Advance past the rectangle that was just copied. */
   src_offset += pitch * height;
   dst_offset += pitch * height;
   size -= pitch * height;
   assert (size < (1 << 15));
   assert ((size & 3) == 0); /* Pitch must be DWORD aligned */
   if (size != 0) {
      ok = intelEmitCopyBlit(intel, 1,
                             size, src_bo, src_offset, I915_TILING_NONE,
                             size, dst_bo, dst_offset, I915_TILING_NONE,
                             0, 0, /* src x/y */
                             0, 0, /* dst x/y */
                             size, 1, /* w, h */
                             GL_COPY);
      assert(ok);
   }

   /* ok is only consumed by assert(); keep NDEBUG builds warning-free. */
   (void) ok;
}
/**
 * Implements a rectangular block transfer (blit) of pixels between two
 * miptrees.
 *
 * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
 * but limited, pitches and sizes allowed.
 *
 * The src/dst coordinates are relative to the given level/slice of the
 * miptree.
 *
 * If @src_flip or @dst_flip is set, then the rectangle within that miptree
 * will be inverted (including scanline order) when copying.  This is common
 * in GL when copying between window system and user-created
 * renderbuffers/textures.
 *
 * \return false when the formats cannot be blitted, when either region's
 *         pitch is 32k or more, or when intelEmitCopyBlit() fails; true
 *         otherwise.
 */
bool
intel_miptree_blit(struct intel_context *intel,
                   struct intel_mipmap_tree *src_mt,
                   int src_level, int src_slice,
                   uint32_t src_x, uint32_t src_y, bool src_flip,
                   struct intel_mipmap_tree *dst_mt,
                   int dst_level, int dst_slice,
                   uint32_t dst_x, uint32_t dst_y, bool dst_flip,
                   uint32_t width, uint32_t height,
                   GLenum logicop)
{
   /* No sRGB decode or encode is done by the hardware blitter, which is
    * consistent with what we want in the callers (glCopyTexSubImage(),
    * glBlitFramebuffer(), texture validation, etc.).
    */
   gl_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
   gl_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);

   /* The blitter doesn't support doing any format conversions.  We do also
    * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
    * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
    * channel to 1.0 at the end.
    */
   if (src_format != dst_format &&
      ((src_format != MESA_FORMAT_ARGB8888 &&
        src_format != MESA_FORMAT_XRGB8888) ||
       (dst_format != MESA_FORMAT_ARGB8888 &&
        dst_format != MESA_FORMAT_XRGB8888))) {
      perf_debug("%s: Can't use hardware blitter from %s to %s, "
                 "falling back.\n", __FUNCTION__,
                 _mesa_get_format_name(src_format),
                 _mesa_get_format_name(dst_format));
      return false;
   }

   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
    * Data Size Limitations):
    *
    *    The BLT engine is capable of transferring very large quantities of
    *    graphics data. Any graphics data read from and written to the
    *    destination is permitted to represent a number of pixels that
    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
    *    at the destination. The maximum number of pixels that may be
    *    represented per scan line's worth of graphics data depends on the
    *    color depth.
    *
    * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
    * 16-bit integer to represent buffer pitch, so it can only handle buffer
    * pitches < 32k.
    *
    * As a result of these two limitations, we can only use the blitter to do
    * this copy when the region's pitch is less than 32k.  The comparison is
    * therefore >=: a pitch of exactly 32768 does not fit in a signed 16-bit
    * value either.
    */
   if (src_mt->region->pitch >= 32768 ||
       dst_mt->region->pitch >= 32768) {
      perf_debug("Falling back due to >=32k pitch\n");
      return false;
   }

   /* Mirror the rectangle inside the level when that side is flipped. */
   if (src_flip)
      src_y = src_mt->level[src_level].height - src_y - height;

   if (dst_flip)
      dst_y = dst_mt->level[dst_level].height - dst_y - height;

   /* When exactly one side is flipped the source is walked with a negative
    * pitch.
    */
   int src_pitch = src_mt->region->pitch;
   if (src_flip != dst_flip)
      src_pitch = -src_pitch;

   /* Translate level/slice-relative coordinates by the image offset
    * reported for that level/slice.
    */
   uint32_t src_image_x, src_image_y;
   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
                                  &src_image_x, &src_image_y);
   src_x += src_image_x;
   src_y += src_image_y;

   uint32_t dst_image_x, dst_image_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
                                  &dst_image_x, &dst_image_y);
   dst_x += dst_image_x;
   dst_y += dst_image_y;

   if (!intelEmitCopyBlit(intel,
                          src_mt->cpp,
                          src_pitch,
                          src_mt->region->bo, src_mt->offset,
                          src_mt->region->tiling,
                          dst_mt->region->pitch,
                          dst_mt->region->bo, dst_mt->offset,
                          dst_mt->region->tiling,
                          src_x, src_y,
                          dst_x, dst_y,
                          width, height,
                          logicop)) {
      return false;
   }

   /* XRGB -> ARGB: fill in the alpha channel after the copy. */
   if (src_mt->format == MESA_FORMAT_XRGB8888 &&
       dst_mt->format == MESA_FORMAT_ARGB8888) {
      intel_miptree_set_alpha_to_one(intel, dst_mt,
                                     dst_x, dst_y,
                                     width, height);
   }

   return true;
}
/**
 * Copy a rectangle from a renderbuffer into a texture image with the
 * blitter.
 *
 * \param intel       driver context
 * \param intelImage  destination texture image
 * \param dstx, dsty  destination origin inside the texture image
 * \param irb         source renderbuffer
 * \param x, y        source origin
 * \param width, height  rectangle size
 * \return false when the blitter path cannot be used (missing miptree or
 *         renderbuffer, pitch of 32k or more, array texture target,
 *         mismatched formats, Y-tiled destination, or a failed blit);
 *         true once the copy has been emitted.
 */
bool
intel_copy_texsubimage(struct intel_context *intel,
                       struct intel_texture_image *intelImage,
                       GLint dstx, GLint dsty,
                       struct intel_renderbuffer *irb,
                       GLint x, GLint y, GLsizei width, GLsizei height)
{
   struct gl_context *ctx = &intel->ctx;
   struct intel_region *region;
   const GLenum internalFormat = intelImage->base.Base.InternalFormat;
   bool copy_supported = false;
   bool copy_supported_with_alpha_override = false;

   intel_prepare_render(intel);

   if (!intelImage->mt || !irb || !irb->mt) {
      if (unlikely(INTEL_DEBUG & DEBUG_PERF))
         fprintf(stderr, "%s fail %p %p (0x%08x)\n",
                 __FUNCTION__, intelImage->mt, irb, internalFormat);
      return false;
   } else {
      region = irb->mt->region;
      assert(region);
   }

   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
    * Data Size Limitations):
    *
    *    The BLT engine is capable of transferring very large quantities of
    *    graphics data. Any graphics data read from and written to the
    *    destination is permitted to represent a number of pixels that
    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
    *    at the destination. The maximum number of pixels that may be
    *    represented per scan line's worth of graphics data depends on the
    *    color depth.
    *
    * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
    * 16-bit integer to represent buffer pitch, so it can only handle buffer
    * pitches < 32k.
    *
    * As a result of these two limitations, we can only use the blitter to do
    * this copy when the region's pitch is less than 32k.
    */
   if (region->pitch >= 32768)
      return false;

   /* Array textures are not handled by this path: the destination offset
    * below is looked up from Level and Face only.  Actually bail out
    * instead of just logging and carrying on with the blit.
    */
   if (intelImage->base.Base.TexObject->Target == GL_TEXTURE_1D_ARRAY ||
       intelImage->base.Base.TexObject->Target == GL_TEXTURE_2D_ARRAY) {
      perf_debug("no support for array textures\n");
      return false;
   }

   copy_supported = intelImage->base.Base.TexFormat == intel_rb_format(irb);

   /* Converting ARGB8888 to XRGB8888 is trivial: ignore the alpha bits */
   if (intel_rb_format(irb) == MESA_FORMAT_ARGB8888 &&
       intelImage->base.Base.TexFormat == MESA_FORMAT_XRGB8888) {
      copy_supported = true;
   }

   /* Converting XRGB8888 to ARGB8888 requires setting the alpha bits to 1.0 */
   if (intel_rb_format(irb) == MESA_FORMAT_XRGB8888 &&
       intelImage->base.Base.TexFormat == MESA_FORMAT_ARGB8888) {
      copy_supported_with_alpha_override = true;
   }

   if (!copy_supported && !copy_supported_with_alpha_override) {
      if (unlikely(INTEL_DEBUG & DEBUG_PERF))
         fprintf(stderr, "%s mismatched formats %s, %s\n",
                 __FUNCTION__,
                 _mesa_get_format_name(intelImage->base.Base.TexFormat),
                 _mesa_get_format_name(intel_rb_format(irb)));
      return false;
   }

   {
      GLuint image_x, image_y;
      GLshort src_pitch;

      /* get dest x/y in destination texture */
      intel_miptree_get_image_offset(intelImage->mt,
                                     intelImage->base.Base.Level,
                                     intelImage->base.Base.Face,
                                     &image_x, &image_y);

      /* The blitter can't handle Y-tiled buffers. */
      if (intelImage->mt->region->tiling == I915_TILING_Y) {
         return false;
      }

      if (_mesa_is_winsys_fbo(ctx->ReadBuffer)) {
         /* Flip vertical orientation for system framebuffers */
         y = ctx->ReadBuffer->Height - (y + height);
         src_pitch = -region->pitch;
      } else {
         /* reading from a FBO, y is already oriented the way we like */
         src_pitch = region->pitch;
      }

      /* blit from src buffer to texture */
      if (!intelEmitCopyBlit(intel,
                             intelImage->mt->cpp,
                             src_pitch,
                             region->bo,
                             0,
                             region->tiling,
                             intelImage->mt->region->pitch,
                             intelImage->mt->region->bo,
                             0,
                             intelImage->mt->region->tiling,
                             irb->draw_x + x, irb->draw_y + y,
                             image_x + dstx, image_y + dsty,
                             width, height,
                             GL_COPY)) {
         return false;
      }
   }

   if (copy_supported_with_alpha_override)
      intel_set_teximage_alpha_to_one(ctx, intelImage);

   return true;
}
/**
 * Attempt to fill a texture image from a pixel buffer object using the
 * blitter.
 *
 * XXX: Do this for TexSubImage also.
 *
 * Returns false, leaving the upload to another path, when the unpack
 * buffer is not a buffer object, when pixel transfer ops or skip
 * offsets are in effect, when format/type do not match the texture format,
 * when the image has no miptree after allocation, for 1D/2D array targets,
 * or when the blit itself fails.  Returns true once the blit is emitted.
 */
static bool
try_pbo_upload(struct gl_context *ctx,
               struct gl_texture_image *image,
               const struct gl_pixelstore_attrib *unpack,
               GLenum format, GLenum type, const void *pixels)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
   drm_intel_bo *blit_dst, *blit_src;
   GLuint pbo_offset, pbo_pitch;
   GLuint tex_x, tex_y;

   if (!_mesa_is_bufferobj(unpack->BufferObj))
      return false;

   DBG("trying pbo upload\n");

   if (intel->ctx._ImageTransferState ||
       unpack->SkipPixels || unpack->SkipRows) {
      DBG("%s: image transfer\n", __FUNCTION__);
      return false;
   }

   if (!_mesa_format_matches_format_and_type(image->TexFormat,
                                             format, type, false)) {
      DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n",
          __FUNCTION__, _mesa_get_format_name(image->TexFormat),
          format, type);
      return false;
   }

   ctx->Driver.AllocTextureImageBuffer(ctx, image);

   if (!intelImage->mt) {
      DBG("%s: no miptree\n", __FUNCTION__);
      return false;
   }

   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
   case GL_TEXTURE_2D_ARRAY:
      DBG("%s: no support for array textures\n", __FUNCTION__);
      return false;
   default:
      break;
   }

   blit_dst = intelImage->mt->region->bo;
   blit_src = intel_bufferobj_source(intel, pbo, 64, &pbo_offset);

   /* note: potential 64-bit ptr to 32-bit int cast */
   pbo_offset += (GLuint) (unsigned long) pixels;

   /* Source pitch: row length (or the image width when no row length is
    * set) scaled by the region's cpp.
    */
   pbo_pitch = (unpack->RowLength > 0) ? (GLuint) unpack->RowLength
                                       : (GLuint) image->Width;
   pbo_pitch *= intelImage->mt->region->cpp;

   intel_miptree_get_image_offset(intelImage->mt,
                                  intelImage->base.Base.Level,
                                  intelImage->base.Base.Face,
                                  &tex_x, &tex_y);

   if (intelEmitCopyBlit(intel,
                         intelImage->mt->cpp,
                         pbo_pitch, blit_src, pbo_offset, false,
                         intelImage->mt->region->pitch, blit_dst, 0,
                         intelImage->mt->region->tiling,
                         0, 0, tex_x, tex_y,
                         image->Width, image->Height,
                         GL_COPY)) {
      DBG("%s: success\n", __FUNCTION__);
      return true;
   }

   DBG("%s: blit failed\n", __FUNCTION__);
   return false;
}
static bool copy_image_with_blitter(struct brw_context *brw, struct intel_mipmap_tree *src_mt, int src_level, int src_x, int src_y, int src_z, struct intel_mipmap_tree *dst_mt, int dst_level, int dst_x, int dst_y, int dst_z, int src_width, int src_height) { GLuint bw, bh; uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y; /* The blitter doesn't understand multisampling at all. */ if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) return false; if (src_mt->format == MESA_FORMAT_S_UINT8) return false; /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics * Data Size Limitations): * * The BLT engine is capable of transferring very large quantities of * graphics data. Any graphics data read from and written to the * destination is permitted to represent a number of pixels that * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line * at the destination. The maximum number of pixels that may be * represented per scan line’s worth of graphics data depends on the * color depth. * * Furthermore, intelEmitCopyBlit (which is called below) uses a signed * 16-bit integer to represent buffer pitch, so it can only handle buffer * pitches < 32k. * * As a result of these two limitations, we can only use the blitter to do * this copy when the miptree's pitch is less than 32k. 
*/ if (src_mt->pitch >= 32768 || dst_mt->pitch >= 32768) { perf_debug("Falling back due to >=32k pitch\n"); return false; } intel_miptree_get_image_offset(src_mt, src_level, src_z, &src_image_x, &src_image_y); if (_mesa_is_format_compressed(src_mt->format)) { _mesa_get_format_block_size(src_mt->format, &bw, &bh); assert(src_x % bw == 0); assert(src_y % bh == 0); assert(src_width % bw == 0); assert(src_height % bh == 0); src_x /= (int)bw; src_y /= (int)bh; src_width /= (int)bw; src_height /= (int)bh; } src_x += src_image_x; src_y += src_image_y; intel_miptree_get_image_offset(dst_mt, dst_level, dst_z, &dst_image_x, &dst_image_y); if (_mesa_is_format_compressed(dst_mt->format)) { _mesa_get_format_block_size(dst_mt->format, &bw, &bh); assert(dst_x % bw == 0); assert(dst_y % bh == 0); dst_x /= (int)bw; dst_y /= (int)bh; } dst_x += dst_image_x; dst_y += dst_image_y; return intelEmitCopyBlit(brw, src_mt->cpp, src_mt->pitch, src_mt->bo, src_mt->offset, src_mt->tiling, src_mt->tr_mode, dst_mt->pitch, dst_mt->bo, dst_mt->offset, dst_mt->tiling, dst_mt->tr_mode, src_x, src_y, dst_x, dst_y, src_width, src_height, GL_COPY); }
static GLboolean do_blit_readpixels(GLcontext * ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, GLvoid * pixels) { struct intel_context *intel = intel_context(ctx); struct intel_region *src = intel_readbuf_region(intel); struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); GLuint dst_offset; GLuint rowLength; drm_intel_bo *dst_buffer; GLboolean all; GLint dst_x, dst_y; if (INTEL_DEBUG & DEBUG_PIXEL) printf("%s\n", __FUNCTION__); if (!src) return GL_FALSE; if (!_mesa_is_bufferobj(pack->BufferObj)) { /* PBO only for now: */ if (INTEL_DEBUG & DEBUG_PIXEL) printf("%s - not PBO\n", __FUNCTION__); return GL_FALSE; } if (ctx->_ImageTransferState || !intel_check_blit_format(src, format, type)) { if (INTEL_DEBUG & DEBUG_PIXEL) printf("%s - bad format for blit\n", __FUNCTION__); return GL_FALSE; } if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) { if (INTEL_DEBUG & DEBUG_PIXEL) printf("%s: bad packing params\n", __FUNCTION__); return GL_FALSE; } if (pack->RowLength > 0) rowLength = pack->RowLength; else rowLength = width; if (pack->Invert) { if (INTEL_DEBUG & DEBUG_PIXEL) printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); return GL_FALSE; } else { if (ctx->ReadBuffer->Name == 0) rowLength = -rowLength; } dst_offset = (GLintptr) _mesa_image_address(2, pack, pixels, width, height, format, type, 0, 0, 0); if (!_mesa_clip_copytexsubimage(ctx, &dst_x, &dst_y, &x, &y, &width, &height)) { return GL_TRUE; } intel_prepare_render(intel); all = (width * height * src->cpp == dst->Base.Size && x == 0 && dst_offset == 0); dst_x = 0; dst_y = 0; dst_buffer = intel_bufferobj_buffer(intel, dst, all ? 
INTEL_WRITE_FULL : INTEL_WRITE_PART); if (ctx->ReadBuffer->Name == 0) y = ctx->ReadBuffer->Height - (y + height); if (!intelEmitCopyBlit(intel, src->cpp, src->pitch, src->buffer, 0, src->tiling, rowLength, dst_buffer, dst_offset, GL_FALSE, x, y, dst_x, dst_y, width, height, GL_COPY)) { return GL_FALSE; } if (INTEL_DEBUG & DEBUG_PIXEL) printf("%s - DONE\n", __FUNCTION__); return GL_TRUE; }
/**
 * Copy a rectangle from the source region chosen by get_teximage_source()
 * into a texture image, using the blitter.
 *
 * Returns GL_FALSE only when the image has no miptree or no source region
 * is found.  Otherwise returns GL_TRUE, including when clipping leaves
 * nothing to copy.  The return value of intelEmitCopyBlit() is not
 * checked.
 */
static GLboolean
do_copy_texsubimage(struct intel_context *intel,
                    struct intel_texture_image *intelImage,
                    GLenum internalFormat,
                    GLint dstx, GLint dsty,
                    GLint x, GLint y, GLsizei width, GLsizei height)
{
   GLcontext *ctx = &intel->ctx;
   const struct intel_region *src =
      get_teximage_source(intel, internalFormat);

   if (!intelImage->mt || !src) {
      DBG("%s fail %p %p\n", __FUNCTION__, intelImage->mt, src);
      return GL_FALSE;
   }

   /* Flush first, then do the clip and blit with the hardware lock held. */
   intelFlush(ctx);
   LOCK_HARDWARE(intel);
   {
      GLuint image_offset = intel_miptree_image_offset(intelImage->mt,
                                                       intelImage->face,
                                                       intelImage->level);
      /* Remember the unclipped origin so the destination can be shifted by
       * the same amount the clip moved the source.
       */
      const GLint orig_x = x;
      const GLint orig_y = y;
      /* NOTE(review): the clip bounds come from ctx->DrawBuffer although
       * the copy reads from the read buffer -- confirm this is intended.
       */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      if (_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax,
                               &x, &y, &width, &height)) {
         /* Update dst for clipped src.  Need to also clip the source rect.
          */
         dstx += x - orig_x;
         dsty += y - orig_y;

         if (ctx->ReadBuffer->Name == 0) {
            /* reading from a window, adjust x, y */
            __DRIdrawablePrivate *dPriv = intel->driDrawable;
            GLuint window_y;

            /* window_y = position of window on screen if y=0=bottom */
            window_y = intel->intelScreen->height - (dPriv->y + dPriv->h);

            y = window_y + y;
            x += dPriv->x;
         }
         else {
            /* reading from a FBO */
            /* invert Y */
            y = ctx->ReadBuffer->Height - y - 1;
         }


         /* A bit of fiddling to get the blitter to work with -ve
          * pitches.  But we get a nice inverted blit this way, so it's
          * worth it:
          *
          * The source pitch is negated, the source offset is
          * height * pitch * cpp of the source region, and the source y
          * passed in is y + height.
          */
         intelEmitCopyBlit(intel,
                           intelImage->mt->cpp,
                           -src->pitch,
                           src->buffer,
                           src->height * src->pitch * src->cpp,
                           intelImage->mt->pitch,
                           intelImage->mt->region->buffer,
                           image_offset,
                           x, y + height, dstx, dsty, width, height,
                           GL_COPY); /* ? */

         intel_batchbuffer_flush(intel->batch);
      }
   }


   UNLOCK_HARDWARE(intel);

#if 0
   /* GL_SGIS_generate_mipmap -- this can be accelerated now.
    * XXX Add a ctx->Driver.GenerateMipmaps() function?
    */
   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
      intel_generate_mipmap(ctx, target,
                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
                            texObj);
   }
#endif

   return GL_TRUE;
}
bool intel_copy_texsubimage(struct intel_context *intel, struct intel_texture_image *intelImage, GLint dstx, GLint dsty, struct intel_renderbuffer *irb, GLint x, GLint y, GLsizei width, GLsizei height) { struct gl_context *ctx = &intel->ctx; struct intel_region *region; const GLenum internalFormat = intelImage->base.Base.InternalFormat; bool copy_supported = false; bool copy_supported_with_alpha_override = false; intel_prepare_render(intel); if (!intelImage->mt || !irb || !irb->mt) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", __FUNCTION__, intelImage->mt, irb, internalFormat); return false; } else { region = irb->mt->region; assert(region); } copy_supported = intelImage->base.Base.TexFormat == intel_rb_format(irb); /* Converting ARGB8888 to XRGB8888 is trivial: ignore the alpha bits */ if (intel_rb_format(irb) == MESA_FORMAT_ARGB8888 && intelImage->base.Base.TexFormat == MESA_FORMAT_XRGB8888) { copy_supported = true; } /* Converting XRGB8888 to ARGB8888 requires setting the alpha bits to 1.0 */ if (intel_rb_format(irb) == MESA_FORMAT_XRGB8888 && intelImage->base.Base.TexFormat == MESA_FORMAT_ARGB8888) { copy_supported_with_alpha_override = true; } if (!copy_supported && !copy_supported_with_alpha_override) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s mismatched formats %s, %s\n", __FUNCTION__, _mesa_get_format_name(intelImage->base.Base.TexFormat), _mesa_get_format_name(intel_rb_format(irb))); return false; } { GLuint image_x, image_y; GLshort src_pitch; /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Base.Level, intelImage->base.Base.Face, 0, &image_x, &image_y); /* The blitter can't handle Y-tiled buffers. 
*/ if (intelImage->mt->region->tiling == I915_TILING_Y) { return false; } if (ctx->ReadBuffer->Name == 0) { /* Flip vertical orientation for system framebuffers */ y = ctx->ReadBuffer->Height - (y + height); src_pitch = -region->pitch; } else { /* reading from a FBO, y is already oriented the way we like */ src_pitch = region->pitch; } /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, region->bo, 0, region->tiling, intelImage->mt->region->pitch, intelImage->mt->region->bo, 0, intelImage->mt->region->tiling, irb->draw_x + x, irb->draw_y + y, image_x + dstx, image_y + dsty, width, height, GL_COPY)) { return false; } } if (copy_supported_with_alpha_override) intel_set_teximage_alpha_to_one(ctx, intelImage); return true; }
/**
 * Rectangular block transfer (blit) of pixels between two miptrees.
 *
 * The blitter works on 1, 2 or 4 byte-per-pixel data and has generous but
 * finite pitch and size limits.
 *
 * src/dst coordinates are relative to the given level/slice of their
 * miptree.
 *
 * Setting @src_flip or @dst_flip inverts the rectangle inside that miptree
 * (scanline order included) during the copy, as is common in GL when
 * copying between window-system and user-created renderbuffers/textures.
 *
 * Returns false whenever the blitter cannot be used or the blit fails.
 */
bool
intel_miptree_blit(struct brw_context *brw,
                   struct intel_mipmap_tree *src_mt,
                   int src_level, int src_slice,
                   uint32_t src_x, uint32_t src_y, bool src_flip,
                   struct intel_mipmap_tree *dst_mt,
                   int dst_level, int dst_slice,
                   uint32_t dst_x, uint32_t dst_y, bool dst_flip,
                   uint32_t width, uint32_t height,
                   GLenum logicop)
{
   /* The blitter doesn't understand multisampling at all. */
   if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
      return false;

   /* The hardware blitter does no sRGB decode or encode, which is what the
    * callers (glCopyTexSubImage(), glBlitFramebuffer(), texture validation,
    * etc.) want, so compare the linear equivalents.
    */
   mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
   mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);

   /* No format conversion is available.  The one exception is the
    * ARGB8888/XRGB8888 pair: A->X is trivial (whatever lands in X doesn't
    * matter) and X->A is handled by forcing alpha to 1.0 at the end.
    */
   if (src_format != dst_format) {
      const bool src_is_8888 = (src_format == MESA_FORMAT_B8G8R8A8_UNORM ||
                                src_format == MESA_FORMAT_B8G8R8X8_UNORM);
      const bool dst_is_8888 = (dst_format == MESA_FORMAT_B8G8R8A8_UNORM ||
                                dst_format == MESA_FORMAT_B8G8R8X8_UNORM);

      if (!src_is_8888 || !dst_is_8888) {
         perf_debug("%s: Can't use hardware blitter from %s to %s, "
                    "falling back.\n", __FUNCTION__,
                    _mesa_get_format_name(src_format),
                    _mesa_get_format_name(dst_format));
         return false;
      }
   }

   /* Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data Size
    * Limitations) permits up to 65,536 scan lines and up to 32,768 bytes
    * per scan line at the destination.  intelEmitCopyBlit() additionally
    * stores the pitch in a signed 16-bit integer, so the blitter is only
    * usable when both miptree pitches are below 32k.
    */
   if (src_mt->pitch >= 32768 || dst_mt->pitch >= 32768) {
      perf_debug("Falling back due to >=32k pitch\n");
      return false;
   }

   /* The blitter has no idea about HiZ or fast color clears, so the
    * miptrees are resolved before anything else happens.
    */
   intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice);
   intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
   intel_miptree_resolve_color(brw, src_mt);
   intel_miptree_resolve_color(brw, dst_mt);

   /* Mirror the rectangle within the level on each flipped side. */
   if (src_flip) {
      src_y = minify(src_mt->physical_height0,
                     src_level - src_mt->first_level) - src_y - height;
   }

   if (dst_flip) {
      dst_y = minify(dst_mt->physical_height0,
                     dst_level - dst_mt->first_level) - dst_y - height;
   }

   /* A flip on exactly one side means reading the source bottom-up. */
   const int forward_pitch = src_mt->pitch;
   const int blit_src_pitch =
      (src_flip != dst_flip) ? -forward_pitch : forward_pitch;

   uint32_t src_off_x, src_off_y;
   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
                                  &src_off_x, &src_off_y);
   src_x += src_off_x;
   src_y += src_off_y;

   /* The blitter treats the 16-bit src x/y as signed, with negative values
    * being invalid.  Our values are unsigned, so reject anything that
    * would overflow.
    */
   if (src_x >= 32768 || src_y >= 32768) {
      perf_debug("Falling back due to >=32k src offset (%d, %d)\n",
                 src_x, src_y);
      return false;
   }

   uint32_t dst_off_x, dst_off_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
                                  &dst_off_x, &dst_off_y);
   dst_x += dst_off_x;
   dst_y += dst_off_y;

   /* Same signed 16-bit restriction for the destination x/y. */
   if (dst_x >= 32768 || dst_y >= 32768) {
      perf_debug("Falling back due to >=32k dst offset (%d, %d)\n",
                 dst_x, dst_y);
      return false;
   }

   if (!intelEmitCopyBlit(brw,
                          src_mt->cpp,
                          blit_src_pitch,
                          src_mt->bo, src_mt->offset,
                          src_mt->tiling,
                          dst_mt->pitch,
                          dst_mt->bo, dst_mt->offset,
                          dst_mt->tiling,
                          src_x, src_y,
                          dst_x, dst_y,
                          width, height,
                          logicop))
      return false;

   if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM &&
       dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM)
      intel_miptree_set_alpha_to_one(brw, dst_mt,
                                     dst_x, dst_y,
                                     width, height);

   return true;
}
/** * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. */ static bool do_blit_copypixels(struct gl_context * ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type) { struct intel_context *intel = intel_context(ctx); struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_framebuffer *read_fb = ctx->ReadBuffer; GLint orig_dstx; GLint orig_dsty; GLint orig_srcx; GLint orig_srcy; bool flip = false; struct intel_renderbuffer *draw_irb = NULL; struct intel_renderbuffer *read_irb = NULL; gl_format read_format, draw_format; /* Update draw buffer bounds */ _mesa_update_state(ctx); switch (type) { case GL_COLOR: if (fb->_NumColorDrawBuffers != 1) { perf_debug("glCopyPixels() fallback: MRT\n"); return false; } draw_irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); read_irb = intel_renderbuffer(read_fb->_ColorReadBuffer); break; case GL_DEPTH_STENCIL_EXT: draw_irb = intel_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); read_irb = intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); break; case GL_DEPTH: perf_debug("glCopyPixels() fallback: GL_DEPTH\n"); return false; case GL_STENCIL: perf_debug("glCopyPixels() fallback: GL_STENCIL\n"); return false; default: perf_debug("glCopyPixels(): Unknown type\n"); return false; } if (!draw_irb) { perf_debug("glCopyPixels() fallback: missing draw buffer\n"); return false; } if (!read_irb) { perf_debug("glCopyPixels() fallback: missing read buffer\n"); return false; } read_format = intel_rb_format(read_irb); draw_format = intel_rb_format(draw_irb); if (draw_format != read_format && !(draw_format == MESA_FORMAT_XRGB8888 && read_format == MESA_FORMAT_ARGB8888)) { perf_debug("glCopyPixels() fallback: mismatched formats (%s -> %s\n", _mesa_get_format_name(read_format), _mesa_get_format_name(draw_format)); return false; } /* Copypixels can be more than a straight copy. 
Ensure all the * extra operations are disabled: */ if (!intel_check_copypixel_blit_fragment_ops(ctx) || ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) return false; intel_prepare_render(intel); intel_flush(&intel->ctx); /* Clip to destination buffer. */ orig_dstx = dstx; orig_dsty = dsty; if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax, &dstx, &dsty, &width, &height)) goto out; /* Adjust src coords for our post-clipped destination origin */ srcx += dstx - orig_dstx; srcy += dsty - orig_dsty; /* Clip to source buffer. */ orig_srcx = srcx; orig_srcy = srcy; if (!_mesa_clip_to_region(0, 0, read_fb->Width, read_fb->Height, &srcx, &srcy, &width, &height)) goto out; /* Adjust dst coords for our post-clipped source origin */ dstx += srcx - orig_srcx; dsty += srcy - orig_srcy; /* Flip dest Y if it's a window system framebuffer. */ if (_mesa_is_winsys_fbo(fb)) { /* copypixels to a window system framebuffer */ dsty = fb->Height - dsty - height; flip = !flip; } /* Flip source Y if it's a window system framebuffer. */ if (_mesa_is_winsys_fbo(read_fb)) { srcy = read_fb->Height - srcy - height; flip = !flip; } srcx += read_irb->draw_x; srcy += read_irb->draw_y; dstx += draw_irb->draw_x; dsty += draw_irb->draw_y; uint32_t src_pitch = read_irb->mt->region->pitch; if (flip) src_pitch = -src_pitch; if (!intelEmitCopyBlit(intel, draw_irb->mt->cpp, src_pitch, read_irb->mt->region->bo, 0, read_irb->mt->region->tiling, draw_irb->mt->region->pitch, draw_irb->mt->region->bo, 0, draw_irb->mt->region->tiling, srcx, srcy, dstx, dsty, width, height, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY)) { DBG("%s: blit failure\n", __FUNCTION__); return false; } out: intel_check_front_buffer_rendering(intel); DBG("%s: success\n", __FUNCTION__); return true; }
/* Pros: * - no waiting for idle before updating framebuffer. * * Cons: * - if upload is by memcpy, this may actually be slower than fallback path. * - uploads the whole image even if destination is clipped * * Need to benchmark. * * Given the questions about performance, implement for pbo's only. * This path is definitely a win if the pbo is already in agp. If it * turns out otherwise, we can add the code necessary to upload client * data to agp space before performing the blit. (Though it may turn * out to be better/simpler just to use the texture engine). */ static GLboolean do_blit_drawpixels(GLcontext * ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, const GLvoid * pixels) { struct intel_context *intel = intel_context(ctx); struct intel_region *dest = intel_drawbuf_region(intel); struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); GLuint src_offset; GLuint rowLength; struct _DriFenceObject *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); if (!dest) { if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - no dest\n", __FUNCTION__); return GL_FALSE; } if (src) { /* This validation should be done by core mesa: */ if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, format, type, pixels)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); return GL_TRUE; } } else { /* PBO only for now: */ if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - not PBO\n", __FUNCTION__); return GL_FALSE; } if (!intel_check_blit_format(dest, format, type)) { if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - bad format for blit\n", __FUNCTION__); return GL_FALSE; } if (!intel_check_blit_fragment_ops(ctx)) { if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - bad GL fragment state for blitter\n", __FUNCTION__); return GL_FALSE; } if (ctx->Pixel.ZoomX != 1.0F) { if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__); 
return GL_FALSE; } if (unpack->RowLength > 0) rowLength = unpack->RowLength; else rowLength = width; if (ctx->Pixel.ZoomY == -1.0F) { if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__); return GL_FALSE; /* later */ y -= height; } else if (ctx->Pixel.ZoomY == 1.0F) { rowLength = -rowLength; } else { if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__); return GL_FALSE; } src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height, format, type, 0, 0, 0); intelFlush(&intel->ctx); LOCK_HARDWARE(intel); if (intel->driDrawable->numClipRects) { __DRIdrawablePrivate *dPriv = intel->driDrawable; int nbox = dPriv->numClipRects; drm_clip_rect_t *box = dPriv->pClipRects; drm_clip_rect_t rect; drm_clip_rect_t dest_rect; struct _DriBufferObject *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ); int i; dest_rect.x1 = dPriv->x + x; dest_rect.y1 = dPriv->y + dPriv->h - (y + height); dest_rect.x2 = dest_rect.x1 + width; dest_rect.y2 = dest_rect.y1 + height; for (i = 0; i < nbox; i++) { if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i])) continue; intelEmitCopyBlit(intel, dest->cpp, rowLength, src_buffer, src_offset, dest->pitch, dest->buffer, 0, rect.x1 - dest_rect.x1, rect.y2 - dest_rect.y2, rect.x1, rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } fence = intel_batchbuffer_flush(intel->batch); driFenceReference(fence); } UNLOCK_HARDWARE(intel); if (fence) { driFenceFinish(fence, DRM_FENCE_TYPE_EXE | DRM_I915_FENCE_TYPE_RW, GL_FALSE); driFenceUnReference(fence); } if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); return GL_TRUE; }