/** * Used to initialize the alpha value of an ARGB8888 miptree after copying * into it from an XRGB8888 source. * * This is very common with glCopyTexImage2D(). Note that the coordinates are * relative to the start of the miptree, not relative to a slice within the * miptree. */ static void intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, int x, int y, int width, int height) { uint32_t BR13, CMD; int pitch, cpp; drm_intel_bo *aper_array[2]; pitch = mt->pitch; cpp = mt->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", __FUNCTION__, mt->bo, pitch, x, y, width, height); BR13 = br13_for_cpp(cpp) | 0xf0 << 16; CMD = XY_COLOR_BLT_CMD; CMD |= XY_BLT_WRITE_ALPHA; if (mt->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } BR13 |= pitch; /* do space check before going any further */ aper_array[0] = brw->batch.bo; aper_array[1] = mt->bo; if (drm_intel_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)) != 0) { intel_batchbuffer_flush(brw); } unsigned length = brw->gen >= 8 ? 7 : 6; bool dst_y_tiled = mt->tiling == I915_TILING_Y; BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false); OUT_BATCH(CMD | (length - 2)); OUT_BATCH(BR13); OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X)); OUT_BATCH(SET_FIELD(y + height, BLT_Y) | SET_FIELD(x + width, BLT_X)); if (brw->gen >= 8) { OUT_RELOC64(mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); } else { OUT_RELOC(mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); } OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ ADVANCE_BATCH_TILED(dst_y_tiled, false); intel_batchbuffer_emit_mi_flush(brw); }
/** * Used to initialize the alpha value of an ARGB8888 miptree after copying * into it from an XRGB8888 source. * * This is very common with glCopyTexImage2D(). Note that the coordinates are * relative to the start of the miptree, not relative to a slice within the * miptree. */ static void intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, int x, int y, int width, int height) { struct intel_region *region = mt->region; uint32_t BR13, CMD; int pitch, cpp; drm_intel_bo *aper_array[2]; pitch = region->pitch; cpp = region->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", __FUNCTION__, region->bo, pitch, x, y, width, height); BR13 = br13_for_cpp(cpp) | 0xf0 << 16; CMD = XY_COLOR_BLT_CMD; CMD |= XY_BLT_WRITE_ALPHA; if (region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } BR13 |= pitch; /* do space check before going any further */ aper_array[0] = brw->batch.bo; aper_array[1] = region->bo; if (drm_intel_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)) != 0) { intel_batchbuffer_flush(brw); } bool dst_y_tiled = region->tiling == I915_TILING_Y; BEGIN_BATCH_BLT_TILED(6, dst_y_tiled, false); OUT_BATCH(CMD | (6 - 2)); OUT_BATCH(BR13); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + height) << 16) | (x + width)); OUT_RELOC_FENCED(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ ADVANCE_BATCH_TILED(dst_y_tiled, false); intel_batchbuffer_emit_mi_flush(brw); }
/* Copy BitBlt */ bool intelEmitCopyBlit(struct brw_context *brw, GLuint cpp, GLshort src_pitch, drm_intel_bo *src_buffer, GLuint src_offset, uint32_t src_tiling, GLshort dst_pitch, drm_intel_bo *dst_buffer, GLuint dst_offset, uint32_t dst_tiling, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, GLenum logic_op) { GLuint CMD, BR13, pass = 0; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; drm_intel_bo *aper_array[3]; bool dst_y_tiled = dst_tiling == I915_TILING_Y; bool src_y_tiled = src_tiling == I915_TILING_Y; if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return false; } if (src_tiling != I915_TILING_NONE) { if (src_offset & 4095) return false; } if ((dst_y_tiled || src_y_tiled) && brw->gen < 6) return false; /* do space check before going any further */ do { aper_array[0] = brw->batch.bo; aper_array[1] = dst_buffer; aper_array[2] = src_buffer; if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { intel_batchbuffer_flush(brw); pass++; } else break; } while (pass < 2); if (pass >= 2) return false; intel_batchbuffer_require_space(brw, 8 * 4, true); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop * the low bits. */ if (src_pitch % 4 != 0 || dst_pitch % 4 != 0) return false; /* For big formats (such as floating point), do the copy using 16 or 32bpp * and multiply the coordinates. 
*/ if (cpp > 4) { if (cpp % 4 == 2) { dst_x *= cpp / 2; dst_x2 *= cpp / 2; src_x *= cpp / 2; cpp = 2; } else { assert(cpp % 4 == 0); dst_x *= cpp / 4; dst_x2 *= cpp / 4; src_x *= cpp / 4; cpp = 4; } } BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; switch (cpp) { case 1: case 2: CMD = XY_SRC_COPY_BLT_CMD; break; case 4: CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; break; default: return false; } if (dst_tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; dst_pitch /= 4; } if (src_tiling != I915_TILING_NONE) { CMD |= XY_SRC_TILED; src_pitch /= 4; } if (dst_y2 <= dst_y || dst_x2 <= dst_x) { return true; } assert(dst_x < dst_x2); assert(dst_y < dst_y2); BEGIN_BATCH_BLT_TILED(8, dst_y_tiled, src_y_tiled); OUT_BATCH(CMD | (8 - 2)); OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); OUT_RELOC_FENCED(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH((uint16_t)src_pitch); OUT_RELOC_FENCED(src_buffer, I915_GEM_DOMAIN_RENDER, 0, src_offset); ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); intel_batchbuffer_emit_mi_flush(brw); return true; }
/** * Used to initialize the alpha value of an ARGB8888 miptree after copying * into it from an XRGB8888 source. * * This is very common with glCopyTexImage2D(). Note that the coordinates are * relative to the start of the miptree, not relative to a slice within the * miptree. */ static void intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, int x, int y, int width, int height) { const struct gen_device_info *devinfo = &brw->screen->devinfo; uint32_t BR13, CMD; int pitch, cpp; pitch = mt->surf.row_pitch_B; cpp = mt->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", __func__, mt->bo, pitch, x, y, width, height); /* Note: Currently only handles 8 bit alpha channel. Extension to < 8 Bit * alpha channel would be likely possible via ROP code 0xfa instead of 0xf0 * and writing a suitable bit-mask instead of 0xffffffff. */ BR13 = br13_for_cpp(cpp) | 0xf0 << 16; CMD = XY_COLOR_BLT_CMD; CMD |= XY_BLT_WRITE_ALPHA; if (mt->surf.tiling != ISL_TILING_LINEAR) { CMD |= XY_DST_TILED; pitch /= 4; } BR13 |= pitch; /* do space check before going any further */ if (!brw_batch_has_aperture_space(brw, mt->bo->size)) intel_batchbuffer_flush(brw); unsigned length = devinfo->gen >= 8 ? 7 : 6; const bool dst_y_tiled = mt->surf.tiling == ISL_TILING_Y0; /* We need to split the blit into chunks that each fit within the blitter's * restrictions. We can't use a chunk size of 32768 because we need to * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's * a nice round power of two, big enough that performance won't suffer, and * small enough to guarantee everything fits. 
*/ const uint32_t max_chunk_size = 16384; for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); uint32_t offset, tile_x, tile_y; get_blit_intratile_offset_el(brw, mt, x + chunk_x, y + chunk_y, &offset, &tile_x, &tile_y); BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false); OUT_BATCH(CMD | (length - 2)); OUT_BATCH(BR13); OUT_BATCH(SET_FIELD(y + chunk_y, BLT_Y) | SET_FIELD(x + chunk_x, BLT_X)); OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) | SET_FIELD(x + chunk_x + chunk_w, BLT_X)); if (devinfo->gen >= 8) { OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset); } else { OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset); } OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ ADVANCE_BATCH_TILED(dst_y_tiled, false); } } brw_emit_mi_flush(brw); }
/**
 * Copy BitBlt.
 *
 * Emits a source-copy blit that copies a w x h rectangle from
 * (src_x, src_y) in src_buffer to (dst_x, dst_y) in dst_buffer, combining
 * with the destination according to logic_op.
 *
 * Returns false without emitting a blit when:
 *  - either side is Y-tiled and devinfo->gen < 6,
 *  - the two buffers do not fit in the aperture even after a batch flush,
 *  - alignment_valid() rejects either offset for its tiling, or
 *  - a pitch is not a multiple of four or an offset is not a multiple of
 *    the (reduced) cpp.
 *
 * Returns true when the blit was emitted, and also when the rectangle is
 * empty (nothing is emitted in that case).
 */
static bool
emit_copy_blit(struct brw_context *brw,
               GLuint cpp,
               int32_t src_pitch,
               struct brw_bo *src_buffer,
               GLuint src_offset,
               enum isl_tiling src_tiling,
               int32_t dst_pitch,
               struct brw_bo *dst_buffer,
               GLuint dst_offset,
               enum isl_tiling dst_tiling,
               GLshort src_x, GLshort src_y,
               GLshort dst_x, GLshort dst_y,
               GLshort w, GLshort h,
               enum gl_logicop_mode logic_op)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   GLuint blt_cmd, blt_br13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   bool dst_y_tiled = dst_tiling == ISL_TILING_Y0;
   bool src_y_tiled = src_tiling == ISL_TILING_Y0;
   uint32_t src_tile_w, src_tile_h;
   uint32_t dst_tile_w, dst_tile_h;

   if (devinfo->gen < 6 && (dst_y_tiled || src_y_tiled))
      return false;

   /* Space check before going any further: flush once if the two buffers
    * do not fit, and give up if they still do not fit afterwards.
    */
   const unsigned total_bo_bytes = dst_buffer->size + src_buffer->size;

   if (!brw_batch_has_aperture_space(brw, total_bo_bytes))
      intel_batchbuffer_flush(brw);

   if (!brw_batch_has_aperture_space(brw, total_bo_bytes))
      return false;

   /* The OUT_RELOC64 path (gen >= 8) uses a packet two dwords longer. */
   unsigned length = 8;
   if (devinfo->gen >= 8)
      length = 10;

   intel_batchbuffer_require_space(brw, length * 4);
   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __func__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   intel_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h);
   intel_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h);

   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
    * (X direction width of the Tile).  This is ensured while allocating the
    * buffer object.
    */
   assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
   assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);

   /* For big formats (such as floating point), do the copy using 16 or
    * 32bpp and multiply the X coordinates by the number of such units per
    * pixel.
    */
   if (cpp > 4) {
      GLuint unit;

      if (cpp % 4 == 2) {
         unit = 2;
      } else {
         assert(cpp % 4 == 0);
         unit = 4;
      }

      const GLuint units_per_pixel = cpp / unit;

      dst_x *= units_per_pixel;
      dst_x2 *= units_per_pixel;
      src_x *= units_per_pixel;
      cpp = unit;
   }

   if (!alignment_valid(brw, dst_offset, dst_tiling) ||
       !alignment_valid(brw, src_offset, src_tiling))
      return false;

   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to
    * drop the low bits.  Offsets must be naturally aligned.
    */
   if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
       dst_pitch % 4 != 0 || dst_offset % cpp != 0)
      return false;

   assert(cpp <= 4);
   blt_br13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

   blt_cmd = xy_blit_cmd(src_tiling, dst_tiling, cpp);

   /* For tiled source and destination, pitch value should be specified
    * as a number of Dwords.
    */
   if (dst_tiling != ISL_TILING_LINEAR)
      dst_pitch /= 4;

   if (src_tiling != ISL_TILING_LINEAR)
      src_pitch /= 4;

   /* An empty rectangle is trivially done. */
   if (dst_y2 <= dst_y || dst_x2 <= dst_x)
      return true;

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);

   OUT_BATCH(blt_cmd | (length - 2));
   OUT_BATCH(blt_br13 | (uint16_t)dst_pitch);
   OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X));
   OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X));
   if (devinfo->gen >= 8) {
      OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
   } else {
      OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
   }

   OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X));
   OUT_BATCH((uint16_t)src_pitch);
   if (devinfo->gen >= 8) {
      OUT_RELOC64(src_buffer, 0, src_offset);
   } else {
      OUT_RELOC(src_buffer, 0, src_offset);
   }

   ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);

   brw_emit_mi_flush(brw);

   return true;
}