static void apply_gen6_stencil_hiz_offset(struct isl_surf *surf, struct intel_mipmap_tree *mt, uint32_t lod, uint32_t *offset) { assert(mt->array_layout == ALL_SLICES_AT_EACH_LOD); if (mt->format == MESA_FORMAT_S_UINT8) { /* Note: we can't compute the stencil offset using * intel_miptree_get_aligned_offset(), because the miptree * claims that the region is untiled even though it's W tiled. */ *offset = mt->level[lod].level_y * mt->pitch + mt->level[lod].level_x * 64; } else { *offset = intel_miptree_get_aligned_offset(mt, mt->level[lod].level_x, mt->level[lod].level_y); } surf->logical_level0_px.width = minify(surf->logical_level0_px.width, lod); surf->logical_level0_px.height = minify(surf->logical_level0_px.height, lod); surf->phys_level0_sa.width = minify(surf->phys_level0_sa.width, lod); surf->phys_level0_sa.height = minify(surf->phys_level0_sa.height, lod); surf->levels = 1; surf->array_pitch_el_rows = ALIGN(surf->phys_level0_sa.height, surf->image_alignment_el.height); }
/** * Split x_offset and y_offset into a base offset (in bytes) and a remaining * x/y offset (in pixels). Note: we can't do this by calling * intel_renderbuffer_tile_offsets(), because the offsets may have been * adjusted to account for Y vs. W tiling differences. So we compute it * directly from the adjusted offsets. */ uint32_t brw_blorp_surface_info::compute_tile_offsets(uint32_t *tile_x, uint32_t *tile_y) const { uint32_t mask_x, mask_y; intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, map_stencil_as_y_tiled); *tile_x = x_offset & mask_x; *tile_y = y_offset & mask_y; return intel_miptree_get_aligned_offset(mt, x_offset & ~mask_x, y_offset & ~mask_y, map_stencil_as_y_tiled); }
static void gen6_blorp_emit_depth_stencil_config(struct brw_context *brw, const struct brw_blorp_params *params) { uint32_t surfwidth, surfheight; uint32_t surftype; unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1); GLenum gl_target = params->depth.mt->target; unsigned int lod; switch (gl_target) { case GL_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_CUBE_MAP: /* The PRM claims that we should use BRW_SURFACE_CUBE for this * situation, but experiments show that gl_Layer doesn't work when we do * this. So we use BRW_SURFACE_2D, since for rendering purposes this is * equivalent. */ surftype = BRW_SURFACE_2D; depth *= 6; break; default: surftype = translate_tex_target(gl_target); break; } const unsigned min_array_element = params->depth.layer; lod = params->depth.level - params->depth.mt->first_level; if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) { /* HIZ ops for lod 0 may set the width & height a little * larger to allow the fast depth clear to fit the hardware * alignment requirements. 
(8x4) */ surfwidth = params->depth.width; surfheight = params->depth.height; } else { surfwidth = params->depth.mt->logical_width0; surfheight = params->depth.mt->logical_height0; } /* 3DSTATE_DEPTH_BUFFER */ { brw_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); /* 3DSTATE_DEPTH_BUFFER dw0 */ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); /* 3DSTATE_DEPTH_BUFFER dw1 */ OUT_BATCH((params->depth.mt->pitch - 1) | params->depth_format << 18 | 1 << 21 | /* separate stencil enable */ 1 << 22 | /* hiz enable */ BRW_TILEWALK_YMAJOR << 26 | 1 << 27 | /* y-tiled */ surftype << 29); /* 3DSTATE_DEPTH_BUFFER dw2 */ OUT_RELOC(params->depth.mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); /* 3DSTATE_DEPTH_BUFFER dw3 */ OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | (surfwidth - 1) << 6 | (surfheight - 1) << 19 | lod << 2); /* 3DSTATE_DEPTH_BUFFER dw4 */ OUT_BATCH((depth - 1) << 21 | min_array_element << 10 | (depth - 1) << 1); /* 3DSTATE_DEPTH_BUFFER dw5 */ OUT_BATCH(0); /* 3DSTATE_DEPTH_BUFFER dw6 */ OUT_BATCH(0); ADVANCE_BATCH(); } /* 3DSTATE_HIER_DEPTH_BUFFER */ { struct intel_mipmap_tree *hiz_mt = params->depth.mt->hiz_buf->mt; uint32_t offset = 0; if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) { offset = intel_miptree_get_aligned_offset(hiz_mt, hiz_mt->level[lod].level_x, hiz_mt->level[lod].level_y, false); } BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(hiz_mt->pitch - 1); OUT_RELOC(hiz_mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, offset); ADVANCE_BATCH(); } /* 3DSTATE_STENCIL_BUFFER */ { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } }
void gen6_emit_depth_stencil_hiz(struct brw_context *brw, struct intel_mipmap_tree *depth_mt, uint32_t depth_offset, uint32_t depthbuffer_format, uint32_t depth_surface_type, struct intel_mipmap_tree *stencil_mt, bool hiz, bool separate_stencil, uint32_t width, uint32_t height, uint32_t tile_x, uint32_t tile_y) { struct gl_context *ctx = &brw->ctx; struct gl_framebuffer *fb = ctx->DrawBuffer; uint32_t surftype; unsigned int depth = 1; GLenum gl_target = GL_TEXTURE_2D; unsigned int lod; const struct intel_mipmap_tree *mt = depth_mt ? depth_mt : stencil_mt; const struct intel_renderbuffer *irb = NULL; const struct gl_renderbuffer *rb = NULL; /* Enable the hiz bit if we're doing separate stencil, because it and the * separate stencil bit must have the same value. From Section 2.11.5.6.1.1 * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable": * [DevIL]: If this field is enabled, Hierarchical Depth Buffer * Enable must also be enabled. * * [DevGT]: This field must be set to the same value (enabled or * disabled) as Hierarchical Depth Buffer Enable */ bool enable_hiz_ss = hiz || separate_stencil; brw_emit_depth_stall_flushes(brw); irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); if (!irb) irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); rb = (struct gl_renderbuffer*) irb; if (rb) { depth = MAX2(irb->layer_count, 1); if (rb->TexImage) gl_target = rb->TexImage->TexObject->Target; } switch (gl_target) { case GL_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_CUBE_MAP: /* The PRM claims that we should use BRW_SURFACE_CUBE for this * situation, but experiments show that gl_Layer doesn't work when we do * this. So we use BRW_SURFACE_2D, since for rendering purposes this is * equivalent. */ surftype = BRW_SURFACE_2D; depth *= 6; break; case GL_TEXTURE_3D: assert(mt); depth = MAX2(mt->logical_depth0, 1); /* fallthrough */ default: surftype = translate_tex_target(gl_target); break; } const unsigned min_array_element = irb ? irb->mt_layer : 0; lod = irb ? 
irb->mt_level - irb->mt->first_level : 0; if (mt) { width = mt->logical_width0; height = mt->logical_height0; } BEGIN_BATCH(7); /* 3DSTATE_DEPTH_BUFFER dw0 */ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); /* 3DSTATE_DEPTH_BUFFER dw1 */ OUT_BATCH((depth_mt ? depth_mt->pitch - 1 : 0) | (depthbuffer_format << 18) | ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */ ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */ (BRW_TILEWALK_YMAJOR << 26) | ((depth_mt ? depth_mt->tiling != I915_TILING_NONE : 1) << 27) | (surftype << 29)); /* 3DSTATE_DEPTH_BUFFER dw2 */ if (depth_mt) { OUT_RELOC(depth_mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); } else { OUT_BATCH(0); } /* 3DSTATE_DEPTH_BUFFER dw3 */ OUT_BATCH(((width - 1) << 6) | ((height - 1) << 19) | lod << 2); /* 3DSTATE_DEPTH_BUFFER dw4 */ OUT_BATCH((depth - 1) << 21 | min_array_element << 10 | (depth - 1) << 1); /* 3DSTATE_DEPTH_BUFFER dw5 */ OUT_BATCH(0); assert(tile_x == 0 && tile_y == 0); /* 3DSTATE_DEPTH_BUFFER dw6 */ OUT_BATCH(0); ADVANCE_BATCH(); if (hiz || separate_stencil) { /* * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate * stencil enable' and 'hiz enable' bits were set. Therefore we must * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted; * failure to do so causes hangs on gen5 and a stall on gen6. */ /* Emit hiz buffer. 
*/ if (hiz) { struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt; uint32_t offset = 0; if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) { offset = intel_miptree_get_aligned_offset( hiz_mt, hiz_mt->level[lod].level_x, hiz_mt->level[lod].level_y, false); } BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(hiz_mt->pitch - 1); OUT_RELOC(hiz_mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } /* Emit stencil buffer. */ if (separate_stencil) { uint32_t offset = 0; if (stencil_mt->array_layout == ALL_SLICES_AT_EACH_LOD) { if (stencil_mt->format == MESA_FORMAT_S_UINT8) { /* Note: we can't compute the stencil offset using * intel_region_get_aligned_offset(), because stencil_region * claims that the region is untiled even though it's W tiled. */ offset = stencil_mt->level[lod].level_y * stencil_mt->pitch + stencil_mt->level[lod].level_x * 64; } else { offset = intel_miptree_get_aligned_offset( stencil_mt, stencil_mt->level[lod].level_x, stencil_mt->level[lod].level_y, false); } } BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); /* The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as * the stencil buffer is stored with two rows interleaved. */ OUT_BATCH(2 * stencil_mt->pitch - 1); OUT_RELOC(stencil_mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } } /* * On Gen >= 6, emit clear params for safety. If using hiz, then clear * params must be emitted. 
* * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS: * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | GEN5_DEPTH_CLEAR_VALID | (2 - 2)); OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0); ADVANCE_BATCH(); }
/**
 * Resolve depth/stencil intra-tile offset restrictions for the current draw
 * framebuffer and cache the result in brw->depthstencil.
 *
 * brw->depthstencil is first reset to a "nop" state (zero offsets, miptrees
 * taken straight from the attached renderbuffers).  For brw->gen >= 6 the
 * function stops there.  Otherwise it computes the intra-tile x/y offset of
 * the depth and stencil attachments and, when an offset is not 8-pixel
 * aligned (or is non-zero on hardware without surface tile offsets), or when
 * depth and stencil disagree, moves the offending renderbuffer to a
 * temporary miptree via intel_renderbuffer_move_to_temp().  Finally the
 * (8-pixel truncated) tile offsets, miptrees and tile-aligned buffer
 * offsets are stored in brw->depthstencil.
 *
 * \param brw         context; brw->ctx.DrawBuffer supplies the attachments.
 * \param clear_mask  BUFFER_BIT_DEPTH / BUFFER_BIT_STENCIL bits; passed on
 *                    as the invalidate flag when the corresponding buffer
 *                    is moved to a temporary.
 */
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb =
      intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Initialize brw->depthstencil to 'nop' workaround state. */
   brw->depthstencil.tile_x = 0;
   brw->depthstencil.tile_y = 0;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   if (depth_irb)
      brw->depthstencil.depth_mt = depth_mt;
   if (stencil_irb)
      brw->depthstencil.stencil_mt = get_stencil_miptree(stencil_irb);

   /* Gen6+ doesn't require the workarounds, since we always program the
    * surface state at the start of the whole surface.
    */
   if (brw->gen >= 6)
      return;

   /* Check if depth buffer is in depth/stencil format.  If so, then it's
    * only safe to invalidate it if we're also clearing stencil, and both
    * depth_irb and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that
    * uses separate stencil.  To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
        depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (!brw->has_surface_tile_offset) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt,
          * too.  (depth_mt still holds the pre-rebase miptree here.)
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         /* Store into the function-scope variables rather than declaring
          * shadowing locals; they are recomputed from the same miptree
          * before their next use, so this does not change the outcome.
          */
         stencil_tile_x = stencil_draw_x & tile_mask_x;
         stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase
          * stencil as well.  (if we hadn't decided to rebase stencil
          * before, the post-stencil depth test will also rebase depth to
          * try to match it up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      if (!brw->has_surface_tile_offset) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   /* With no depth attachment, the stencil offsets are the ones to use. */
   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to
    * do so in the case of rendering to array or 3D textures, where nonzero
    * faces will still have an offset post-rebase.  At least give an
    * informative warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new depthstencil state so the
    * hardware packets can just dereference that without re-calculating tile
    * offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   if (depth_irb) {
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_miptree_get_aligned_offset(depth_mt,
                                          depth_irb->draw_x & ~tile_mask_x,
                                          depth_irb->draw_y & ~tile_mask_y,
                                          false);
      if (intel_renderbuffer_has_hiz(depth_irb)) {
         /* The HiZ offset is computed from half the tile-aligned y. */
         brw->depthstencil.hiz_offset =
            intel_miptree_get_aligned_offset(depth_mt,
                                             depth_irb->draw_x & ~tile_mask_x,
                                             (depth_irb->draw_y & ~tile_mask_y) / 2,
                                             false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}