Esempio n. 1
0
/**
 * Returns the mask of how many bits of x and y must be handled through the
 * depthbuffer's draw offset x and y fields.
 *
 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
 * between the depth, hiz, and stencil buffers.  Because it can be hard to get
 * all 3 to agree on this value, we want to do as much drawing offset
 * adjustment as possible by moving the base offset of the 3 buffers, which is
 * restricted to tile boundaries.
 *
 * For each buffer, the remainder must be applied through the x/y draw offset.
 * This returns the worst-case mask of the low bits that have to go into the
 * packet.  If the 3 buffers don't agree on the drawing offset ANDed with this
 * mask, then we're in trouble.
 */
void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                uint32_t depth_level,
                                uint32_t depth_layer,
                                struct intel_mipmap_tree *stencil_mt,
                                uint32_t *out_tile_mask_x,
                                uint32_t *out_tile_mask_y)
{
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_mt) {
      intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
                           depth_mt->cpp, false,
                           &tile_mask_x, &tile_mask_y);

      if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
         intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
                              depth_mt->hiz_buf->mt->tr_mode,
                              depth_mt->hiz_buf->mt->cpp,
                              false, &hiz_tile_mask_x,
                              &hiz_tile_mask_y);

         /* Each HiZ row represents 2 rows of pixels */
         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;

         tile_mask_x |= hiz_tile_mask_x;
         tile_mask_y |= hiz_tile_mask_y;
      }
   }

   if (stencil_mt) {
      if (stencil_mt->stencil_mt)
	 stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_get_tile_masks(stencil_mt->tiling,
                              stencil_mt->tr_mode,
                              stencil_mt->cpp,
                              false, &stencil_tile_mask_x,
                              &stencil_tile_mask_y);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   *out_tile_mask_x = tile_mask_x;
   *out_tile_mask_y = tile_mask_y;
}
brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
                                     unsigned int level,
                                     unsigned int layer,
                                     gen6_hiz_op op)
{
   this->hiz_op = op;

   depth.set(mt, level, layer);

   /* Align the rectangle primitive to 8x4 pixels.
    *
    * During fast depth clears, the emitted rectangle primitive  must be
    * aligned to 8x4 pixels.  From the Ivybridge PRM, Vol 2 Part 1 Section
    * 11.5.3.1 Depth Buffer Clear (and the matching section in the Sandybridge
    * PRM):
    *     If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
    *     aligned to an 8x4 pixel block relative to the upper left corner
    *     of the depth buffer [...]
    *
    * For hiz resolves, the rectangle must also be 8x4 aligned. Item
    * WaHizAmbiguate8x4Aligned from the Haswell workarounds page and the
    * Ivybridge simulator require the alignment.
    *
    * To be safe, let's just align the rect for all hiz operations and all
    * hardware generations.
    *
    * However, for some miptree slices of a Z24 texture, emitting an 8x4
    * aligned rectangle that covers the slice may clobber adjacent slices if
    * we strictly adhered to the texture alignments specified in the PRM.  The
    * Ivybridge PRM, Section "Alignment Unit Size", states that
    * SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24 surfaces,
    * not 8. But commit 1f112cc increased the alignment from 4 to 8, which
    * prevents the clobbering.
    */
   depth.width = ALIGN(depth.width, 8);
   depth.height = ALIGN(depth.height, 4);

   x1 = depth.width;
   y1 = depth.height;

   assert(intel_miptree_level_has_hiz(mt, level));

   switch (mt->format) {
   case MESA_FORMAT_Z_UNORM16:       depth_format = BRW_DEPTHFORMAT_D16_UNORM; break;
   case MESA_FORMAT_Z_FLOAT32: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break;
   case MESA_FORMAT_Z24_UNORM_X8_UINT:    depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
   default:                    unreachable("not reached");
   }
}
Esempio n. 3
0
static void
gen6_blorp_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
                    unsigned int level, unsigned int layer, enum blorp_hiz_op op)
{
   assert(intel_miptree_level_has_hiz(mt, level));

   struct isl_surf isl_tmp[2];
   struct blorp_surf surf;
   blorp_surf_for_miptree(brw, &surf, mt, true, (1 << ISL_AUX_USAGE_HIZ),
                          &level, layer, 1, isl_tmp);

   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_gen6_hiz_op(&batch, &surf, level, layer, op);
   blorp_batch_finish(&batch);
}
Esempio n. 4
0
/**
 * Returns the mask of how many bits of x and y must be handled through the
 * depthbuffer's draw offset x and y fields.
 *
 * The draw offset x/y field of the depthbuffer packet is unfortunately shared
 * between the depth, hiz, and stencil buffers.  Because it can be hard to get
 * all 3 to agree on this value, we want to do as much drawing offset
 * adjustment as possible by moving the base offset of the 3 buffers, which is
 * restricted to tile boundaries.
 *
 * For each buffer, the remainder must be applied through the x/y draw offset.
 * This returns the worst-case mask of the low bits that have to go into the
 * packet.  If the 3 buffers don't agree on the drawing offset ANDed with this
 * mask, then we're in trouble.
 */
static void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                uint32_t depth_level,
                                uint32_t depth_layer,
                                struct intel_mipmap_tree *stencil_mt,
                                uint32_t *out_tile_mask_x,
                                uint32_t *out_tile_mask_y)
{
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_mt) {
      intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
                           depth_mt->cpp,
                           &tile_mask_x, &tile_mask_y);
      assert(!intel_miptree_level_has_hiz(depth_mt, depth_level));
   }

   if (stencil_mt) {
      if (stencil_mt->stencil_mt)
	 stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_get_tile_masks(stencil_mt->tiling,
                              stencil_mt->tr_mode,
                              stencil_mt->cpp,
                              &stencil_tile_mask_x,
                              &stencil_tile_mask_y);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   *out_tile_mask_x = tile_mask_x;
   *out_tile_mask_y = tile_mask_y;
}
Esempio n. 5
0
/**
 * Implements fast depth clears on gen6+.
 *
 * Fast clears basically work by setting a flag in each of the subspans
 * represented in the HiZ buffer that says "When you need the depth values for
 * this subspan, it's the hardware's current clear value."  Then later rendering
 * can just use the static clear value instead of referencing memory.
 *
 * The tricky part of the implementation is that you have to have the clear
 * value that was used on the depth buffer in place for all further rendering,
 * at least until a resolve to the real depth buffer happens.
 */
static bool
brw_fast_clear_depth(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_mipmap_tree *mt = depth_irb->mt;
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->gen < 6)
      return false;

   if (!intel_renderbuffer_has_hiz(depth_irb))
      return false;

   /* We only handle full buffer clears -- otherwise you'd have to track whether
    * a previous clear had happened at a different clear value and resolve it
    * first.
    */
   if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(fb)) {
      perf_debug("Failed to fast clear %dx%d depth because of scissors.  "
                 "Possible 5%% performance win if avoided.\n",
                 mt->surf.logical_level0_px.width,
                 mt->surf.logical_level0_px.height);
      return false;
   }

   switch (mt->format) {
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *      enabled (the legacy method of clearing must be performed):
       *
       *      - If the depth buffer format is D32_FLOAT_S8X24_UINT or
       *        D24_UNORM_S8_UINT.
       */
      return false;

   case MESA_FORMAT_Z_UNORM16:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *      enabled (the legacy method of clearing must be performed):
       *
       *      - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
       *        width of the map (LOD0) is not multiple of 16, fast clear
       *        optimization must be disabled.
       */
      if (devinfo->gen == 6 &&
          (minify(mt->surf.phys_level0_sa.width,
                  depth_irb->mt_level - mt->first_level) % 16) != 0)
	 return false;
      break;

   default:
      break;
   }

   /* Quantize the clear value to what can be stored in the actual depth
    * buffer.  This makes the following check more accurate because it now
    * checks if the actual depth bits will match.  It also prevents us from
    * getting a too-accurate depth value during depth testing or when sampling
    * with HiZ enabled.
    */
   float clear_value =
      mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear :
      (unsigned)(ctx->Depth.Clear * fb->_DepthMax) / (float)fb->_DepthMax;

   const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1;

   /* If we're clearing to a new clear value, then we need to resolve any clear
    * flags out of the HiZ buffer into the real depth buffer.
    */
   if (mt->fast_clear_color.f32[0] != clear_value) {
      for (uint32_t level = mt->first_level; level <= mt->last_level; level++) {
         if (!intel_miptree_level_has_hiz(mt, level))
            continue;

         const unsigned level_layers = brw_get_num_logical_layers(mt, level);

         for (uint32_t layer = 0; layer < level_layers; layer++) {
            if (level == depth_irb->mt_level &&
                layer >= depth_irb->mt_layer &&
                layer < depth_irb->mt_layer + num_layers) {
               /* We're going to clear this layer anyway.  Leave it alone. */
               continue;
            }

            enum isl_aux_state aux_state =
               intel_miptree_get_aux_state(mt, level, layer);

            if (aux_state != ISL_AUX_STATE_CLEAR &&
                aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
               /* This slice doesn't have any fast-cleared bits. */
               continue;
            }

            /* If we got here, then the level may have fast-clear bits that
             * use the old clear value.  We need to do a depth resolve to get
             * rid of their use of the clear value before we can change it.
             * Fortunately, few applications ever change their depth clear
             * value so this shouldn't happen often.
             */
            intel_hiz_exec(brw, mt, level, layer, 1,
                           ISL_AUX_OP_FULL_RESOLVE);
            intel_miptree_set_aux_state(brw, mt, level, layer, 1,
                                        ISL_AUX_STATE_RESOLVED);
         }
      }

      intel_miptree_set_depth_clear_value(ctx, mt, clear_value);
   }

   bool need_clear = false;
   for (unsigned a = 0; a < num_layers; a++) {
      enum isl_aux_state aux_state =
         intel_miptree_get_aux_state(mt, depth_irb->mt_level,
                                     depth_irb->mt_layer + a);

      if (aux_state != ISL_AUX_STATE_CLEAR) {
         need_clear = true;
         break;
      }
   }

   if (!need_clear) {
      /* If all of the layers we intend to clear are already in the clear
       * state then simply updating the miptree fast clear value is sufficient
       * to change their clear value.
       */
      return true;
   }

   for (unsigned a = 0; a < num_layers; a++) {
      enum isl_aux_state aux_state =
         intel_miptree_get_aux_state(mt, depth_irb->mt_level,
                                     depth_irb->mt_layer + a);

      if (aux_state != ISL_AUX_STATE_CLEAR) {
         intel_hiz_exec(brw, mt, depth_irb->mt_level,
                        depth_irb->mt_layer + a, 1,
                        ISL_AUX_OP_FAST_CLEAR);
      }
   }

   /* Now, the HiZ buffer contains data that needs to be resolved to the depth
    * buffer.
    */
   intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
                               depth_irb->mt_layer, num_layers,
                               ISL_AUX_STATE_CLEAR);

   return true;
}