Пример #1
0
    /**
     * Run a BLORP color-resolve pass over a miptree.
     *
     * The destination surface is set up for level 0, layer 0 of \p mt using the
     * format returned by _mesa_get_srgb_format_linear() for the miptree's
     * format.  The rectangle comes from brw_get_resolve_rect().  After the pass
     * has been executed the miptree's fast_clear_state is set to
     * INTEL_FAST_CLEAR_STATE_RESOLVED.
     *
     * \param brw  context the pass is executed on
     * \param mt   miptree to resolve
     */
    void
    brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt)
    {
        DBG("%s to mt %p\n", __FUNCTION__, mt);

        const mesa_format linear_format =
            _mesa_get_srgb_format_linear(mt->format);

        struct brw_blorp_params params;
        brw_blorp_params_init(&params);

        brw_blorp_surface_info_init(brw, &params.dst, mt,
                                    0 /* level */, 0 /* layer */,
                                    linear_format, true);

        brw_get_resolve_rect(brw, mt,
                             &params.x0, &params.y0,
                             &params.x1, &params.y1);

        /* Losslessly compressed miptrees get the full resolve; everything else
         * uses the plain resolve-enable mode.
         */
        params.resolve_type = intel_miptree_is_lossless_compressed(brw, mt)
                              ? GEN9_PS_RENDER_TARGET_RESOLVE_FULL
                              : GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE;

        /* Push constants are deliberately left untouched: the data dispatched
         * to the render target is irrelevant for a resolve.  The fragment
         * shader must still deliver it with the "replicated color" message,
         * so the kernel is requested with the replicated-data flag set.
         */
        brw_blorp_params_get_clear_kernel(brw, &params, true);

        brw_blorp_exec(brw, &params);
        mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
    }
void
brw_meta_resolve_color(struct brw_context *brw,
                       struct intel_mipmap_tree *mt)
{
    struct gl_context *ctx = &brw->ctx;
    GLuint fbo;
    struct gl_renderbuffer *rb;
    struct rect rect;

    brw_emit_mi_flush(brw);

    _mesa_meta_begin(ctx, MESA_META_ALL);

    _mesa_GenFramebuffers(1, &fbo);
    rb = brw_get_rb_for_slice(brw, mt, 0, 0, false);

    _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
    _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0,
                                   rb);
    _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0);

    brw_fast_clear_init(brw);

    use_rectlist(brw, true);

    brw_bind_rep_write_shader(brw, (float *) fast_clear_color);

    /* SKL+ also has a resolve mode for compressed render targets and thus more
     * bits to let us select the type of resolve.  For fast clear resolves, it
     * turns out we can use the same value as pre-SKL though.
     */
    if (intel_miptree_is_lossless_compressed(brw, mt))
        set_fast_clear_op(brw, GEN9_PS_RENDER_TARGET_RESOLVE_FULL);
    else
        set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);

    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
    get_resolve_rect(brw, mt, &rect);

    brw_draw_rectlist(brw, &rect, 1);

    set_fast_clear_op(brw, 0);
    use_rectlist(brw, false);

    _mesa_reference_renderbuffer(&rb, NULL);
    _mesa_DeleteFramebuffers(1, &fbo);

    _mesa_meta_end(ctx);

    /* We're typically called from intel_update_state() and we're supposed to
     * return with the state all updated to what it was before
     * brw_meta_resolve_color() was called.  The meta rendering will have
     * messed up the state and we need to call _mesa_update_state() again to
     * get back to where we were supposed to be when resolve was called.
     */
    if (ctx->NewState)
        _mesa_update_state(ctx);
}
Пример #3
0
/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
static void
brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   struct intel_renderbuffer *front_irb = NULL;
   struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];

   if (_mesa_is_front_buffer_drawing(fb))
      front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);

   if (front_irb)
      front_irb->need_downsample = true;
   if (back_irb)
      back_irb->need_downsample = true;
   if (depth_irb && ctx->Depth.Mask) {
      intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
      brw_render_cache_set_add_bo(brw, depth_irb->mt->bo);
   }

   if (ctx->Extensions.ARB_stencil_texturing &&
       stencil_irb && ctx->Stencil._WriteEnabled) {
      brw_render_cache_set_add_bo(brw, stencil_irb->mt->bo);
   }

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb) {
         brw_render_cache_set_add_bo(brw, irb->mt->bo);

         if (intel_miptree_is_lossless_compressed(brw, irb->mt)) {
            irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED;
         }
      }
   }
}
Пример #4
0
/**
 * Clear a single layer of one color renderbuffer with BLORP.
 *
 * Depending on the buffer and the GL state the clear is performed as a fast
 * clear, a replicated-data clear or a "slow" clear; the chosen kind is
 * reported through DBG().
 *
 * \param brw            context
 * \param fb             framebuffer providing the clear bounds
 * \param rb             renderbuffer to clear
 * \param buf            index into ctx->Color.ColorMask for this buffer
 * \param partial_clear  when true the fast-clear path is never taken
 * \param encode_srgb    when false an sRGB format is replaced by its linear
 *                       counterpart
 * \param layer          miptree layer the destination surface is set up for
 *
 * \return false only when the MCS buffer needed for a fast clear could not
 *         be allocated (the caller is expected to fall back to another
 *         clear technique); true otherwise, including when a redundant fast
 *         clear is skipped.
 */
static bool
do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
                      struct gl_renderbuffer *rb, unsigned buf,
                      bool partial_clear, bool encode_srgb, unsigned layer)
{
    struct gl_context *ctx = &brw->ctx;
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    mesa_format format = irb->mt->format;

    struct brw_blorp_params params;
    brw_blorp_params_init(&params);

    if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
        format = _mesa_get_srgb_format_linear(format);

    brw_blorp_surface_info_init(brw, &params.dst, irb->mt, irb->mt_level,
                                layer, format, true);

    /* Override the surface format according to the context's sRGB rules. */
    params.dst.brw_surfaceformat = brw->render_target_format[format];

    /* Buffers with Name == 0 have their Y range mirrored against the
     * renderbuffer height.
     */
    params.x0 = fb->_Xmin;
    params.x1 = fb->_Xmax;
    if (rb->Name != 0) {
        params.y0 = fb->_Ymin;
        params.y1 = fb->_Ymax;
    } else {
        params.y0 = rb->Height - fb->_Ymax;
        params.y1 = rb->Height - fb->_Ymin;
    }

    memcpy(&params.wm_inputs, ctx->Color.ClearColor.f, sizeof(float) * 4);

    bool use_simd16_replicated_data = true;

    /* From the SNB PRM (Vol4_Part1):
     *
     *     "Replicated data (Message Type = 111) is only supported when
     *      accessing tiled memory.  Using this Message Type to access linear
     *      (untiled) memory is UNDEFINED."
     */
    if (irb->mt->tiling == I915_TILING_NONE)
        use_simd16_replicated_data = false;

    /* Constant color writes ignore everything in blend and color calculator
     * state.  This is not documented.
     */
    if (set_write_disables(irb, ctx->Color.ColorMask[buf],
                           params.color_write_disable))
        use_simd16_replicated_data = false;

    if (irb->mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_NO_MCS &&
        !partial_clear && use_simd16_replicated_data &&
        brw_is_color_fast_clear_compatible(brw, irb->mt,
                                           &ctx->Color.ClearColor)) {
        memset(&params.wm_inputs, 0xff, 4 * sizeof(float));
        params.fast_clear_op = GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;

        brw_get_fast_clear_rect(brw, fb, irb->mt, &params.x0, &params.y0,
                                &params.x1, &params.y1);
    } else {
        brw_meta_get_buffer_rect(fb, &params.x0, &params.y0,
                                 &params.x1, &params.y1);
    }

    brw_blorp_params_get_clear_kernel(brw, &params, use_simd16_replicated_data);

    const bool is_fast_clear =
        params.fast_clear_op == GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
    if (is_fast_clear) {
        /* Record the clear color in the miptree so that it will be
         * programmed in SURFACE_STATE by later rendering and resolve
         * operations.
         */
        const bool color_updated = brw_meta_set_fast_clear_color(
                                       brw, irb->mt, &ctx->Color.ClearColor);

        /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
         * is redundant and can be skipped.
         */
        if (!color_updated &&
            irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR)
            return true;

        /* If the MCS buffer hasn't been allocated yet, we need to allocate
         * it now.
         */
        if (!irb->mt->mcs_mt) {
            if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt)) {
                /* MCS allocation failed--probably this will only happen in
                 * out-of-memory conditions.  But in any case, try to recover
                 * by falling back to a non-blorp clear technique.
                 */
                return false;
            }
        }
    }

    const char *clear_type;
    if (is_fast_clear)
        clear_type = "fast";
    else if (use_simd16_replicated_data)
        clear_type = "replicated";
    else
        clear_type = "slow";

    /* Report the layer the surface was actually set up for (the `layer`
     * argument), not irb->mt_layer, which need not be the same value.
     */
    DBG("%s (%s) to mt %p level %d layer %u\n", __FUNCTION__, clear_type,
        irb->mt, irb->mt_level, layer);

    brw_blorp_exec(brw, &params);

    if (is_fast_clear) {
        /* Now that the fast clear has occurred, put the buffer in
         * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
         * redundant clears.
         */
        irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
    } else if (intel_miptree_is_lossless_compressed(brw, irb->mt)) {
        /* Compressed buffers can be cleared also using normal rep-clear. In
         * such case they behave as if they were drawn using normal 3D
         * render pipeline, and we simply mark the mcs as dirty.
         *
         * NOTE(review): this branch is also reachable for a non-partial
         * clear when color writes are masked or the clear color is not
         * fast-clear compatible; confirm the assert below is intended to
         * hold in those cases.
         */
        assert(partial_clear);
        irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED;
    }

    return true;
}
Пример #5
0
/**
 * Clear one color renderbuffer through BLORP, fast-clearing when possible.
 *
 * \param brw            context
 * \param fb             framebuffer whose _Xmin/_Xmax/_Ymin/_Ymax bound the
 *                       clear
 * \param rb             renderbuffer to clear
 * \param buf            index into ctx->Color.ColorMask for this buffer
 * \param partial_clear  when true the fast-clear path is never taken
 * \param encode_srgb    when false an sRGB format is replaced by its linear
 *                       counterpart before the render-target format lookup
 *
 * \return false only when the MCS allocation needed for a fast clear fails;
 *         true otherwise (including empty and redundant clears).
 */
static bool
do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
                      struct gl_renderbuffer *rb, unsigned buf,
                      bool partial_clear, bool encode_srgb)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   mesa_format format = irb->mt->format;
   uint32_t x0, x1, y0, y1;

   if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
      format = _mesa_get_srgb_format_linear(format);

   /* Buffers with Name == 0 have their Y range mirrored against the
    * renderbuffer height.
    */
   x0 = fb->_Xmin;
   x1 = fb->_Xmax;
   if (rb->Name != 0) {
      y0 = fb->_Ymin;
      y1 = fb->_Ymax;
   } else {
      y0 = rb->Height - fb->_Ymax;
      y1 = rb->Height - fb->_Ymin;
   }

   /* If the clear region is empty, just return. */
   if (x0 == x1 || y0 == y1)
      return true;

   /* Start from "fast clear unless partial" and veto it below for each
    * condition that rules a fast clear out.
    */
   bool can_fast_clear = !partial_clear;

   bool color_write_disable[4] = { false, false, false, false };
   if (set_write_disables(irb, ctx->Color.ColorMask[buf], color_write_disable))
      can_fast_clear = false;

   if (irb->mt->no_ccs ||
       !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor))
      can_fast_clear = false;

   const unsigned logical_layer = irb_logical_mt_layer(irb);
   const enum intel_fast_clear_state fast_clear_state =
      intel_miptree_get_fast_clear_state(irb->mt, irb->mt_level,
                                         logical_layer);

   /* Surface state can only record one fast clear color value. Therefore
    * unless different levels/layers agree on the color it can be used to
    * represent only single level/layer. Here it will be reserved for the
    * first slice (level 0, layer 0).
    */
   if (irb->layer_count > 1 || irb->mt_level || irb->mt_layer)
      can_fast_clear = false;

   if (can_fast_clear) {
      union gl_color_union override_color =
         brw_meta_convert_fast_clear_color(brw, irb->mt,
                                           &ctx->Color.ClearColor);

      /* Record the clear color in the miptree so that it will be
       * programmed in SURFACE_STATE by later rendering and resolve
       * operations.
       */
      const bool color_updated = brw_meta_set_fast_clear_color(
                                    brw, &irb->mt->gen9_fast_clear_color,
                                    &override_color);

      /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
       * is redundant and can be skipped.
       */
      if (!color_updated && fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR)
         return true;

      /* If the MCS buffer hasn't been allocated yet, we need to allocate
       * it now.
       */
      if (!irb->mt->mcs_buf) {
         assert(!intel_miptree_is_lossless_compressed(brw, irb->mt));
         if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt, false)) {
            /* MCS allocation failed--probably this will only happen in
             * out-of-memory conditions.  But in any case, try to recover
             * by falling back to a non-blorp clear technique.
             */
            return false;
         }
      }
   }

   /* Clear every layer of the renderbuffer when the framebuffer reports a
    * non-zero MaxNumLayers, otherwise just one.
    */
   const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1;

   /* We can't setup the blorp_surf until we've allocated the MCS above */
   struct isl_surf isl_tmp[2];
   struct blorp_surf surf;
   unsigned level = irb->mt_level;
   blorp_surf_for_miptree(brw, &surf, irb->mt, true,
                          (1 << ISL_AUX_USAGE_MCS) |
                          (1 << ISL_AUX_USAGE_CCS_E) |
                          (1 << ISL_AUX_USAGE_CCS_D),
                          &level, logical_layer, num_layers, isl_tmp);

   if (can_fast_clear) {
      DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
          irb->mt, irb->mt_level, irb->mt_layer, num_layers);

      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
      blorp_fast_clear(&batch, &surf,
                       (enum isl_format)brw->render_target_format[format],
                       level, logical_layer, num_layers,
                       x0, y0, x1, y1);
      blorp_batch_finish(&batch);

      /* Now that the fast clear has occurred, put the buffer in
       * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
       * redundant clears.
       */
      intel_miptree_set_fast_clear_state(brw, irb->mt, irb->mt_level,
                                         logical_layer, num_layers,
                                         INTEL_FAST_CLEAR_STATE_CLEAR);
   } else {
      DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__,
          irb->mt, irb->mt_level, irb->mt_layer, num_layers);

      /* The slow path writes the GL clear color as-is, honoring the
       * per-channel write disables computed earlier.
       */
      union isl_color_value clear_color;
      memcpy(clear_color.f32, ctx->Color.ClearColor.f, sizeof(float) * 4);

      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
      blorp_clear(&batch, &surf,
                  (enum isl_format)brw->render_target_format[format],
                  ISL_SWIZZLE_IDENTITY,
                  level, irb_logical_mt_layer(irb), num_layers,
                  x0, y0, x1, y1,
                  clear_color, color_write_disable);
      blorp_batch_finish(&batch);
   }

   return true;
}
Пример #6
0
/**
 * Compute the rectangle to draw for a fast color clear of \p irb.
 *
 * The framebuffer's clear bounds are expanded outwards to the alignment the
 * hardware requires and then divided by the scale-down factors that apply
 * to the miptree (non-multisampled / losslessly compressed vs. multisampled).
 *
 * \param brw   context (only brw->gen is consulted directly)
 * \param fb    framebuffer providing _Xmin/_Xmax/_Ymin/_Ymax, Height and Name
 * \param irb   renderbuffer whose miptree is being fast cleared
 * \param rect  output: aligned and scaled-down rectangle
 */
static void
get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
                    struct intel_renderbuffer *irb, struct rect *rect)
{
   unsigned int align_x, align_y;
   unsigned int scale_x, scale_y;

   /* Miptrees without an MSAA layout and losslessly compressed miptrees
    * share the same set of rules.
    */
   const bool non_msrt_rules =
      irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE ||
      intel_miptree_is_lossless_compressed(brw, irb->mt);

   if (non_msrt_rules) {
      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
       *
       *     Clear pass must have a clear rectangle that must follow
       *     alignment rules in terms of pixels and lines as shown in the
       *     table below. Further, the clear-rectangle height and width
       *     must be multiple of the following dimensions. If the height
       *     and width of the render target being cleared do not meet these
       *     requirements, an MCS buffer can be created such that it
       *     follows the requirement and covers the RT.
       *
       * The table's alignment is derived from the value reported by
       * intel_get_non_msrt_mcs_alignment(): X is multiplied by 16 and Y by
       * 32.  On SKL+ the line alignment for Y-tiled surfaces is half that
       * of earlier generations, hence the factor of 16 for gen >= 9.
       */
      intel_get_non_msrt_mcs_alignment(irb->mt, &align_x, &align_y);
      align_x *= 16;
      align_y *= (brw->gen >= 9) ? 16 : 32;

      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
       *
       *     In order to optimize the performance MCS buffer (when bound to
       *     1X RT) clear similarly to MCS buffer clear for MSRT case,
       *     clear rect is required to be scaled by the following factors
       *     in the horizontal and vertical directions:
       *
       * Each scale-down factor in that table equals half of the alignment
       * computed so far.
       */
      scale_x = align_x / 2;
      scale_y = align_y / 2;

      /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
       * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
       * Clear of Non-MultiSampled Render Target Restrictions":
       *
       *   Clear rectangle must be aligned to two times the number of
       *   pixels in the table shown below due to 16x16 hashing across the
       *   slice.
       */
      align_x *= 2;
      align_y *= 2;
   } else {
      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "MSAA Compression" bullet (p326):
       *
       *     Clear pass for this case requires that scaled down primitive
       *     is sent down with upper left co-ordinate to coincide with
       *     actual rectangle being cleared. For MSAA, clear rectangle's
       *     height and width need to as show in the following table in
       *     terms of (width,height) of the RT.
       *
       *     MSAA  Width of Clear Rect  Height of Clear Rect
       *      2X     Ceil(1/8*width)      Ceil(1/2*height)
       *      4X     Ceil(1/8*width)      Ceil(1/2*height)
       *      8X     Ceil(1/2*width)      Ceil(1/2*height)
       *     16X         width            Ceil(1/2*height)
       *
       * The wording "with upper left co-ordinate to coincide with actual
       * rectangle being cleared" suggests that clearing (x,y)..(x+w,y+h)
       * means feeding (x,y)..(x+Ceil(w/N),y+Ceil(h/2)) to the pipeline.
       * Experiments indicate otherwise: the hardware appears to align the
       * rectangle it receives to 2x2 blocks and then scale it up by N
       * horizontally and 2 vertically.  The alignment used here is
       * therefore twice the scale-down factor in each direction.
       */
      switch (irb->mt->num_samples) {
      case 2:
      case 4:
         scale_x = 8;
         break;
      case 8:
         scale_x = 2;
         break;
      case 16:
         scale_x = 1;
         break;
      default:
         unreachable("Unexpected sample count for fast clear");
      }
      scale_y = 2;
      align_x = scale_x * 2;
      align_y = scale_y * 2;
   }

   /* Start from the framebuffer's clear bounds; framebuffers with
    * Name == 0 have their Y range mirrored against the framebuffer height.
    */
   rect->x0 = fb->_Xmin;
   rect->x1 = fb->_Xmax;
   if (fb->Name == 0) {
      rect->y0 = fb->Height - fb->_Ymax;
      rect->y1 = fb->Height - fb->_Ymin;
   } else {
      rect->y0 = fb->_Ymin;
      rect->y1 = fb->_Ymax;
   }

   /* Grow the rectangle outwards to the required alignment, then apply the
    * scale-down factors.
    */
   rect->x0 = ROUND_DOWN_TO(rect->x0, align_x) / scale_x;
   rect->y0 = ROUND_DOWN_TO(rect->y0, align_y) / scale_y;
   rect->x1 = ALIGN(rect->x1, align_x) / scale_x;
   rect->y1 = ALIGN(rect->y1, align_y) / scale_y;
}