void brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt) { DBG("%s to mt %p\n", __FUNCTION__, mt); const mesa_format format = _mesa_get_srgb_format_linear(mt->format); struct brw_blorp_params params; brw_blorp_params_init(¶ms); brw_blorp_surface_info_init(brw, ¶ms.dst, mt, 0 /* level */, 0 /* layer */, format, true); brw_get_resolve_rect(brw, mt, ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); if (intel_miptree_is_lossless_compressed(brw, mt)) params.resolve_type = GEN9_PS_RENDER_TARGET_RESOLVE_FULL; else params.resolve_type = GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE; /* Note: there is no need to initialize push constants because it doesn't * matter what data gets dispatched to the render target. However, we must * ensure that the fragment shader delivers the data using the "replicated * color" message. */ brw_blorp_params_get_clear_kernel(brw, ¶ms, true); brw_blorp_exec(brw, ¶ms); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; }
void brw_meta_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt) { struct gl_context *ctx = &brw->ctx; GLuint fbo; struct gl_renderbuffer *rb; struct rect rect; brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); _mesa_GenFramebuffers(1, &fbo); rb = brw_get_rb_for_slice(brw, mt, 0, 0, false); _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, rb); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); brw_fast_clear_init(brw); use_rectlist(brw, true); brw_bind_rep_write_shader(brw, (float *) fast_clear_color); /* SKL+ also has a resolve mode for compressed render targets and thus more * bits to let us select the type of resolve. For fast clear resolves, it * turns out we can use the same value as pre-SKL though. */ if (intel_miptree_is_lossless_compressed(brw, mt)) set_fast_clear_op(brw, GEN9_PS_RENDER_TARGET_RESOLVE_FULL); else set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; get_resolve_rect(brw, mt, &rect); brw_draw_rectlist(brw, &rect, 1); set_fast_clear_op(brw, 0); use_rectlist(brw, false); _mesa_reference_renderbuffer(&rb, NULL); _mesa_DeleteFramebuffers(1, &fbo); _mesa_meta_end(ctx); /* We're typically called from intel_update_state() and we're supposed to * return with the state all updated to what it was before * brw_meta_resolve_color() was called. The meta rendering will have * messed up the state and we need to call _mesa_update_state() again to * get back to where we were supposed to be when resolve was called. */ if (ctx->NewState) _mesa_update_state(ctx); }
/** * \brief Call this after drawing to mark which buffers need resolving * * If the depth buffer was written to and if it has an accompanying HiZ * buffer, then mark that it needs a depth resolve. * * If the color buffer is a multisample window system buffer, then * mark that it needs a downsample. * * Also mark any render targets which will be textured as needing a render * cache flush. */ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; struct gl_framebuffer *fb = ctx->DrawBuffer; struct intel_renderbuffer *front_irb = NULL; struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH]; if (_mesa_is_front_buffer_drawing(fb)) front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); if (front_irb) front_irb->need_downsample = true; if (back_irb) back_irb->need_downsample = true; if (depth_irb && ctx->Depth.Mask) { intel_renderbuffer_att_set_needs_depth_resolve(depth_att); brw_render_cache_set_add_bo(brw, depth_irb->mt->bo); } if (ctx->Extensions.ARB_stencil_texturing && stencil_irb && ctx->Stencil._WriteEnabled) { brw_render_cache_set_add_bo(brw, stencil_irb->mt->bo); } for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { struct intel_renderbuffer *irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); if (irb) { brw_render_cache_set_add_bo(brw, irb->mt->bo); if (intel_miptree_is_lossless_compressed(brw, irb->mt)) { irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED; } } } }
static bool do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb, struct gl_renderbuffer *rb, unsigned buf, bool partial_clear, bool encode_srgb, unsigned layer) { struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); mesa_format format = irb->mt->format; struct brw_blorp_params params; brw_blorp_params_init(¶ms); if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB) format = _mesa_get_srgb_format_linear(format); brw_blorp_surface_info_init(brw, ¶ms.dst, irb->mt, irb->mt_level, layer, format, true); /* Override the surface format according to the context's sRGB rules. */ params.dst.brw_surfaceformat = brw->render_target_format[format]; params.x0 = fb->_Xmin; params.x1 = fb->_Xmax; if (rb->Name != 0) { params.y0 = fb->_Ymin; params.y1 = fb->_Ymax; } else { params.y0 = rb->Height - fb->_Ymax; params.y1 = rb->Height - fb->_Ymin; } memcpy(¶ms.wm_inputs, ctx->Color.ClearColor.f, sizeof(float) * 4); bool use_simd16_replicated_data = true; /* From the SNB PRM (Vol4_Part1): * * "Replicated data (Message Type = 111) is only supported when * accessing tiled memory. Using this Message Type to access linear * (untiled) memory is UNDEFINED." */ if (irb->mt->tiling == I915_TILING_NONE) use_simd16_replicated_data = false; /* Constant color writes ignore everyting in blend and color calculator * state. This is not documented. */ if (set_write_disables(irb, ctx->Color.ColorMask[buf], params.color_write_disable)) use_simd16_replicated_data = false; if (irb->mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_NO_MCS && !partial_clear && use_simd16_replicated_data && brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor)) { memset(¶ms.wm_inputs, 0xff, 4*sizeof(float)); params.fast_clear_op = GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE; brw_get_fast_clear_rect(brw, fb, irb->mt, ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); } else { brw_meta_get_buffer_rect(fb, ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); } brw_blorp_params_get_clear_kernel(brw, ¶ms, use_simd16_replicated_data); const bool is_fast_clear = params.fast_clear_op == GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE; if (is_fast_clear) { /* Record the clear color in the miptree so that it will be * programmed in SURFACE_STATE by later rendering and resolve * operations. */ const bool color_updated = brw_meta_set_fast_clear_color( brw, irb->mt, &ctx->Color.ClearColor); /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear * is redundant and can be skipped. */ if (!color_updated && irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR) return true; /* If the MCS buffer hasn't been allocated yet, we need to allocate * it now. */ if (!irb->mt->mcs_mt) { if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt)) { /* MCS allocation failed--probably this will only happen in * out-of-memory conditions. But in any case, try to recover * by falling back to a non-blorp clear technique. */ return false; } } } const char *clear_type; if (is_fast_clear) clear_type = "fast"; else if (use_simd16_replicated_data) clear_type = "replicated"; else clear_type = "slow"; DBG("%s (%s) to mt %p level %d layer %d\n", __FUNCTION__, clear_type, irb->mt, irb->mt_level, irb->mt_layer); brw_blorp_exec(brw, ¶ms); if (is_fast_clear) { /* Now that the fast clear has occurred, put the buffer in * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing * redundant clears. */ irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; } else if (intel_miptree_is_lossless_compressed(brw, irb->mt)) { /* Compressed buffers can be cleared also using normal rep-clear. In * such case they bahave such as if they were drawn using normal 3D * render pipeline, and we simply mark the mcs as dirty. */ assert(partial_clear); irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED; } return true; }
static bool do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb, struct gl_renderbuffer *rb, unsigned buf, bool partial_clear, bool encode_srgb) { struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); mesa_format format = irb->mt->format; uint32_t x0, x1, y0, y1; if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB) format = _mesa_get_srgb_format_linear(format); x0 = fb->_Xmin; x1 = fb->_Xmax; if (rb->Name != 0) { y0 = fb->_Ymin; y1 = fb->_Ymax; } else { y0 = rb->Height - fb->_Ymax; y1 = rb->Height - fb->_Ymin; } /* If the clear region is empty, just return. */ if (x0 == x1 || y0 == y1) return true; bool can_fast_clear = !partial_clear; bool color_write_disable[4] = { false, false, false, false }; if (set_write_disables(irb, ctx->Color.ColorMask[buf], color_write_disable)) can_fast_clear = false; if (irb->mt->no_ccs || !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor)) can_fast_clear = false; const unsigned logical_layer = irb_logical_mt_layer(irb); const enum intel_fast_clear_state fast_clear_state = intel_miptree_get_fast_clear_state(irb->mt, irb->mt_level, logical_layer); /* Surface state can only record one fast clear color value. Therefore * unless different levels/layers agree on the color it can be used to * represent only single level/layer. Here it will be reserved for the * first slice (level 0, layer 0). */ if (irb->layer_count > 1 || irb->mt_level || irb->mt_layer) can_fast_clear = false; if (can_fast_clear) { union gl_color_union override_color = brw_meta_convert_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor); /* Record the clear color in the miptree so that it will be * programmed in SURFACE_STATE by later rendering and resolve * operations. */ const bool color_updated = brw_meta_set_fast_clear_color( brw, &irb->mt->gen9_fast_clear_color, &override_color); /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear * is redundant and can be skipped. */ if (!color_updated && fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR) return true; /* If the MCS buffer hasn't been allocated yet, we need to allocate * it now. */ if (!irb->mt->mcs_buf) { assert(!intel_miptree_is_lossless_compressed(brw, irb->mt)); if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt, false)) { /* MCS allocation failed--probably this will only happen in * out-of-memory conditions. But in any case, try to recover * by falling back to a non-blorp clear technique. */ return false; } } } const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1; /* We can't setup the blorp_surf until we've allocated the MCS above */ struct isl_surf isl_tmp[2]; struct blorp_surf surf; unsigned level = irb->mt_level; blorp_surf_for_miptree(brw, &surf, irb->mt, true, (1 << ISL_AUX_USAGE_MCS) | (1 << ISL_AUX_USAGE_CCS_E) | (1 << ISL_AUX_USAGE_CCS_D), &level, logical_layer, num_layers, isl_tmp); if (can_fast_clear) { DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__, irb->mt, irb->mt_level, irb->mt_layer, num_layers); struct blorp_batch batch; blorp_batch_init(&brw->blorp, &batch, brw, 0); blorp_fast_clear(&batch, &surf, (enum isl_format)brw->render_target_format[format], level, logical_layer, num_layers, x0, y0, x1, y1); blorp_batch_finish(&batch); /* Now that the fast clear has occurred, put the buffer in * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing * redundant clears. */ intel_miptree_set_fast_clear_state(brw, irb->mt, irb->mt_level, logical_layer, num_layers, INTEL_FAST_CLEAR_STATE_CLEAR); } else { DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__, irb->mt, irb->mt_level, irb->mt_layer, num_layers); union isl_color_value clear_color; memcpy(clear_color.f32, ctx->Color.ClearColor.f, sizeof(float) * 4); struct blorp_batch batch; blorp_batch_init(&brw->blorp, &batch, brw, 0); blorp_clear(&batch, &surf, (enum isl_format)brw->render_target_format[format], ISL_SWIZZLE_IDENTITY, level, irb_logical_mt_layer(irb), num_layers, x0, y0, x1, y1, clear_color, color_write_disable); blorp_batch_finish(&batch); } return true; }
static void get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb, struct intel_renderbuffer *irb, struct rect *rect) { unsigned int x_align, y_align; unsigned int x_scaledown, y_scaledown; /* Only single sampled surfaces need to (and actually can) be resolved. */ if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || intel_miptree_is_lossless_compressed(brw, irb->mt)) { /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p327): * * Clear pass must have a clear rectangle that must follow * alignment rules in terms of pixels and lines as shown in the * table below. Further, the clear-rectangle height and width * must be multiple of the following dimensions. If the height * and width of the render target being cleared do not meet these * requirements, an MCS buffer can be created such that it * follows the requirement and covers the RT. * * The alignment size in the table that follows is related to the * alignment size returned by intel_get_non_msrt_mcs_alignment(), but * with X alignment multiplied by 16 and Y alignment multiplied by 32. */ intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align); x_align *= 16; /* SKL+ line alignment requirement for Y-tiled are half those of the prior * generations. */ if (brw->gen >= 9) y_align *= 16; else y_align *= 32; /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p327): * * In order to optimize the performance MCS buffer (when bound to * 1X RT) clear similarly to MCS buffer clear for MSRT case, * clear rect is required to be scaled by the following factors * in the horizontal and vertical directions: * * The X and Y scale down factors in the table that follows are each * equal to half the alignment value computed above. */ x_scaledown = x_align / 2; y_scaledown = y_align / 2; /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color * Clear of Non-MultiSampled Render Target Restrictions": * * Clear rectangle must be aligned to two times the number of * pixels in the table shown below due to 16x16 hashing across the * slice. */ x_align *= 2; y_align *= 2; } else { /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "MSAA Compression" bullet (p326): * * Clear pass for this case requires that scaled down primitive * is sent down with upper left co-ordinate to coincide with * actual rectangle being cleared. For MSAA, clear rectangle’s * height and width need to as show in the following table in * terms of (width,height) of the RT. * * MSAA Width of Clear Rect Height of Clear Rect * 2X Ceil(1/8*width) Ceil(1/2*height) * 4X Ceil(1/8*width) Ceil(1/2*height) * 8X Ceil(1/2*width) Ceil(1/2*height) * 16X width Ceil(1/2*height) * * The text "with upper left co-ordinate to coincide with actual * rectangle being cleared" is a little confusing--it seems to imply * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to * feed the pipeline using the rectangle (x,y) to * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on * the number of samples. Experiments indicate that this is not * quite correct; actually, what the hardware appears to do is to * align whatever rectangle is sent down the pipeline to the nearest * multiple of 2x2 blocks, and then scale it up by a factor of N * horizontally and 2 vertically. So the resulting alignment is 4 * vertically and either 4 or 16 horizontally, and the scaledown * factor is 2 vertically and either 2 or 8 horizontally. */ switch (irb->mt->num_samples) { case 2: case 4: x_scaledown = 8; break; case 8: x_scaledown = 2; break; case 16: x_scaledown = 1; break; default: unreachable("Unexpected sample count for fast clear"); } y_scaledown = 2; x_align = x_scaledown * 2; y_align = y_scaledown * 2; } rect->x0 = fb->_Xmin; rect->x1 = fb->_Xmax; if (fb->Name != 0) { rect->y0 = fb->_Ymin; rect->y1 = fb->_Ymax; } else { rect->y0 = fb->Height - fb->_Ymax; rect->y1 = fb->Height - fb->_Ymin; } rect->x0 = ROUND_DOWN_TO(rect->x0, x_align) / x_scaledown; rect->y0 = ROUND_DOWN_TO(rect->y0, y_align) / y_scaledown; rect->x1 = ALIGN(rect->x1, x_align) / x_scaledown; rect->y1 = ALIGN(rect->y1, y_align) / y_scaledown; }