/** * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set. */ void gen7_emit_cs_stall_flush(struct brw_context *brw) { brw_emit_pipe_control_write(brw, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE, brw->workaround_bo, 0, 0, 0); }
/** * Emits a PIPE_CONTROL with a non-zero post-sync operation, for * implementing two workarounds on gen6. From section 1.4.7.1 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: * * [DevSNB-C+{W/A}] Before any depth stall flush (including those * produced by non-pipelined state commands), software needs to first * send a PIPE_CONTROL with no bits set except Post-Sync Operation != * 0. * * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. * * And the workaround for these two requires this workaround first: * * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent * BEFORE the pipe-control with a post-sync op and no write-cache * flushes. * * And this last workaround is tricky because of the requirements on * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM * volume 2 part 1: * * "1 of the following must also be set: * - Render Target Cache Flush Enable ([12] of DW1) * - Depth Cache Flush Enable ([0] of DW1) * - Stall at Pixel Scoreboard ([1] of DW1) * - Depth Stall ([13] of DW1) * - Post-Sync Operation ([13] of DW1) * - Notify Enable ([8] of DW1)" * * The cache flushes require the workaround flush that triggered this * one, so we can't use it. Depth stall would trigger the same. * Post-sync nonzero is what triggered this second workaround, so we * can't use that one either. Notify enable is IRQs, which aren't * really our business. That leaves only stall at scoreboard. */ void brw_emit_post_sync_nonzero_flush(struct brw_context *brw) { brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, brw->workaround_bo, 0, 0, 0); }
/** * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input): * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs * to be sent before any combination of VS associated 3DSTATE." */ void gen7_emit_vs_workaround_flush(struct brw_context *brw) { assert(brw->gen == 7); brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE | PIPE_CONTROL_DEPTH_STALL, brw->workaround_bo, 0, 0, 0); }
/** * Emit packets to perform a depth/HiZ resolve or fast depth/stencil clear. * * See the "Optimized Depth Buffer Clear and/or Stencil Buffer Clear" section * of the hardware documentation for details. */ void gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer, enum gen6_hiz_op op) { if (op == GEN6_HIZ_OP_NONE) return; assert(mt->first_level == 0); assert(mt->logical_depth0 >= 1); /* The basic algorithm is: * - If needed, emit 3DSTATE_{DEPTH,HIER_DEPTH,STENCIL}_BUFFER and * 3DSTATE_CLEAR_PARAMS packets to set up the relevant buffers. * - If needed, emit 3DSTATE_DRAWING_RECTANGLE. * - Emit 3DSTATE_WM_HZ_OP with a bit set for the particular operation. * - Do a special PIPE_CONTROL to trigger an implicit rectangle primitive. * - Emit 3DSTATE_WM_HZ_OP with no bits set to return to normal rendering. */ emit_depth_packets(brw, mt, brw_depth_format(brw, mt->format), BRW_SURFACE_2D, true, /* depth writes */ NULL, false, 0, /* no stencil for now */ true, /* hiz */ mt->logical_width0, mt->logical_height0, mt->logical_depth0, level, layer); /* min_array_element */ unsigned rect_width = minify(mt->logical_width0, level); unsigned rect_height = minify(mt->logical_height0, level); BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); OUT_BATCH(((rect_width - 1) & 0xffff) | ((rect_height - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); /* Emit 3DSTATE_WM_HZ_OP to override pipeline state for the particular * resolve or clear operation we want to perform. */ uint32_t dw1 = 0; switch (op) { case GEN6_HIZ_OP_DEPTH_RESOLVE: dw1 |= GEN8_WM_HZ_DEPTH_RESOLVE; break; case GEN6_HIZ_OP_HIZ_RESOLVE: dw1 |= GEN8_WM_HZ_HIZ_RESOLVE; break; case GEN6_HIZ_OP_DEPTH_CLEAR: dw1 |= GEN8_WM_HZ_DEPTH_CLEAR; break; case GEN6_HIZ_OP_NONE: assert(!"Should not get here."); } if (mt->num_samples > 0) dw1 |= SET_FIELD(ffs(mt->num_samples) - 1, GEN8_WM_HZ_NUM_SAMPLES); BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2)); OUT_BATCH(dw1); OUT_BATCH(0); OUT_BATCH(SET_FIELD(rect_width, GEN8_WM_HZ_CLEAR_RECTANGLE_X_MAX) | SET_FIELD(rect_height, GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MAX)); OUT_BATCH(SET_FIELD(0xFFFF, GEN8_WM_HZ_SAMPLE_MASK)); ADVANCE_BATCH(); /* Emit a PIPE_CONTROL with "Post-Sync Operation" set to "Write Immediate * Data", and no other bits set. This causes 3DSTATE_WM_HZ_OP's state to * take effect, and spawns a rectangle primitive. */ brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, brw->batch.workaround_bo, 0, 0, 0); /* Emit 3DSTATE_WM_HZ_OP again to disable the state overrides. */ BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); /* Mark this buffer as needing a TC flush, as we've rendered to it. */ brw_render_cache_set_add_bo(brw, mt->region->bo); /* We've clobbered all of the depth packets, and the drawing rectangle, * so we need to ensure those packets are re-emitted before the next * primitive. * * Setting _NEW_DEPTH and _NEW_BUFFERS covers it, but is rather overkill. */ brw->state.dirty.mesa |= _NEW_DEPTH | _NEW_BUFFERS; }