static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   /* Recommended optimization for Victim Cache eviction in pixel backend. */
   if (brw->gen >= 9) {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);
   }
}
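/* For reference: the CACHE_MODE_1 write above relies on it being a "masked"
 * register, where the upper 16 bits of the payload are per-bit write enables
 * for the lower 16 bits.  A minimal sketch of the REG_MASK helper as it is
 * assumed to be defined elsewhere in the driver (not part of this excerpt):
 */
#ifndef REG_MASK
#define REG_MASK(value) ((value) << 16)
#endif
/* Example: REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
 *          GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC
 * writes a 1 to that single bit and leaves every other bit of the register
 * unchanged, because only its write-enable bit is set in the upper half.
 */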
/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
brw_emit_mi_flush(struct brw_context *brw)
{
   if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
      BEGIN_BATCH_BLT(4);
      OUT_BATCH(MI_FLUSH_DW);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
      if (brw->gen >= 6) {
         if (brw->gen == 9) {
            /* Hardware workaround: SKL
             *
             * Emit Pipe Control with all bits set to zero before emitting
             * a Pipe Control with VF Cache Invalidate set.
             */
            brw_emit_pipe_control_flush(brw, 0);
         }

         flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                  PIPE_CONTROL_VF_CACHE_INVALIDATE |
                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                  PIPE_CONTROL_CS_STALL;

         if (brw->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             *   Flush Enable =1, a PIPE_CONTROL with any non-zero
             *   post-sync-op is required.
             */
            brw_emit_post_sync_nonzero_flush(brw);
         }
      }
      brw_emit_pipe_control_flush(brw, flags);
   }

   brw_render_cache_set_clear(brw);
}
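/* Typical debug usage, as a sketch: the always_flush_cache driconf option
 * mentioned in the comment above simply forces brw_emit_mi_flush() around
 * every draw.  The wrapper name below is hypothetical; the exact call site
 * in the draw path is assumed and not part of this excerpt.
 */
static inline void
brw_maybe_flush_for_debug(struct brw_context *brw)
{
   /* brw->always_flush_cache is set from the driconf option of the same
    * name; it trades a large amount of performance for easier isolation of
    * cache-coherency bugs.
    */
   if (unlikely(brw->always_flush_cache))
      brw_emit_mi_flush(brw);
}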
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
         brw->tess_eval_program = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   } else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}
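/* For reference, minimal sketches of the dirty-flag helpers used above.
 * These are assumed to match the static helpers defined alongside this
 * function (they are not part of this excerpt).  A state atom is emitted
 * only when its dirty bits intersect the accumulated dirty state.
 */
static inline bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   /* True if any Mesa or driver-internal dirty bit is shared. */
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static inline void
merge_ctx_state(struct brw_context *brw, struct brw_state_flags *state)
{
   /* Fold freshly-flagged GL and driver state into the working set. */
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

static inline void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      /* An atom may flag further state dirty; pick that up immediately. */
      merge_ctx_state(brw, state);
   }
}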
/**
 * \brief Execute a blit or render pass operation.
 *
 * To execute the operation, this function manually constructs and emits a
 * batch to draw a rectangle primitive.  The batchbuffer is flushed before
 * constructing and after emitting the batch.
 *
 * This function alters no GL state.
 */
void
gen6_blorp_exec(struct brw_context *brw,
                const struct brw_blorp_params *params)
{
   uint32_t cc_blend_state_offset = 0;
   uint32_t cc_state_offset = 0;
   uint32_t depthstencil_offset;
   uint32_t wm_push_const_offset = 0;
   uint32_t wm_bind_bo_offset = 0;

   /* Emit workaround flushes when we switch from drawing to blorping. */
   brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_state_base_address(brw);

   gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
   gen6_emit_3dstate_sample_mask(brw,
                                 params->dst.num_samples > 1 ?
                                 (1 << params->dst.num_samples) - 1 : 1);
   gen6_blorp_emit_vertices(brw, params);
   gen6_blorp_emit_urb_config(brw, params);
   if (params->wm_prog_data) {
      cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
      cc_state_offset = gen6_blorp_emit_cc_state(brw);
   }
   depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
   gen6_blorp_emit_cc_state_pointers(brw, params, cc_blend_state_offset,
                                     depthstencil_offset, cc_state_offset);

   if (params->wm_prog_data) {
      uint32_t wm_surf_offset_renderbuffer;
      uint32_t wm_surf_offset_texture = 0;

      wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
      intel_miptree_used_for_rendering(params->dst.mt);
      wm_surf_offset_renderbuffer =
         gen6_blorp_emit_surface_state(brw, params, &params->dst,
                                       I915_GEM_DOMAIN_RENDER,
                                       I915_GEM_DOMAIN_RENDER);
      if (params->src.mt) {
         wm_surf_offset_texture =
            gen6_blorp_emit_surface_state(brw, params, &params->src,
                                          I915_GEM_DOMAIN_SAMPLER, 0);
      }
      wm_bind_bo_offset =
         gen6_blorp_emit_binding_table(brw,
                                       wm_surf_offset_renderbuffer,
                                       wm_surf_offset_texture);
   }

   if (params->src.mt) {
      const uint32_t sampler_offset =
         gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
      gen6_blorp_emit_sampler_state_pointers(brw, sampler_offset);
   }
   gen6_blorp_emit_vs_disable(brw, params);
   gen6_blorp_emit_gs_disable(brw, params);
   gen6_blorp_emit_clip_disable(brw);
   gen6_blorp_emit_sf_config(brw, params);
   if (params->wm_prog_data)
      gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
   else
      gen6_blorp_emit_constant_ps_disable(brw, params);
   gen6_blorp_emit_wm_config(brw, params);
   if (params->wm_prog_data)
      gen6_blorp_emit_binding_table_pointers(brw, wm_bind_bo_offset);
   gen6_blorp_emit_viewport_state(brw, params);

   if (params->depth.mt)
      gen6_blorp_emit_depth_stencil_config(brw, params);
   else
      gen6_blorp_emit_depth_disable(brw, params);
   gen6_blorp_emit_clear_params(brw, params);
   gen6_blorp_emit_drawing_rectangle(brw, params);
   gen6_blorp_emit_primitive(brw, params);
}
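/* The 3DSTATE_SAMPLE_MASK value computed above enables one bit per sample.
 * A hypothetical helper (not part of the driver) spelling out the same
 * expression used in gen6_blorp_exec():
 */
static inline uint32_t
blorp_full_sample_mask(unsigned num_samples)
{
   /* 4 samples -> 0xf, 8 samples -> 0xff; single-sampled uses mask 0x1. */
   return num_samples > 1 ? (1u << num_samples) - 1 : 1;
}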
/**
 * Emit a series of PIPE_CONTROL commands, taking into account any
 * workarounds necessary to actually accomplish the caller's request.
 *
 * Unless otherwise noted, spec quotations in this function come from:
 *
 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
 * Restrictions for PIPE_CONTROL.
 *
 * You should not use this function directly.  Use the helpers in
 * brw_pipe_control.c instead, which may split the pipe control further.
 */
void
genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
{
   UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
   enum pipe_control_flags non_lri_post_sync_flags =
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Recursive PIPE_CONTROL workarounds --------------------------------
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
    *
    * We do these first because we want to look at the original operation,
    * rather than any workarounds we set.
    */
   if (GEN_GEN == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
      /* Hardware workaround: SNB B-Spec says:
       *
       *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
       *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
       *     required."
       */
      brw_emit_post_sync_nonzero_flush(brw);
   }

   if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
       * lists several workarounds:
       *
       *    "Project: SKL, KBL, BXT
       *
       *     If the VF Cache Invalidation Enable is set to a 1 in a
       *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
       *     sets to 0, with the VF Cache Invalidation Enable set to 0
       *     needs to be sent prior to the PIPE_CONTROL with VF Cache
       *     Invalidation Enable set to a 1."
       */
      genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
   }

   if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
       *
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *  programmed prior to programming a PIPECONTROL command with "LRI
       *  Post Sync Operation" in GPGPU mode of operation (i.e when
       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
       *
       * The same text exists a few rows below for Post Sync Op.
       */
      genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
   }

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */
   if (IS_GEN_BETWEEN(8, 10) && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *  'Write PS Depth Count' or 'Write Timestamp'."
       */
      if (!bo) {
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = brw->workaround_bo;
      }
   }

   if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* Project: PRE-HSW / Argument: Depth Stall
       *
       * "The following bits must be clear:
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                        PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }

   if (GEN_GEN >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       *    "This bit must be DISABLED for operations other than writing
       *     PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gen8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.
       */
   }

   if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
      /* Project: PRE-HSW / Argument: Depth Cache Flush
       *
       * "Depth Stall must be clear ([13] of DW1)."
       */
      assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       *    "This bit is ignored if Depth Stall Enable is set.
       *     Further, the render cache is not flushed even if Write Cache
       *     Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       *
       * We skip this check on Gen11+ as the "Stall and Pixel Scoreboard"
       * and "Render Target Flush" combo is explicitly required for BTI
       * update workarounds.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (IS_GEN_BETWEEN(7, 8) &&
       (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "IVB, HSW, BDW
       *     Restriction: Pipe_control with CS-stall bit set must be issued
       *     before a pipe-control command that has the State Cache
       *     Invalidate bit set."
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GEN_IS_HASWELL) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
       *     Prior to programming a PIPECONTROL command with any of the RO
       *     cache invalidation bit set, program a PIPECONTROL flush command
       *     with “CS stall” bit and “HDC Flush” bit set."
       *
       * TODO: Actually implement this.  What's an HDC Flush?
       */
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       *    "Project: ALL
       *     SW must always program Post-Sync Operation to "Write Immediate
       *     Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    * "This bit must not be exercised on any product.
    *  Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "Media
       *     State Clear" set in GPGPU mode of operation"
       *
       * This is a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (IS_GENx10_BETWEEN(60, 75) && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: SNB, IVB, HSW / Argument: TLB inv
       *
       * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
       *  must be set to something other than '0'."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (GEN_GEN >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: IVB+ / Argument: TLB inv
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       *    "Project: SKL+
       *     Post Sync Operation or CS stall must be set to ensure a TLB
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
       *     cache to invalidate."
       *
       * This is not a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GEN_GEN == 9 && devinfo->gt == 4) {
      /* TODO: The big Skylake GT4 post sync op workaround */
   }

   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */

   if (IS_COMPUTE_PIPELINE(brw)) {
      if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
         /* Project: SKL+ / Argument: Tex Invalidate
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;
      }

      if (GEN_GEN == 8 && (post_sync_flags ||
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                                     PIPE_CONTROL_DEPTH_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *     Workloads."
          *
          * (The docs have separate table rows for each bit, with essentially
          *  the same workaround text.  We've combined them here.)
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          *    "Project: BDW
          *     This bit must be always set when PIPE_CONTROL command is
          *     programmed by GPGPU and MEDIA workloads, except for the cases
          *     when only Read Only Cache Invalidation bits are set (State
          *     Cache Invalidation Enable, Instruction cache Invalidation
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
          *     Invalidation Enable).  This is to WA FFDOP CG issue, this WA
          *     need not implemented when FF_DOP_CG is disable via "Fixed
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }

   /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
    *
    *    "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
    *     only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
    *
    * Note that the kernel does CS stalls between batches, so we only need
    * to count them within a batch.  We currently naively count every 4, and
    * don't skip the ones with only read-cache-invalidate bits set.  This
    * may or may not be a problem...
    */
   if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
      if (flags & PIPE_CONTROL_CS_STALL) {
         /* If we're doing a CS stall, reset the counter and carry on. */
         brw->pipe_controls_since_last_cs_stall = 0;
      }

      /* If this is the fourth pipe control without a CS stall, do one now. */
      if (++brw->pipe_controls_since_last_cs_stall == 4) {
         brw->pipe_controls_since_last_cs_stall = 0;
         flags |= PIPE_CONTROL_CS_STALL;
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */
   if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
      /* Project: PRE-SKL, VLV, CHV
       *
       * "[All Stepping][All SKUs]:
       *
       *  One of the following must also be set:
       *
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)
       *  - Stall at Pixel Scoreboard ([1] of DW1)
       *  - Depth Stall ([13] of DW1)
       *  - Post-Sync Operation ([13] of DW1)
       *  - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   /* Emit --------------------------------------------------------------- */

   brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
#if GEN_GEN >= 9
      pc.FlushLLC = 0;
#endif
#if GEN_GEN >= 7
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
#endif
#if GEN_GEN >= 6
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
#else
      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
#endif
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
#if GEN_GEN >= 5 || GEN_IS_G4X
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
#endif
#if GEN_GEN >= 6
      pc.TextureCacheInvalidationEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
#elif GEN_GEN == 5 || GEN_IS_G4X
      pc.TextureCacheFlushEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
#endif
      pc.Address = ggtt_bo(bo, offset);
      if (GEN_GEN < 7 && bo)
         pc.DestinationAddressType = DAT_GGTT;
      pc.ImmediateData = imm;
   }
}
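/* For reference, sketches of the two post-sync helpers used above.  The
 * bodies are assumed (they live alongside this function and are not part of
 * this excerpt); the genxml enum values WriteImmediateData, WritePSDepthCount,
 * WriteTimestamp, and NoWrite are likewise assumptions.
 */
static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)
{
   /* Keep only the bits that map to the DW1 "Post Sync Operation" field. */
   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
            PIPE_CONTROL_WRITE_DEPTH_COUNT |
            PIPE_CONTROL_WRITE_TIMESTAMP |
            PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Only one "Post Sync Op" is allowed in DW1 of a PIPE_CONTROL. */
   assert(util_bitcount(flags) <= 1);

   return flags;
}

static unsigned
flags_to_post_sync_op(uint32_t flags)
{
   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
      return WriteImmediateData;

   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
      return WritePSDepthCount;

   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
      return WriteTimestamp;

   return NoWrite;
}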
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->gen == 11) {
      /* The default behavior of bit 5 "Headerless Message for Pre-emptable
       * Contexts" in SAMPLER MODE register is set to 0, which means
       * headerless sampler messages are not allowed for pre-emptable
       * contexts.  Set the bit 5 to 1 to allow them.
       */
      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
       * in L3CNTLREG register.  The default setting of the bit is not the
       * desirable behavior.
       */
      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
                              GEN8_L3CNTLREG_EDBC_NO_HANG);

      /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. */
      brw_load_register_imm32(brw, COMMON_SLICE_CHICKEN3,
                              PS_THREAD_PANIC_DISPATCH_MASK |
                              PS_THREAD_PANIC_DISPATCH);

      /* WaEnableStateCacheRedirectToCS:icl */
      brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
                              GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE |
                              REG_MASK(GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE));
   }

   if (devinfo->gen == 10 || devinfo->gen == 11) {
      /* From gen10 workaround table in h/w specs:
       *
       *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
       *     a value of 0xFFFF"
       *
       * This means that we end up setting the entire 3D_MODE state.  Bits
       * in this register control things such as slice hashing and we want
       * the default values of zero at the moment.
       */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
      OUT_BATCH(0xFFFF << 16);
      ADVANCE_BATCH();
   }

   if (devinfo->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);

      if (gen_device_info_is_9lp(devinfo)) {
         brw_load_register_imm32(brw, GEN7_GT_MODE,
                                 GEN9_SUBSLICE_HASHING_MASK_BITS |
                                 GEN9_SUBSLICE_HASHING_16x16);
      }
   }

   if (devinfo->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->gen >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->gen == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->gen >= 10)
      brw_enable_obj_preemption(brw, true);
}
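/* For reference, a sketch of brw_load_register_imm32() as used repeatedly
 * above (assumed to match the batchbuffer helper elsewhere in the driver,
 * which is not part of this excerpt).  It is simply the MI_LOAD_REGISTER_IMM
 * sequence that the open-coded BEGIN_BATCH(3) blocks above spell out by hand.
 */
void
brw_load_register_imm32(struct brw_context *brw, uint32_t reg, uint32_t imm)
{
   BEGIN_BATCH(3);
   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
   OUT_BATCH(reg);
   OUT_BATCH(imm);
   ADVANCE_BATCH();
}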
void
brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
   const uint32_t _3DSTATE_PIPELINE_SELECT =
      is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;

   if (brw->use_resource_streamer && pipeline != BRW_RENDER_PIPELINE) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: HSW, BDW, CHV, SKL, BXT
       *
       *   Hardware Binding Tables are only supported for 3D
       *   workloads.  Resource streamer must be enabled only for 3D
       *   workloads.  Resource streamer must be disabled for Media and GPGPU
       *   workloads.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_RS_CONTROL | 0);
      ADVANCE_BATCH();

      gen7_disable_hw_binding_tables(brw);

      /* XXX - Disable gather constant pool too when we start using it. */
   }

   if (brw->gen >= 8 && brw->gen < 10) {
      /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
       *
       *   Software must clear the COLOR_CALC_STATE Valid field in
       *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
       *   with Pipeline Select set to GPGPU.
       *
       * The internal hardware docs recommend the same workaround for Gen9
       * hardware too.
       */
      if (pipeline == BRW_COMPUTE_PIPELINE) {
         BEGIN_BATCH(2);
         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
         OUT_BATCH(0);
         ADVANCE_BATCH();

         brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
      }
   } else if (brw->gen >= 6) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: DEVSNB+
       *
       *   Software must ensure all the write caches are flushed through a
       *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
       *   command to invalidate read only caches prior to programming
       *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
       */
      const unsigned dc_flush =
         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;

      if (brw->gen == 6) {
         /* Hardware workaround: SNB B-Spec says:
          *
          *   Before a PIPE_CONTROL with Write Cache Flush Enable = 1, a
          *   PIPE_CONTROL with any non-zero post-sync-op is required.
          */
         brw_emit_post_sync_nonzero_flush(brw);
      }

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  dc_flush |
                                  PIPE_CONTROL_NO_WRITE |
                                  PIPE_CONTROL_CS_STALL);

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                  PIPE_CONTROL_NO_WRITE);
   } else {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: PRE-DEVSNB
       *
       *   Software must ensure the current pipeline is flushed via an
       *   MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* Select the pipeline */
   BEGIN_BATCH(1);
   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
             (brw->gen >= 9 ? (3 << 8) : 0) |
             (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
   ADVANCE_BATCH();

   if (brw->gen == 7 && !brw->is_haswell &&
       pipeline == BRW_RENDER_PIPELINE) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: DEVIVB, DEVHSW:GT3:A0
       *
       *   Software must send a pipe_control with a CS stall and a post sync
       *   operation and then a dummy DRAW after every MI_SET_CONTEXT and
       *   after any PIPELINE_SELECT that is enabling 3D mode.
       */
      gen7_emit_cs_stall_flush(brw);

      BEGIN_BATCH(7);
      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
      OUT_BATCH(_3DPRIM_POINTLIST);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   if (brw->use_resource_streamer && pipeline == BRW_RENDER_PIPELINE) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: HSW, BDW, CHV, SKL, BXT
       *
       *   Hardware Binding Tables are only supported for 3D
       *   workloads.  Resource streamer must be enabled only for 3D
       *   workloads.  Resource streamer must be disabled for Media and GPGPU
       *   workloads.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_RS_CONTROL | 1);
      ADVANCE_BATCH();

      gen7_enable_hw_binding_tables(brw);

      /* XXX - Re-enable gather constant pool here. */
   }
}
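/* For reference, a sketch of the brw_select_pipeline() wrapper that callers
 * such as brw_upload_pipeline_state() use (assumed to match the inline
 * helper declared with these functions; not part of this excerpt).  The
 * expensive brw_emit_select_pipeline() sequence above only runs when the
 * pipeline actually changes.
 */
static inline void
brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
   if (unlikely(brw->last_pipeline != pipeline)) {
      assert(pipeline < BRW_NUM_PIPELINES);
      brw_emit_select_pipeline(brw, pipeline);
      brw->last_pipeline = pipeline;
   }
}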
/**
 * Emit a PIPE_CONTROL with various flushing flags.
 *
 * The caller is responsible for deciding what flags are appropriate for the
 * given generation.
 */
void
brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
{
   if (brw->gen >= 6 &&
       (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
       (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
      /* A pipe control command with flush and invalidate bits set
       * simultaneously is an inherently racy operation on Gen6+ if the
       * contents of the flushed caches were intended to become visible from
       * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
       * first one should stall the pipeline to make sure that the flushed
       * R/W caches are coherent with memory once the specified R/O caches
       * are invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
       * invalidation seems to happen at the bottom of the pipeline together
       * with any write cache flush, so this shouldn't be a concern.
       */
      brw_emit_pipe_control_flush(brw,
                                  (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
                                  PIPE_CONTROL_CS_STALL);
      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
   }

   if (brw->gen >= 8) {
      if (brw->gen == 8)
         gen8_add_cs_stall_workaround_bits(&flags);

      if (brw->gen == 9 &&
          (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
         /* Hardware workaround: SKL
          *
          * Emit Pipe Control with all bits set to zero before emitting
          * a Pipe Control with VF Cache Invalidate set.
          */
         brw_emit_pipe_control_flush(brw, 0);
      }

      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
      OUT_BATCH(flags);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else if (brw->gen >= 6) {
      if (brw->gen == 6 &&
          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
         /* Hardware workaround: SNB B-Spec says:
          *
          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
          *   required.
          */
         brw_emit_post_sync_nonzero_flush(brw);
      }

      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
      OUT_BATCH(flags);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
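/* For reference, a sketch of gen7_cs_stall_every_four_pipe_controls() as
 * used above (assumed; the real helper is defined with the other pipe
 * control code and is not part of this excerpt).  It implements the same
 * WaCsStallAtEveryFourthPipecontrol counting shown in
 * genX(emit_raw_pipe_control): on Ivybridge/Baytrail, at most three
 * PIPE_CONTROLs may be emitted in a row without a CS stall.
 */
unsigned
gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
{
   if (brw->gen == 7 && !brw->is_haswell) {
      if (flags & PIPE_CONTROL_CS_STALL) {
         /* If we're doing a CS stall, reset the counter and carry on. */
         brw->pipe_controls_since_last_cs_stall = 0;
         return 0;
      }

      /* If this is the fourth pipe control without a CS stall, do one now. */
      if (++brw->pipe_controls_since_last_cs_stall == 4) {
         brw->pipe_controls_since_last_cs_stall = 0;
         return PIPE_CONTROL_CS_STALL;
      }
   }
   return 0;
}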