static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   /* Recommended optimization for Victim Cache eviction in pixel backend. */
   if (brw->gen >= 9) {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);
   }
}
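
/* The CACHE_MODE_1 write above targets a "masked" MMIO register: the upper
 * 16 bits of the payload select which of the lower 16 bits actually change,
 * which is what REG_MASK() encodes (in Mesa it is the bit shifted left by
 * 16).  A minimal sketch of the same MI_LOAD_REGISTER_IMM sequence as a
 * reusable helper -- the driver's own brw_load_register_imm32() plays this
 * role; the helper name below is illustrative:
 */
static void
emit_load_register_imm32(struct brw_context *brw, uint32_t reg, uint32_t imm)
{
   BEGIN_BATCH(3);
   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); /* DWord count encoded as n - 2 */
   OUT_BATCH(reg);                            /* MMIO register offset */
   OUT_BATCH(imm);                            /* REG_MASK(bit) | bit for masked registers */
   ADVANCE_BATCH();
}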
/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
brw_emit_mi_flush(struct brw_context *brw)
{
   if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
      BEGIN_BATCH_BLT(4);
      OUT_BATCH(MI_FLUSH_DW);
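      /* The remaining dwords hold the optional post-sync address and
       * immediate data; they stay zero here because this flush writes
       * nothing back.
       */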
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
      if (brw->gen >= 6) {
         if (brw->gen == 9) {
            /* Hardware workaround: SKL
             *
             * Emit Pipe Control with all bits set to zero before emitting
             * a Pipe Control with VF Cache Invalidate set.
             */
            brw_emit_pipe_control_flush(brw, 0);
         }

         flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                  PIPE_CONTROL_VF_CACHE_INVALIDATE |
                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                  PIPE_CONTROL_CS_STALL;

         if (brw->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             * Flush Enable =1, a PIPE_CONTROL with any non-zero
             * post-sync-op is required.
             */
            brw_emit_post_sync_nonzero_flush(brw);
         }
      }
      brw_emit_pipe_control_flush(brw, flags);
   }

   brw_render_cache_set_clear(brw);
}
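
/* A hedged usage sketch for the always_flush_cache driconf option mentioned
 * above: the draw path can simply call brw_emit_mi_flush() after each
 * primitive when the flag is set.  The wrapper name is an assumption;
 * brw->always_flush_cache is the flag the comment refers to.
 */
static inline void
maybe_flush_after_draw(struct brw_context *brw)
{
   if (brw->always_flush_cache)
      brw_emit_mi_flush(brw);
}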
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
         brw->tess_eval_program = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *     fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   } else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}
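
/* Sketches of the small state-flag helpers used above.  They follow the
 * bitwise definitions implied by the debug-loop comment ("generated =
 * (prev ^ state); if (examined & generated) fail") and are written here as
 * assumptions rather than verbatim driver code:
 */
static void
accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}

static void
xor_states(struct brw_state_flags *result,
           const struct brw_state_flags *a,
           const struct brw_state_flags *b)
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

/* Returns true if any flag is set in both a and b. */
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}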
/**
 * \brief Execute a blit or render pass operation.
 *
 * To execute the operation, this function manually constructs and emits a
 * batch to draw a rectangle primitive. The batchbuffer is flushed before
 * constructing and after emitting the batch.
 *
 * This function alters no GL state.
 */
void
gen6_blorp_exec(struct brw_context *brw,
                const struct brw_blorp_params *params)
{
   uint32_t cc_blend_state_offset = 0;
   uint32_t cc_state_offset = 0;
   uint32_t depthstencil_offset;
   uint32_t wm_push_const_offset = 0;
   uint32_t wm_bind_bo_offset = 0;

   /* Emit workaround flushes when we switch from drawing to blorping. */
   brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_state_base_address(brw);

   gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
   gen6_emit_3dstate_sample_mask(brw,
                                 params->dst.num_samples > 1 ?
                                 (1 << params->dst.num_samples) - 1 : 1);
   gen6_blorp_emit_vertices(brw, params);
   gen6_blorp_emit_urb_config(brw, params);
   if (params->wm_prog_data) {
      cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
      cc_state_offset = gen6_blorp_emit_cc_state(brw);
   }
   depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
   gen6_blorp_emit_cc_state_pointers(brw, params, cc_blend_state_offset,
                                     depthstencil_offset, cc_state_offset);
   if (params->wm_prog_data) {
      uint32_t wm_surf_offset_renderbuffer;
      uint32_t wm_surf_offset_texture = 0;
      wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
      intel_miptree_used_for_rendering(params->dst.mt);
      wm_surf_offset_renderbuffer =
         gen6_blorp_emit_surface_state(brw, params, &params->dst,
                                       I915_GEM_DOMAIN_RENDER,
                                       I915_GEM_DOMAIN_RENDER);
      if (params->src.mt) {
         wm_surf_offset_texture =
            gen6_blorp_emit_surface_state(brw, params, &params->src,
                                          I915_GEM_DOMAIN_SAMPLER, 0);
      }
      wm_bind_bo_offset =
         gen6_blorp_emit_binding_table(brw,
                                       wm_surf_offset_renderbuffer,
                                       wm_surf_offset_texture);
   }

   if (params->src.mt) {
      const uint32_t sampler_offset =
         gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
      gen6_blorp_emit_sampler_state_pointers(brw, sampler_offset);
   }
   gen6_blorp_emit_vs_disable(brw, params);
   gen6_blorp_emit_gs_disable(brw, params);
   gen6_blorp_emit_clip_disable(brw);
   gen6_blorp_emit_sf_config(brw, params);
   if (params->wm_prog_data)
      gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
   else
      gen6_blorp_emit_constant_ps_disable(brw, params);
   gen6_blorp_emit_wm_config(brw, params);
   if (params->wm_prog_data)
      gen6_blorp_emit_binding_table_pointers(brw, wm_bind_bo_offset);
   gen6_blorp_emit_viewport_state(brw, params);

   if (params->depth.mt)
      gen6_blorp_emit_depth_stencil_config(brw, params);
   else
      gen6_blorp_emit_depth_disable(brw, params);
   gen6_blorp_emit_clear_params(brw, params);
   gen6_blorp_emit_drawing_rectangle(brw, params);
   gen6_blorp_emit_primitive(brw, params);
}
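
/* The 3DSTATE_SAMPLE_MASK value computed inline above enables one bit per
 * sample: a 4x destination yields (1 << 4) - 1 == 0xf, while a
 * single-sampled destination uses 0x1.  The same rule as a hedged helper:
 */
static uint32_t
blorp_sample_mask(unsigned num_samples)
{
   return num_samples > 1 ? (1u << num_samples) - 1 : 1;
}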
/**
 * Emit a series of PIPE_CONTROL commands, taking into account any
 * workarounds necessary to actually accomplish the caller's request.
 *
 * Unless otherwise noted, spec quotations in this function come from:
 *
 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
 * Restrictions for PIPE_CONTROL.
 *
 * You should not use this function directly.  Use the helpers in
 * brw_pipe_control.c instead, which may split the pipe control further.
 */
void
genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
{
   UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
   enum pipe_control_flags non_lri_post_sync_flags =
      post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Recursive PIPE_CONTROL workarounds --------------------------------
    * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
    *
    * We do these first because we want to look at the original operation,
    * rather than any workarounds we set.
    */
   if (GEN_GEN == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
      /* Hardware workaround: SNB B-Spec says:
       *
       *    "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
       *     Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
       *     required."
       */
      brw_emit_post_sync_nonzero_flush(brw);
   }

   if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
       * lists several workarounds:
       *
       *    "Project: SKL, KBL, BXT
       *
       *     If the VF Cache Invalidation Enable is set to a 1 in a
       *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
       *     sets to 0, with the VF Cache Invalidation Enable set to 0
       *     needs to be sent prior to the PIPE_CONTROL with VF Cache
       *     Invalidation Enable set to a 1."
       */
      genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
   }

   if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
       *
       * "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *  programmed prior to programming a PIPECONTROL command with "LRI
       *  Post Sync Operation" in GPGPU mode of operation (i.e when
       *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
       *
       * The same text exists a few rows below for Post Sync Op.
       */
      genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
   }

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */

   if (IS_GEN_BETWEEN(8, 10) && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *  'Write PS Depth Count' or 'Write Timestamp'."
       */
      if (!bo) {
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = brw->workaround_bo;
      }
   }

   if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* Project: PRE-HSW / Argument: Depth Stall
       *
       * "The following bits must be clear:
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                        PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }

   if (GEN_GEN >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       *    "This bit must be DISABLED for operations other than writing
       *     PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gen8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.
       */
   }

   if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
      /* Project: PRE-HSW / Argument: Depth Cache Flush
       *
       * "Depth Stall must be clear ([13] of DW1)."
       */
      assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       *    "This bit is ignored if Depth Stall Enable is set.
       *     Further, the render cache is not flushed even if Write Cache
       *     Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       *
       * We skip this check on Gen11+ as the "Stall and Pixel Scoreboard"
       * and "Render Target Flush" combo is explicitly required for BTI
       * update workarounds.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (IS_GEN_BETWEEN(7, 8) && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "IVB, HSW, BDW
       *     Restriction: Pipe_control with CS-stall bit set must be issued
       *     before a pipe-control command that has the State Cache
       *     Invalidate bit set."
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GEN_IS_HASWELL) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
       *     Prior to programming a PIPECONTROL command with any of the RO
       *     cache invalidation bit set, program a PIPECONTROL flush command
       *     with “CS stall” bit and “HDC Flush” bit set."
       *
       * TODO: Actually implement this.  What's an HDC Flush?
       */
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       *    "Project: ALL
       *     SW must always program Post-Sync Operation to "Write Immediate
       *     Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    * "This bit must not be exercised on any product.
    *  Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "Media
       *     State Clear" set in GPGPU mode of operation"
       *
       * This is a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       * "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *  than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (IS_GENx10_BETWEEN(60, 75) && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: SNB, IVB, HSW / Argument: TLB inv
       *
       * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
       *  must be set to something other than '0'."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (GEN_GEN >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
      /* Project: IVB+ / Argument: TLB inv
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       *    "Project: SKL+
       *     Post Sync Operation or CS stall must be set to ensure a TLB
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
       *     cache to invalidate."
       *
       * This is not a subset of the earlier rule, so there's nothing to do.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GEN_GEN == 9 && devinfo->gt == 4) {
      /* TODO: The big Skylake GT4 post sync op workaround */
   }

   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */

   if (IS_COMPUTE_PIPELINE(brw)) {
      if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
         /* Project: SKL+ / Argument: Tex Invalidate
          * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;
      }

      if (GEN_GEN == 8 && (post_sync_flags ||
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                                     PIPE_CONTROL_DEPTH_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *     Workloads."
          *
          * (The docs have separate table rows for each bit, with essentially
          * the same workaround text.  We've combined them here.)
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          *    "Project: BDW
          *     This bit must be always set when PIPE_CONTROL command is
          *     programmed by GPGPU and MEDIA workloads, except for the cases
          *     when only Read Only Cache Invalidation bits are set (State
          *     Cache Invalidation Enable, Instruction cache Invalidation
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
          *     Invalidation Enable). This is to WA FFDOP CG issue, this WA
          *     need not implemented when FF_DOP_CG is disable via "Fixed
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }

   /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
    *
    * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
    *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
    *
    * Note that the kernel does CS stalls between batches, so we only need
    * to count them within a batch.  We currently naively count every 4, and
    * don't skip the ones with only read-cache-invalidate bits set.  This
    * may or may not be a problem...
    */
   if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
      if (flags & PIPE_CONTROL_CS_STALL) {
         /* If we're doing a CS stall, reset the counter and carry on. */
         brw->pipe_controls_since_last_cs_stall = 0;
      }

      /* If this is the fourth pipe control without a CS stall, do one now. */
      if (++brw->pipe_controls_since_last_cs_stall == 4) {
         brw->pipe_controls_since_last_cs_stall = 0;
         flags |= PIPE_CONTROL_CS_STALL;
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */

   if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
      /* Project: PRE-SKL, VLV, CHV
       *
       * "[All Stepping][All SKUs]:
       *
       *  One of the following must also be set:
       *
       *  - Render Target Cache Flush Enable ([12] of DW1)
       *  - Depth Cache Flush Enable ([0] of DW1)
       *  - Stall at Pixel Scoreboard ([1] of DW1)
       *  - Depth Stall ([13] of DW1)
       *  - Post-Sync Operation ([13] of DW1)
       *  - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   /* Emit --------------------------------------------------------------- */

   brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
   #if GEN_GEN >= 9
      pc.FlushLLC = 0;
   #endif
   #if GEN_GEN >= 7
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
   #endif
   #if GEN_GEN >= 6
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   #else
      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
   #endif
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
   #if GEN_GEN >= 5 || GEN_IS_G4X
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
   #endif
   #if GEN_GEN >= 6
      pc.TextureCacheInvalidationEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   #elif GEN_GEN == 5 || GEN_IS_G4X
      pc.TextureCacheFlushEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   #endif
      pc.Address = ggtt_bo(bo, offset);
      if (GEN_GEN < 7 && bo)
         pc.DestinationAddressType = DAT_GGTT;
      pc.ImmediateData = imm;
   }
}
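
/* A sketch of get_post_sync_flags(), used at the top of
 * genX(emit_raw_pipe_control): it masks the flags down to the post-sync
 * operations, of which at most one may be set in a single PIPE_CONTROL.
 * This mirrors the upstream helper but is reproduced here as an assumption
 * (util_bitcount() comes from Mesa's util/bitscan.h):
 */
static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)
{
   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
            PIPE_CONTROL_WRITE_DEPTH_COUNT |
            PIPE_CONTROL_WRITE_TIMESTAMP |
            PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Only one post-sync op can be selected in DW1[15:14], and "LRI Post
    * Sync Operation" (DW1[23]) is mutually exclusive with the others.
    */
   assert(util_bitcount(flags) <= 1);

   return flags;
}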
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->gen == 11) {
      /* The default behavior of bit 5 "Headerless Message for Pre-emptable
       * Contexts" in SAMPLER MODE register is set to 0, which means
       * headerless sampler messages are not allowed for pre-emptable
       * contexts. Set the bit 5 to 1 to allow them.
       */
      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
       * in L3CNTLREG register. The default setting of the bit is not the
       * desirable behavior.
       */
      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
                              GEN8_L3CNTLREG_EDBC_NO_HANG);

      /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. */
      brw_load_register_imm32(brw, COMMON_SLICE_CHICKEN3,
                              PS_THREAD_PANIC_DISPATCH_MASK |
                              PS_THREAD_PANIC_DISPATCH);

      /* WaEnableStateCacheRedirectToCS:icl */
      brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
                              REG_MASK(GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE) |
                              GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE);
   }

   if (devinfo->gen == 10 || devinfo->gen == 11) {
      /* From gen10 workaround table in h/w specs:
       *
       *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
       *     a value of 0xFFFF"
       *
       * This means that we end up setting the entire 3D_MODE state. Bits
       * in this register control things such as slice hashing and we want
       * the default values of zero at the moment.
       */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
      OUT_BATCH(0xFFFF << 16);
      ADVANCE_BATCH();
   }

   if (devinfo->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);

      if (gen_device_info_is_9lp(devinfo)) {
         brw_load_register_imm32(brw, GEN7_GT_MODE,
                                 GEN9_SUBSLICE_HASHING_MASK_BITS |
                                 GEN9_SUBSLICE_HASHING_16x16);
      }
   }

   if (devinfo->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

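      /* Emit 3DSTATE_WM_HZ_OP and 3DSTATE_WM_CHROMAKEY with every field
       * zeroed so the initial context state is well defined: no HiZ
       * operation pending and chroma keying disabled.
       */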
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->gen >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->gen == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->gen >= 10)
      brw_enable_obj_preemption(brw, true);
}
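
/* A hedged sketch of brw_enable_obj_preemption(), called above for Gen10+.
 * It toggles the command-streamer replay mode via the same masked
 * brw_load_register_imm32() pattern as the other chicken-register updates;
 * the exact register and bit names below are assumptions for illustration.
 */
static void
enable_obj_preemption_sketch(struct brw_context *brw, bool enable)
{
   if (enable == brw->object_preemption)
      return;

   /* Flush outstanding rendering before switching the replay mode. */
   brw_emit_mi_flush(brw);

   brw_load_register_imm32(brw, CS_CHICKEN1,
                           GEN9_REPLAY_MODE_MASK |
                           (enable ? GEN9_REPLAY_MODE_MIDOBJECT
                                   : GEN9_REPLAY_MODE_MIDBUFFER));

   brw->object_preemption = enable;
}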
void
brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
   const uint32_t _3DSTATE_PIPELINE_SELECT =
      is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;

   if (brw->use_resource_streamer && pipeline != BRW_RENDER_PIPELINE) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: HSW, BDW, CHV, SKL, BXT
       *
       *   Hardware Binding Tables are only supported for 3D
       *   workloads. Resource streamer must be enabled only for 3D
       *   workloads. Resource streamer must be disabled for Media and GPGPU
       *   workloads.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_RS_CONTROL | 0);
      ADVANCE_BATCH();

      gen7_disable_hw_binding_tables(brw);

      /* XXX - Disable gather constant pool too when we start using it. */
   }

   if (brw->gen >= 8 && brw->gen < 10) {
      /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
       *
       *   Software must clear the COLOR_CALC_STATE Valid field in
       *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
       *   with Pipeline Select set to GPGPU.
       *
       * The internal hardware docs recommend the same workaround for Gen9
       * hardware too.
       */
      if (pipeline == BRW_COMPUTE_PIPELINE) {
         BEGIN_BATCH(2);
         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
         OUT_BATCH(0);
         ADVANCE_BATCH();

         brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
      }

   } else if (brw->gen >= 6) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: DEVSNB+
       *
       *   Software must ensure all the write caches are flushed through a
       *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
       *   command to invalidate read only caches prior to programming
       *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
       */
      const unsigned dc_flush =
         brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;

      if (brw->gen == 6) {
         /* Hardware workaround: SNB B-Spec says:
          *
          *   Before a PIPE_CONTROL with Write Cache Flush Enable = 1, a
          *   PIPE_CONTROL with any non-zero post-sync-op is required.
          */
         brw_emit_post_sync_nonzero_flush(brw);
      }

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  dc_flush |
                                  PIPE_CONTROL_NO_WRITE |
                                  PIPE_CONTROL_CS_STALL);

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                  PIPE_CONTROL_NO_WRITE);

   } else {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: PRE-DEVSNB
       *
       *   Software must ensure the current pipeline is flushed via an
       *   MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* Select the pipeline */
   BEGIN_BATCH(1);
   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
             (brw->gen >= 9 ? (3 << 8) : 0) |
             (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
   ADVANCE_BATCH();

   if (brw->gen == 7 && !brw->is_haswell &&
       pipeline == BRW_RENDER_PIPELINE) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: DEVIVB, DEVHSW:GT3:A0
       *
       *   Software must send a pipe_control with a CS stall and a post sync
       *   operation and then a dummy DRAW after every MI_SET_CONTEXT and
       *   after any PIPELINE_SELECT that is enabling 3D mode.
       */
      gen7_emit_cs_stall_flush(brw);

      BEGIN_BATCH(7);
      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
      OUT_BATCH(_3DPRIM_POINTLIST);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   if (brw->use_resource_streamer && pipeline == BRW_RENDER_PIPELINE) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: HSW, BDW, CHV, SKL, BXT
       *
       *   Hardware Binding Tables are only supported for 3D
       *   workloads. Resource streamer must be enabled only for 3D
       *   workloads. Resource streamer must be disabled for Media and GPGPU
       *   workloads.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_RS_CONTROL | 1);
      ADVANCE_BATCH();

      gen7_enable_hw_binding_tables(brw);

      /* XXX - Re-enable gather constant pool here. */
   }
}
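
/* A small sketch factoring out the PIPELINE_SELECT dword built inline
 * above: on Gen9+ bits 9:8 act as mask bits that must be set for the
 * pipeline-select field in bits 1:0 to take effect, mirroring the masked
 * MMIO register convention (the helper name is illustrative):
 */
static uint32_t
pipeline_select_dword(const struct brw_context *brw, uint32_t opcode,
                      enum brw_pipeline pipeline)
{
   return opcode << 16 |
          (brw->gen >= 9 ? (3 << 8) : 0) |
          (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0);
}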
/**
 * Emit a PIPE_CONTROL with various flushing flags.
 *
 * The caller is responsible for deciding what flags are appropriate for the
 * given generation.
 */
void
brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
{
   if (brw->gen >= 6 &&
       (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
       (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
      /* A pipe control command with flush and invalidate bits set
       * simultaneously is an inherently racy operation on Gen6+ if the
       * contents of the flushed caches were intended to become visible from
       * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
       * first one should stall the pipeline to make sure that the flushed R/W
       * caches are coherent with memory once the specified R/O caches are
       * invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
       * invalidation seems to happen at the bottom of the pipeline together
       * with any write cache flush, so this shouldn't be a concern.
       */
      brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
                                       PIPE_CONTROL_CS_STALL);
      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
   }

   if (brw->gen >= 8) {
      if (brw->gen == 8)
         gen8_add_cs_stall_workaround_bits(&flags);

      if (brw->gen == 9 &&
          (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
         /* Hardware workaround: SKL
          *
          * Emit Pipe Control with all bits set to zero before emitting
          * a Pipe Control with VF Cache Invalidate set.
          */
         brw_emit_pipe_control_flush(brw, 0);
      }

      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
      OUT_BATCH(flags);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else if (brw->gen >= 6) {
      if (brw->gen == 6 &&
          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
         /* Hardware workaround: SNB B-Spec says:
          *
          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
          *   required.
          */
         brw_emit_post_sync_nonzero_flush(brw);
      }

      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
      OUT_BATCH(flags);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
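
/* A sketch of gen7_cs_stall_every_four_pipe_controls(), used above.  It
 * implements the WaCsStallAtEveryFourthPipecontrol workaround for IVB/BYT,
 * matching the logic shown in genX(emit_raw_pipe_control) earlier; treat
 * the exact counter field name as an assumption:
 */
uint32_t
gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
{
   if (brw->gen == 7 && !brw->is_haswell) {
      if (flags & PIPE_CONTROL_CS_STALL) {
         /* If we're doing a CS stall, reset the counter and carry on. */
         brw->pipe_controls_since_last_cs_stall = 0;
         return 0;
      }

      /* If this is the fourth pipe control without a CS stall, do one now. */
      if (++brw->pipe_controls_since_last_cs_stall == 4) {
         brw->pipe_controls_since_last_cs_stall = 0;
         return PIPE_CONTROL_CS_STALL;
      }
   }
   return 0;
}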