Exemple #1
0
static void do_flatshade_line( struct brw_sf_compile *c )
{
    struct brw_compile *p = &c->func;
    struct intel_context *intel = &p->brw->intel;
    struct brw_reg ip = brw_ip_reg();
    GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi = 1;

    if (!nr)
        return;

    /* Already done in clip program:
     */
    if (c->key.primitive == SF_UNFILLED_TRIS)
        return;

    if (intel->gen == 5)
        jmpi = 2;

    brw_push_insn_state(p);

    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
    brw_JMPI(p, ip, ip, c->pv);
    copy_colors(c, c->vert[1], c->vert[0]);

    brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
    copy_colors(c, c->vert[0], c->vert[1]);

    brw_pop_insn_state(p);
}
/* Need to use a computed jump to copy flatshaded attributes as the
 * vertices are ordered according to y-coordinate before reaching this
 * point, so the PV could be anywhere.
 */
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg ip = brw_ip_reg();
   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
   if (!nr)
      return;

   /* Already done in clip program:
    */
   if (c->key.primitive == SF_UNFILLED_TRIS)
      return;

   brw_push_insn_state(p);
   
   brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
   brw_JMPI(p, ip, ip, c->pv);

   copy_colors(c, c->vert[1], c->vert[0]);
   copy_colors(c, c->vert[2], c->vert[0]);
   brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));

   copy_colors(c, c->vert[0], c->vert[1]);
   copy_colors(c, c->vert[2], c->vert[1]);
   brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));

   copy_colors(c, c->vert[0], c->vert[2]);
   copy_colors(c, c->vert[1], c->vert[2]);

   brw_pop_insn_state(p);
}
Exemple #3
0
static void
upload_clip_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   uint32_t depth_clamp = 0;
   uint32_t provoking, userclip;

   if (!ctx->Transform.DepthClamp)
      depth_clamp = GEN6_CLIP_Z_TEST;

   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
      provoking =
	 (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
	 (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
	 (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
   } else {
      provoking =
	 (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
   }

   /* _NEW_TRANSFORM */
   userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
   OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
   OUT_BATCH(GEN6_CLIP_ENABLE |
	     GEN6_CLIP_API_OGL |
	     GEN6_CLIP_MODE_NORMAL |
	     GEN6_CLIP_XY_TEST |
	     userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
	     depth_clamp |
	     provoking);
   OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
             U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
             GEN6_CLIP_FORCE_ZERO_RTAINDEX);
   ADVANCE_BATCH();
}
Exemple #4
0
static void brw_upload_vs_prog(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct brw_vs_prog_key key;
   struct brw_vertex_program *vp = 
      (struct brw_vertex_program *)brw->vertex_program;
   int i;

   memset(&key, 0, sizeof(key));

   /* Just upload the program verbatim for now.  Always send it all
    * the inputs it asks for, whether they are varying or not.
    */
   key.program_string_id = vp->id;
   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
   key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
			ctx->Polygon.BackMode != GL_FILL);
   key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);

   /* _NEW_POINT */
   if (ctx->Point.PointSprite) {
      for (i = 0; i < 8; i++) {
	 if (ctx->Point.CoordReplace[i])
	    key.point_coord_replace |= (1 << i);
      }
   }

   /* Make an early check for the key.
    */
   drm_intel_bo_unreference(brw->vs.prog_bo);
   brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
				      &key, sizeof(key),
				      NULL, 0,
				      &brw->vs.prog_data);
   if (brw->vs.prog_bo == NULL)
      do_vs_prog(brw, vp, &key);
   brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
			   sizeof(*brw->vs.prog_data));
}
Exemple #5
0
static void compile_gs_prog( struct brw_context *brw,
			     struct brw_gs_prog_key *key )
{
   struct intel_context *intel = &brw->intel;
   struct brw_gs_compile c;
   const GLuint *program;
   void *mem_ctx;
   GLuint program_size;

   /* Gen6: VF has already converted into polygon, and LINELOOP is
    * converted to LINESTRIP at the beginning of the 3D pipeline.
    */
   if (intel->gen >= 6)
      return;

   memset(&c, 0, sizeof(c));
   
   c.key = *key;
   /* Need to locate the two positions present in vertex + header.
    * These are currently hardcoded:
    */
   c.nr_attrs = brw_count_bits(c.key.attrs);

   if (intel->gen >= 5)
       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
   else
       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */

   c.nr_bytes = c.nr_regs * REG_SIZE;

   mem_ctx = NULL;
   
   /* Begin the compilation:
    */
   brw_init_compile(brw, &c.func, mem_ctx);

   c.func.single_program_flow = 1;

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.  
    */
   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);


   /* Note that primitives which don't require a GS program have
    * already been weeded out by this stage:
    */

   switch (key->primitive) {
   case GL_QUADS:
      brw_gs_quads( &c, key );
      break;
   case GL_QUAD_STRIP:
      brw_gs_quad_strip( &c, key );
      break;
   case GL_LINE_LOOP:
      brw_gs_lines( &c );
      break;
   default:
      ralloc_free(mem_ctx);
      return;
   }

   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
      int i;

      printf("gs:\n");
      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
	 brw_disasm(stdout, &((struct brw_instruction *)program)[i],
		    intel->gen);
      printf("\n");
    }

   brw_upload_cache(&brw->cache, BRW_GS_PROG,
		    &c.key, sizeof(c.key),
		    program, program_size,
		    &c.prog_data, sizeof(c.prog_data),
		    &brw->gs.prog_offset, &brw->gs.prog_data);
   ralloc_free(mem_ctx);
}
Exemple #6
0
static void compile_sf_prog( struct brw_context *brw,
			     struct brw_sf_prog_key *key )
{
   GLcontext *ctx = &brw->intel.ctx;
   struct brw_sf_compile c;
   const GLuint *program;
   GLuint program_size;
   GLuint i, idx;

   memset(&c, 0, sizeof(c));

   /* Begin the compilation:
    */
   brw_init_compile(brw, &c.func);

   c.key = *key;
   c.nr_attrs = brw_count_bits(c.key.attrs);
   c.nr_attr_regs = (c.nr_attrs+1)/2;
   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;

   c.prog_data.urb_read_length = c.nr_attr_regs;
   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;

   /* Construct map from attribute number to position in the vertex.
    */
   for (i = idx = 0; i < VERT_RESULT_MAX; i++) 
      if (c.key.attrs & (1<<i)) {
	 c.attr_to_idx[i] = idx;
	 c.idx_to_attr[idx] = i;
	 if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
            c.point_attrs[i].CoordReplace = 
               ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
	 }
         else {
            c.point_attrs[i].CoordReplace = GL_FALSE;
         }
	 idx++;
      }
   
   /* Which primitive?  Or all three? 
    */
   switch (key->primitive) {
   case SF_TRIANGLES:
      c.nr_verts = 3;
      brw_emit_tri_setup( &c, GL_TRUE );
      break;
   case SF_LINES:
      c.nr_verts = 2;
      brw_emit_line_setup( &c, GL_TRUE );
      break;
   case SF_POINTS:
      c.nr_verts = 1;
      if (key->do_point_sprite)
	  brw_emit_point_sprite_setup( &c, GL_TRUE );
      else
	  brw_emit_point_setup( &c, GL_TRUE );
      break;
   case SF_UNFILLED_TRIS:
      c.nr_verts = 3;
      brw_emit_anyprim_setup( &c );
      break;
   default:
      assert(0);
      return;
   }

   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   /* Upload
    */
   dri_bo_unreference(brw->sf.prog_bo);
   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
				       &c.key, sizeof(c.key),
				       NULL, 0,
				       program, program_size,
				       &c.prog_data,
				       &brw->sf.prog_data );
}
Exemple #7
0
/* Partition the CURBE between the various users of constant values:
 */
static void calculate_curbe_offsets( struct brw_context *brw )
{
   /* CACHE_NEW_WM_PROG */
   GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
   
   /* BRW_NEW_VERTEX_PROGRAM */
   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
   GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16;
   GLuint nr_clip_regs = 0;
   GLuint total_regs;

   /* _NEW_TRANSFORM */
   if (brw->attribs.Transform->ClipPlanesEnabled) {
      GLuint nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
      nr_clip_regs = (nr_planes * 4 + 15) / 16;
   }


   total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;

   /* This can happen - what to do?  Probably rather than falling
    * back, the best thing to do is emit programs which code the
    * constants as immediate values.  Could do this either as a static
    * cap on WM and VS, or adaptively.
    *
    * Unfortunately, this is currently dependent on the results of the
    * program generation process (in the case of wm), so this would
    * introduce the need to re-generate programs in the event of a
    * curbe allocation failure.
    */
   /* Max size is 32 - just large enough to
    * hold the 128 parameters allowed by
    * the fragment and vertex program
    * api's.  It's not clear what happens
    * when both VP and FP want to use 128
    * parameters, though. 
    */
   assert(total_regs <= 32);

   /* Lazy resize:
    */
   if (nr_fp_regs > brw->curbe.wm_size ||
       nr_vp_regs > brw->curbe.vs_size ||
       nr_clip_regs > brw->curbe.clip_size ||
       (total_regs < brw->curbe.total_size / 4 &&
	brw->curbe.total_size > 16)) {

      GLuint reg = 0;

      /* Calculate a new layout: 
       */
      reg = 0;
      brw->curbe.wm_start = reg;
      brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
      brw->curbe.clip_start = reg;
      brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
      brw->curbe.vs_start = reg;
      brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
      brw->curbe.total_size = reg;

      if (0)
	 _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
		      brw->curbe.wm_start,
		      brw->curbe.wm_size,
		      brw->curbe.clip_start,
		      brw->curbe.clip_size,
		      brw->curbe.vs_start,
		      brw->curbe.vs_size );

      brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
   }
}
Exemple #8
0
static void compile_gs_prog( struct brw_context *brw,
			     struct brw_gs_prog_key *key )
{
   struct brw_gs_compile c;
   const unsigned *program;
   unsigned program_size;

   memset(&c, 0, sizeof(c));

   c.key = *key;

   /* Need to locate the two positions present in vertex + header.
    * These are currently hardcoded:
    */
   c.nr_attrs = brw_count_bits(c.key.attrs);
   c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
   c.nr_bytes = c.nr_regs * REG_SIZE;


   /* Begin the compilation:
    */
   brw_init_compile(&c.func);

   c.func.single_program_flow = 1;

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);


   /* Note that primitives which don't require a GS program have
    * already been weeded out by this stage:
    */
   switch (key->primitive) {
   case PIPE_PRIM_QUADS:
      brw_gs_quads( &c );
      break;
   case PIPE_PRIM_QUAD_STRIP:
      brw_gs_quad_strip( &c );
      break;
   case PIPE_PRIM_LINE_LOOP:
      brw_gs_lines( &c );
      break;
   case PIPE_PRIM_LINES:
      if (key->hint_gs_always)
	 brw_gs_lines( &c );
      else {
	 return;
      }
      break;
   case PIPE_PRIM_TRIANGLES:
      if (key->hint_gs_always)
	 brw_gs_tris( &c );
      else {
	 return;
      }
      break;
   case PIPE_PRIM_POINTS:
      if (key->hint_gs_always)
	 brw_gs_points( &c );
      else {
	 return;
      }
      break;
   default:
      return;
   }

   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   /* Upload
    */
   brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG],
					      &c.key,
					      sizeof(c.key),
					      program,
					      program_size,
					      &c.prog_data,
					      &brw->gs.prog_data );
}
Exemple #9
0
/* Calculate interpolants for triangle and line rasterization.
 */
static void upload_clip_prog(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_clip_prog_key key;

   memset(&key, 0, sizeof(key));

   /* Populate the key:
    */
   /* BRW_NEW_REDUCED_PRIMITIVE */
   key.primitive = brw->intel.reduced_primitive;
   /* CACHE_NEW_VS_PROG */
   key.attrs = brw->vs.prog_data->outputs_written;
   /* _NEW_LIGHT */
   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
   key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
   /* _NEW_TRANSFORM */
   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);

   if (intel->gen == 5)
       key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
   else
       key.clip_mode = BRW_CLIPMODE_NORMAL;

   /* _NEW_POLYGON */
   if (key.primitive == GL_TRIANGLES) {
      if (ctx->Polygon.CullFlag &&
	  ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
      else {
	 GLuint fill_front = CLIP_CULL;
	 GLuint fill_back = CLIP_CULL;
	 GLuint offset_front = 0;
	 GLuint offset_back = 0;

	 if (!ctx->Polygon.CullFlag ||
	     ctx->Polygon.CullFaceMode != GL_FRONT) {
	    switch (ctx->Polygon.FrontMode) {
	    case GL_FILL: 
	       fill_front = CLIP_FILL; 
	       offset_front = 0;
	       break;
	    case GL_LINE:
	       fill_front = CLIP_LINE;
	       offset_front = ctx->Polygon.OffsetLine;
	       break;
	    case GL_POINT:
	       fill_front = CLIP_POINT;
	       offset_front = ctx->Polygon.OffsetPoint;
	       break;
	    }
	 }

	 if (!ctx->Polygon.CullFlag ||
	     ctx->Polygon.CullFaceMode != GL_BACK) {
	    switch (ctx->Polygon.BackMode) {
	    case GL_FILL: 
	       fill_back = CLIP_FILL; 
	       offset_back = 0;
	       break;
	    case GL_LINE:
	       fill_back = CLIP_LINE;
	       offset_back = ctx->Polygon.OffsetLine;
	       break;
	    case GL_POINT:
	       fill_back = CLIP_POINT;
	       offset_back = ctx->Polygon.OffsetPoint;
	       break;
	    }
	 }

	 if (ctx->Polygon.BackMode != GL_FILL ||
	     ctx->Polygon.FrontMode != GL_FILL) {
	    key.do_unfilled = 1;

	    /* Most cases the fixed function units will handle.  Cases where
	     * one or more polygon faces are unfilled will require help:
	     */
	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;

	    if (offset_back || offset_front) {
	       /* _NEW_POLYGON, _NEW_BUFFERS */
	       key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale;
	       key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
	    }

	    switch (ctx->Polygon.FrontFace) {
	    case GL_CCW:
	       key.fill_ccw = fill_front;
	       key.fill_cw = fill_back;
	       key.offset_ccw = offset_front;
	       key.offset_cw = offset_back;
	       if (ctx->Light.Model.TwoSide &&
		   key.fill_cw != CLIP_CULL) 
		  key.copy_bfc_cw = 1;
	       break;
	    case GL_CW:
	       key.fill_cw = fill_front;
	       key.fill_ccw = fill_back;
	       key.offset_cw = offset_front;
	       key.offset_ccw = offset_back;
	       if (ctx->Light.Model.TwoSide &&
		   key.fill_ccw != CLIP_CULL) 
		  key.copy_bfc_ccw = 1;
	       break;
	    }
	 }
      }
   }

   drm_intel_bo_unreference(brw->clip.prog_bo);
   brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
					&key, sizeof(key),
					NULL, 0,
					&brw->clip.prog_data);
   if (brw->clip.prog_bo == NULL)
      compile_clip_prog( brw, &key );
}
static void
upload_sf_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   /* CACHE_NEW_VS_PROG */
   uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
   uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead);
   uint32_t dw1, dw2, dw3, dw4, dw16;
   int i;
   /* _NEW_BUFFER */
   GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
   int attr = 0;

   dw1 =
      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
      (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
      1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
   dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
      GEN6_SF_STATISTICS_ENABLE;
   dw3 = 0;
   dw4 = 0;
   dw16 = 0;

   /* _NEW_POLYGON */
   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
      dw2 |= GEN6_SF_WINDING_CCW;

   if (ctx->Polygon.OffsetFill)
       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;

   /* _NEW_SCISSOR */
   if (ctx->Scissor.Enabled)
      dw3 |= GEN6_SF_SCISSOR_ENABLE;

   /* _NEW_POLYGON */
   if (ctx->Polygon.CullFlag) {
      switch (ctx->Polygon.CullFaceMode) {
      case GL_FRONT:
	 dw3 |= GEN6_SF_CULL_FRONT;
	 break;
      case GL_BACK:
	 dw3 |= GEN6_SF_CULL_BACK;
	 break;
      case GL_FRONT_AND_BACK:
	 dw3 |= GEN6_SF_CULL_BOTH;
	 break;
      default:
	 assert(0);
	 break;
      }
   } else {
      dw3 |= GEN6_SF_CULL_NONE;
   }

   /* _NEW_LINE */
   dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
      GEN6_SF_LINE_WIDTH_SHIFT;
   if (ctx->Line.SmoothFlag) {
      dw3 |= GEN6_SF_LINE_AA_ENABLE;
      dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
      dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
   }

   /* _NEW_POINT */
   if (ctx->Point._Attenuated)
      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;

   dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) <<
      GEN6_SF_POINT_WIDTH_SHIFT;
   if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
      dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;

   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
      dw4 |=
	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
   } else {
      dw4 |=
	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
   }

   if (ctx->Point.PointSprite) {
       for (i = 0; i < 8; i++) { 
	   if (ctx->Point.CoordReplace[i])
	       dw16 |= (1 << i);
       }
   }

   BEGIN_BATCH(20);
   OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   OUT_BATCH(dw3);
   OUT_BATCH(dw4);
   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
   for (i = 0; i < 8; i++) {
      uint32_t attr_overrides = 0;

      for (; attr < 64; attr++) {
	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
	    attr_overrides |= get_attr_override(brw, attr);
	    attr++;
	    break;
	 }
      }

      for (; attr < 64; attr++) {
	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
	    attr_overrides |= get_attr_override(brw, attr) << 16;
	    attr++;
	    break;
	 }
      }
      OUT_BATCH(attr_overrides);
   }
   OUT_BATCH(dw16); /* point sprite texcoord bitmask */
   OUT_BATCH(0); /* constant interp bitmask */
   OUT_BATCH(0); /* wrapshortest enables 0-7 */
   OUT_BATCH(0); /* wrapshortest enables 8-15 */
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel->batch);
}