static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first.  If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout.  But if we're rendering
    * to a FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      for (i = 0; i < 32; i++)
	  OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   }
   else {
      for (i = 0; i < 32; i++)
	 OUT_BATCH(ctx->PolygonStipple[i]);
   }
   CACHED_BATCH();
}
static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window we have to invert the Y axis
    * in order to match the OpenGL pixel coordinate system, and our
    * offset must be matched to the window position.  If we're drawing
    * to a user-created FBO then our native pixel coordinate system
    * works just fine, and there's no window system to worry about.
    */
   if (_mesa_is_winsys_fbo(brw->intel.ctx.DrawBuffer))
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
   CACHED_BATCH();
}
Пример #3
0
static void upload_line_stipple(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (brw->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);

   if (brw->gen >= 7) {
      /* in U1.16 */
      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
      tmpi = tmp * (1<<16);
      OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
   }
   else {
      /* in U1.13 */
      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
      tmpi = tmp * (1<<13);
      OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   }

   CACHED_BATCH();
}
Пример #4
0
static void upload_blend_constant_color(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   BEGIN_BATCH(5);
   OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2));
   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[0]);
   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]);
   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]);
   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]);
   CACHED_BATCH();
}
Пример #5
0
/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (brw->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}
Пример #6
0
/* Define the number of curbes within CS's urb allocation.  Multiple
 * urb entries -> multiple curbes.  These will be used by
 * fixed-function hardware in a double-buffering scheme to avoid a
 * pipeline stall each time the contents of the curbe is changed.
 */
void brw_upload_cs_urb_state(struct brw_context *brw)
{
   BEGIN_BATCH(2);
   /* It appears that this is the state packet for the CS unit, ie. the
    * urb entries detailed here are housed in the CS range from the
    * URB_FENCE command.
    */
   OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));

   /* BRW_NEW_URB_FENCE */
   if (brw->urb.csize == 0) {
      OUT_BATCH(0);
   } else {
      /* BRW_NEW_URB_FENCE */
      assert(brw->urb.nr_cs_entries);
      OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
   }
   CACHED_BATCH();
}
Пример #7
0
static void brw_emit_vertices(struct brw_context *brw)
{
   GLuint i, nr_elements;

   brw_prepare_vertices(brw);

   brw_emit_query_begin(brw);

   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but they don't do anything unless
    * a VE loads from them.
    */
   if (nr_elements == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (brw->gen >= 6) {
	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      CACHED_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   if (brw->vb.nr_buffers) {
      if (brw->gen >= 6) {
	 assert(brw->vb.nr_buffers <= 33);
      } else {
	 assert(brw->vb.nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
	 uint32_t dw0;

	 if (brw->gen >= 6) {
	    dw0 = buffer->step_rate
	             ? GEN6_VB0_ACCESS_INSTANCEDATA
	             : GEN6_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << GEN6_VB0_INDEX_SHIFT;
	 } else {
	    dw0 = buffer->step_rate
	             ? BRW_VB0_ACCESS_INSTANCEDATA
	             : BRW_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << BRW_VB0_INDEX_SHIFT;
	 }

	 if (brw->gen >= 7)
	    dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

         if (brw->gen == 7)
	    dw0 |= GEN7_MOCS_L3 << 16;

	 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
	 OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
	 if (brw->gen >= 5) {
	    OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
	 } else
	    OUT_BATCH(0);
	 OUT_BATCH(buffer->step_rate);
      }
      ADVANCE_BATCH();
   }

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
    * for VertexID/InstanceID.
    */
   if (brw->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   struct brw_vertex_element *gen6_edgeflag_input = NULL;

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      /* The gen4 driver expects edgeflag to come in as a float, and passes
       * that float on to the tests in the clipper.  Mesa's current vertex
       * attribute value for EdgeFlag is stored as a float, which works out.
       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
       * integer ubyte.  Just rewrite that to convert to a float.
       */
      if (input->attrib == VERT_ATTRIB_EDGEFLAG) {
         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
          * of in the VUE.  We have to upload it sideband as the last vertex
          * element according to the B-Spec.
          */
         if (brw->gen >= 6) {
            gen6_edgeflag_input = input;
            continue;
         }

         if (format == BRW_SURFACEFORMAT_R8_UINT)
            format = BRW_SURFACEFORMAT_R8_SSCALED;
      }

      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
	 break;
      }

      if (brw->gen >= 6) {
	 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (brw->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

   if (brw->gen >= 6 && gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                GEN6_VE0_EDGE_FLAG_ENABLE |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   if (brw->vs.prog_data->uses_vertexid) {
      uint32_t dw0 = 0, dw1 = 0;

      dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));

      if (brw->gen >= 6) {
	 dw0 |= GEN6_VE0_VALID;
      } else {
	 dw0 |= BRW_VE0_VALID;
	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   CACHED_BATCH();
}
Пример #8
0
static void brw_emit_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint i, nr_elements;

   brw_prepare_vertices(brw);

   brw_emit_query_begin(brw);

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but they don't do anything unless
    * a VE loads from them.
    */
   if (brw->vb.nr_enabled == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (intel->gen >= 6) {
	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      CACHED_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   if (brw->vb.nr_buffers) {
      if (intel->gen >= 6) {
	 assert(brw->vb.nr_buffers <= 33);
      } else {
	 assert(brw->vb.nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
	 uint32_t dw0;

	 if (intel->gen >= 6) {
	    dw0 = buffer->step_rate
	             ? GEN6_VB0_ACCESS_INSTANCEDATA
	             : GEN6_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << GEN6_VB0_INDEX_SHIFT;
	 } else {
	    dw0 = buffer->step_rate
	             ? BRW_VB0_ACCESS_INSTANCEDATA
	             : BRW_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << BRW_VB0_INDEX_SHIFT;
	 }

	 if (intel->gen >= 7)
	    dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

	 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
	 OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
	 if (intel->gen >= 5) {
	    OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
	 } else
	    OUT_BATCH(0);
	 OUT_BATCH(buffer->step_rate);

	 brw->vb.current_buffers[i].handle = buffer->bo->handle;
	 brw->vb.current_buffers[i].offset = buffer->offset;
	 brw->vb.current_buffers[i].stride = buffer->stride;
	 brw->vb.current_buffers[i].step_rate = buffer->step_rate;
      }
      brw->vb.nr_current_buffers = i;
      ADVANCE_BATCH();
   }

   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
    * for VertexID/InstanceID.
    */
   if (intel->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = get_surface_type(input->glarray->Type,
					 input->glarray->Size,
					 input->glarray->Format,
					 input->glarray->Normalized,
                                         input->glarray->Integer);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
	 break;
      }

      if (intel->gen >= 6) {
	 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (intel->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

   if (brw->vs.prog_data->uses_vertexid) {
      uint32_t dw0 = 0, dw1 = 0;

      dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));

      if (intel->gen >= 6) {
	 dw0 |= GEN6_VE0_VALID;
      } else {
	 dw0 |= BRW_VE0_VALID;
	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   CACHED_BATCH();
}
Пример #9
0
static void brw_emit_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint i;

   brw_emit_query_begin(brw);

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but they don't do anything unless
    * a VE loads from them.
    */
   if (brw->vb.nr_enabled == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (intel->gen >= 6) {
	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      CACHED_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   if (brw->vb.nr_buffers) {
      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
	 uint32_t dw0;

	 if (intel->gen >= 6) {
	    dw0 = GEN6_VB0_ACCESS_VERTEXDATA | (i << GEN6_VB0_INDEX_SHIFT);
	 } else {
	    dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT);
	 }

	 if (intel->gen >= 7)
	    dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

	 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
	 OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
	 if (intel->gen >= 5) {
	    OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
	 } else
	    OUT_BATCH(0);
	 OUT_BATCH(0); /* Instance data step rate */

	 brw->vb.current_buffers[i].handle = buffer->bo->handle;
	 brw->vb.current_buffers[i].offset = buffer->offset;
	 brw->vb.current_buffers[i].stride = buffer->stride;
      }
      brw->vb.nr_current_buffers = i;
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(1 + brw->vb.nr_enabled * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2*brw->vb.nr_enabled - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = get_surface_type(input->glarray->Type,
					 input->glarray->Size,
					 input->glarray->Format,
					 input->glarray->Normalized);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
	 break;
      }

      if (intel->gen >= 6) {
	 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (intel->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }
   CACHED_BATCH();
}