Example #1
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prim,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the 
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   /* Resolves must occur after updating state and finalizing textures but
    * before setting up any hardware state for this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.  
    * 
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
				  (sizeof(struct brw_sampler_state) +
				   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(intel);

      if (intel->gen < 6)
	 brw_set_prim(brw, &prim[i]);
      else
	 gen6_set_prim(brw, &prim[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
	 intel->no_batch_wrap = true;
	 brw_upload_state(brw);

	 if (unlikely(brw->intel.Fallback)) {
	    intel->no_batch_wrap = false;
	    retval = false;
	    goto out;
	 }
      }

      if (intel->gen >= 7)
	 gen7_emit_prim(brw, &prim[i], brw->primitive);
      else
	 brw_emit_prim(brw, &prim[i], brw->primitive);

      intel->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(intel);
	    intel_batchbuffer_flush(intel);
	    fail_next = true;
	    goto retry;
	 } else {
	    if (intel_batchbuffer_flush(intel) == -ENOSPC) {
	       static bool warned = false;

	       if (!warned) {
		  fprintf(stderr, "i965: Single primitive emit exceeded "
			  "available aperture space\n");
		  warned = true;
	       }

	       retval = false;
	    }
	 }
      }

      if (!_mesa_meta_in_progress(ctx))
         brw_update_primitive_count(brw, &prim[i]);
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
 out:

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
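
The reserve/save/retry dance above (require_space, save_state, emit, aperture check, reset and flush, retry once) recurs in every later version, so here is the idea in isolation. This is a minimal sketch, not driver code: every helper below is a hypothetical stub standing in for the corresponding intel_batchbuffer_*/dri_bufmgr call named in the comments.

#include <stdbool.h>

/* Hypothetical stand-ins for the real batchbuffer API: */
static void batch_require_space(int bytes) { (void)bytes; } /* intel_batchbuffer_require_space */
static void batch_save(void) {}                             /* intel_batchbuffer_save_state */
static void batch_reset_to_saved(void) {}                   /* intel_batchbuffer_reset_to_saved */
static int  batch_flush(void) { return 0; }                 /* intel_batchbuffer_flush; -ENOSPC on failure */
static bool aperture_exceeded(void) { return false; }       /* dri_bufmgr_check_aperture_space */
static void emit_one_prim(int i) { (void)i; }               /* state upload + prim emit */

/* Emit one primitive, retrying once in a fresh batch if the aperture
 * check fails -- the same fail_next/retry logic as above.
 */
static bool emit_prim_with_retry(int i, int estimated_max_prim_size)
{
   bool retried = false;

retry:
   batch_require_space(estimated_max_prim_size);
   batch_save();
   emit_one_prim(i);

   if (aperture_exceeded()) {
      if (!retried) {
         /* Throw away this primitive's commands and retry in an empty
          * batch, where the aperture check is most likely to pass.
          */
         batch_reset_to_saved();
         batch_flush();
         retried = true;
         goto retry;
      }
      /* Even an empty batch couldn't fit it: flush and report failure. */
      if (batch_flush() != 0)
         return false;
   }
   return true;
}
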
Example #2
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      brw->draw.gl_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;

      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
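         /* The byte offsets below follow the ARB_draw_indirect command
          * layouts:
          *   DrawElementsIndirectCommand { count, instanceCount,
          *      firstIndex, baseVertex, baseInstance }: baseVertex at 12.
          *   DrawArraysIndirectCommand { count, instanceCount,
          *      first, baseInstance }: first at 8.
          */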
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      if (brw->gen < 6)
	 brw_set_prim(brw, &prims[i]);
      else
	 gen6_set_prim(brw, &prims[i]);

retry:

      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
	 brw->no_batch_wrap = true;
	 brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(brw);
	    intel_batchbuffer_flush(brw);
	    fail_next = true;
	    goto retry;
	 } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
	 }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}
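
The _mesa_fls trick above deserves a standalone illustration: because ARB programs index samplers by texture unit number, the sampler count must cover the highest *unit* used, not the number of units used, so a population count would be wrong. A minimal sketch, with a local fls standing in for _mesa_fls (assumed to return the 1-based index of the highest set bit, or 0 for an empty mask):

#include <assert.h>
#include <stdint.h>

/* 1-based index of the highest set bit; 0 if no bits are set.
 * Assumed to match _mesa_fls's contract.
 */
static unsigned fls(uint32_t mask)
{
   unsigned n = 0;
   while (mask) {
      n++;
      mask >>= 1;
   }
   return n;
}

int main(void)
{
   /* An ARB program sampling only on texture unit 3: SamplersUsed =
    * 0b1000.  A bit-count would give 1, but the hardware sampler table
    * must still cover units 0..3, so the count has to be 4.
    */
   uint32_t samplers_used = 1u << 3;
   assert(fls(samplers_used) == 4);

   /* No samplers used: count is 0. */
   assert(fls(0) == 0);
   return 0;
}
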
Example #3
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prims,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index,
				     struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the 
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_state() because it
    * may flush the batchbuffer for a blit, affecting the state flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
				  (sizeof(struct brw_sampler_state) +
				   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances) {
         brw->num_instances = prims[i].num_instances;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
      if (brw->basevertex != prims[i].basevertex) {
         brw->basevertex = prims[i].basevertex;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
      if (brw->gen < 6)
	 brw_set_prim(brw, &prims[i]);
      else
	 gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
	 brw->no_batch_wrap = true;
	 brw_upload_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(brw);
	    intel_batchbuffer_flush(brw);
	    fail_next = true;
	    goto retry;
	 } else {
	    if (intel_batchbuffer_flush(brw) == -ENOSPC) {
	       static bool warned = false;

	       if (!warned) {
		  fprintf(stderr, "i965: Single primitive emit exceeded "
			  "available aperture space\n");
		  warned = true;
	       }

	       retval = false;
	    }
	 }
      }
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
Example #4
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( GLcontext *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prim,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLuint i, j;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );
      
   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.  
    * 
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   LOCK_HARDWARE(intel);

   if (brw->intel.numClipRects == 0) {
      assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
      UNLOCK_HARDWARE(intel);
      return GL_TRUE;
   }

   {
      /* Set the first primitive early, ahead of validate_state:
       */
      brw_set_prim(brw, prim[0].mode);

      /* XXX:  Need to separate validate and upload of state.  
       */
      brw_validate_state( brw );

      /* Various fallback checks:
       */
      if (brw->intel.Fallback) 
	 goto out;

      if (check_fallbacks( brw, prim, nr_prims ))
	 goto out;
	  
      /* Upload index, vertex data: 
       */
      if (ib)
	 brw_upload_indices( brw, ib );

      if (!brw_upload_vertices( brw, min_index, max_index)) {
	 goto out;
      }

      /* For single cliprect, state is already emitted: 
       */
      if (brw->intel.numClipRects == 1) {
	 for (i = 0; i < nr_prims; i++) {
	    brw_emit_prim(brw, &prim[i]);   
	 }
      }
      else {
	 /* Otherwise, explicitly do the cliprects at this point:
	  */
	 for (j = 0; j < brw->intel.numClipRects; j++) {
	    brw_emit_cliprect(brw, &brw->intel.pClipRects[j]);

	    /* Emit prims to batchbuffer: 
	     */
	    for (i = 0; i < nr_prims; i++) {
	       brw_emit_prim(brw, &prim[i]);   
	    }
	 }
      }
      
      intel->need_flush = GL_TRUE;
      retval = GL_TRUE;
   }

 out:

   /* Currently have to do this to synchronize with the map/unmap of
    * the vertex buffer in brw_exec_api.c.  Not sure if there is any
    * way around this, as not every flush is due to a buffer filling
    * up.
    */
   if (!intel_batchbuffer_flush( brw->intel.batch )) {
      DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
      retval = GL_FALSE;
   }

   if (retval && intel->thrashing) {
      bmSetFence(intel);
   }

   /* Free any old data so it doesn't clog up texture memory - we
    * won't be referencing it again.
    */
   while (brw->vb.upload.wrap != brw->vb.upload.buf) {
      ctx->Driver.BufferData(ctx,
			     GL_ARRAY_BUFFER_ARB,
			     BRW_UPLOAD_INIT_SIZE,
			     NULL,
			     GL_DYNAMIC_DRAW_ARB,
			     brw->vb.upload.vbo[brw->vb.upload.wrap]);
      brw->vb.upload.wrap++;
      brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS;
   }

   UNLOCK_HARDWARE(intel);

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}
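
The recycling loop at the end of this version is a small ring-buffer idiom: every upload VBO between the oldest wrap point and the buffer currently in use gets its storage re-specified (BufferData with a NULL pointer), orphaning the stale vertex data. A minimal sketch, with NR_BUFS and orphan_buffer as hypothetical stand-ins for BRW_NR_UPLOAD_BUFS and the ctx->Driver.BufferData call:

#define NR_BUFS 16   /* stands in for BRW_NR_UPLOAD_BUFS */

/* Hypothetical stand-in for ctx->Driver.BufferData(..., NULL, ...),
 * which orphans the buffer's old storage.
 */
static void orphan_buffer(unsigned idx) { (void)idx; }

/* Walk the ring from the oldest still-referenced buffer (wrap) up to,
 * but not including, the buffer currently being filled (buf).
 */
static void recycle_upload_buffers(unsigned *wrap, unsigned buf)
{
   while (*wrap != buf) {
      orphan_buffer(*wrap);
      *wrap = (*wrap + 1) % NR_BUFS;
   }
}
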
Example #5
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( GLcontext *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prim,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLuint i;
   GLuint ib_offset;
   dri_bo *ib_bo;
   GLboolean force_flush = GL_FALSE;
   int ret;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   brw_validate_textures( brw );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );
      
   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.  
    * 
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   LOCK_HARDWARE(intel);

   if (brw->intel.numClipRects == 0) {
      UNLOCK_HARDWARE(intel);
      return GL_TRUE;
   }

   {
      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.  This fraction is just a guess (minimal full state plus
       * a primitive is around 512 bytes), and would be better if we had
       * an upper bound of how much we might emit in a single
       * brw_try_draw_prims().
       */
   flush:
      if (force_flush)
         brw->no_batch_wrap = GL_FALSE;

      if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4
	/* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */
	  || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE))
	      intel_batchbuffer_flush(intel->batch);

      force_flush = GL_FALSE;
      brw->no_batch_wrap = GL_TRUE;

      /* Set the first primitive early, ahead of validate_state:
       */
      brw_set_prim(brw, prim[0].mode, &force_flush);

      /* XXX:  Need to separate validate and upload of state.  
       */
      ret = brw_validate_state( brw );
      if (ret) {
         force_flush = GL_TRUE;
         goto flush;
      }

      /* Various fallback checks:
       */
      if (brw->intel.Fallback) 
	 goto out;

      if (check_fallbacks( brw, prim, nr_prims ))
	 goto out;

      /* need to account for index buffer and vertex buffer */
      if (ib) {
         ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset);
         if (ret) {
            force_flush = GL_TRUE;
            goto flush;
         }
      }

      ret = brw_prepare_vertices( brw, min_index, max_index);
      if (ret < 0)
         goto out;

      if (ret > 0) {
         force_flush = GL_TRUE;
         goto flush;
      }
	  
      /* Upload index, vertex data: 
       */
      if (ib)
	brw_emit_indices( brw, ib, ib_bo, ib_offset);

      brw_emit_vertices( brw, min_index, max_index);

      for (i = 0; i < nr_prims; i++) {
	 brw_emit_prim(brw, &prim[i]);
      }

      retval = GL_TRUE;
   }

 out:

   brw->no_batch_wrap = GL_FALSE;

   UNLOCK_HARDWARE(intel);

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}
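
Unlike the later versions, which reserve an estimated_max_prim_size up front, this one flushes on a simple fullness fraction -- more than three quarters of the batch written -- alongside the cliprect-mode and force_flush conditions. A sketch of just the threshold test, over a toy batch struct with assumed field names:

#include <stdbool.h>
#include <stddef.h>

/* Toy model of the batchbuffer fields the heuristic reads. */
struct toy_batch {
   char  *map;   /* start of the batch */
   char  *ptr;   /* current write pointer */
   size_t size;  /* total capacity in bytes */
};

/* True once more than 3/4 of the batch has been written -- the same
 * guess the comment above admits is made without a real upper bound
 * on how much one draw might emit.
 */
static bool batch_nearly_full(const struct toy_batch *b)
{
   return (size_t)(b->ptr - b->map) > b->size * 3 / 4;
}
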
Example #6
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( struct gl_context *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prim,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLboolean warn = GL_FALSE;
   GLuint i;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the 
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.  
    * 
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      uint32_t hw_prim;
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
				  (sizeof(struct brw_sampler_state) +
				   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);

      hw_prim = brw_set_prim(brw, &prim[i]);
      if (brw->state.dirty.brw) {
	 brw_validate_state(brw);

	 /* Various fallback checks:  */
	 if (brw->intel.Fallback)
	    goto out;

	 /* Check that we can fit our state in with our existing batchbuffer, or
	  * flush otherwise.
	  */
	 if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
					     brw->state.validated_bo_count)) {
	    static GLboolean warned;
	    intel_batchbuffer_flush(intel);

	    /* Validate the state after we flushed the batch (which would have
	     * changed the set of dirty state).  If we still fail to
	     * check_aperture, warn of what's happening, but attempt to continue
	     * on since it may succeed anyway, and the user would probably rather
	     * see a failure and a warning than a fallback.
	     */
	    brw_validate_state(brw);
	    if (!warned &&
		dri_bufmgr_check_aperture_space(brw->state.validated_bos,
						brw->state.validated_bo_count)) {
	       warn = GL_TRUE;
	       warned = GL_TRUE;
	    }
	 }

	 intel->no_batch_wrap = GL_TRUE;
	 brw_upload_state(brw);
      }

      if (intel->gen >= 7)
	 gen7_emit_prim(brw, &prim[i], hw_prim);
      else
	 brw_emit_prim(brw, &prim[i], hw_prim);

      intel->no_batch_wrap = GL_FALSE;

      retval = GL_TRUE;
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
 out:

   brw_state_cache_check_size(brw);

   if (warn)
      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
	      "available aperture space\n");

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}