Example #1
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                       const void *data, GLuint bytes, GLuint flags)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(batch, bytes, flags);
   __memcpy(batch->ptr, data, bytes);
   batch->ptr += bytes;
}
Example #2
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                       const void *data, unsigned int bytes)
{
	assert((bytes & 3) == 0);
	intel_batchbuffer_require_space(batch, bytes);
	memcpy(batch->ptr, data, bytes);
	batch->ptr += bytes;
}
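The intel_batchbuffer_data() variants in this listing all share one contract: the caller reserves space first, the payload is a whole number of dwords, and batch->ptr advances by exactly that many bytes. A minimal caller sketch (a hypothetical helper written against the signature in Example #2, not taken from any of these trees) that pads an arbitrary byte payload up to the dword boundary the assertion demands:

static void
emit_padded_bytes(struct intel_batchbuffer *batch,
                  const void *payload, unsigned len)
{
   /* Round up so the (bytes & 3) == 0 assertion above holds. */
   unsigned padded = (len + 3) & ~3u;
   uint32_t tmp[64];

   assert(padded <= sizeof(tmp));
   memcpy(tmp, payload, len);
   memset((char *)tmp + len, 0, padded - len);   /* zero the pad bytes */
   intel_batchbuffer_data(batch, tmp, padded);
}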
Example #3
static void
intel_batchbuffer_start_atomic_helper(struct intel_batchbuffer *batch,
                                      int flag,
                                      unsigned int size)
{
    assert(!batch->atomic);
    intel_batchbuffer_check_batchbuffer_flag(batch, flag);
    intel_batchbuffer_require_space(batch, size);
    batch->atomic = 1;
}
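Example #3 reserves the whole region before setting batch->atomic, which is what lets an atomic span be emitted without an implicit flush splitting it. A sketch of the assumed counterpart that closes the span (modeled on the helper above, not copied from its source tree):

static void
intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch)
{
   assert(batch->atomic);
   batch->atomic = 0;
}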
Example #4
void 
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                          void *data,
                          unsigned int size)
{
    assert((size & 3) == 0);
    intel_batchbuffer_require_space(batch, size);

    assert(batch->ptr);
    memcpy(batch->ptr, data, size);
    batch->ptr += size;
}
Example #5
void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t estimated_max_batch_usage = 1500;
   bool check_aperture_failed_once = false;

   /* Flush the sampler and render caches.  We definitely need to flush the
    * sampler cache so that we get updated contents from the render cache for
    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
    * docs to flush the cache between reinterpretations of the same surface
    * data with different formats, which blorp does for stencil and depth
    * data.
    */
   brw_emit_mi_flush(brw);

retry:
   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
   intel_batchbuffer_save_state(brw);
   drm_intel_bo *saved_bo = brw->batch.bo;
   uint32_t saved_used = USED_BATCH(brw->batch);
   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;

   switch (brw->gen) {
   case 6:
      gen6_blorp_exec(brw, params);
      break;
   case 7:
      gen7_blorp_exec(brw, params);
      break;
   default:
      /* BLORP is not supported before Gen6. */
      unreachable("not reached");
   }

   /* Make sure we didn't wrap the batch unintentionally, and make sure we
    * reserved enough space that a wrap will never happen.
    */
   assert(brw->batch.bo == saved_bo);
   assert((USED_BATCH(brw->batch) - saved_used) * 4 +
          (saved_state_batch_offset - brw->batch.state_batch_offset) <
          estimated_max_batch_usage);
   /* Shut up compiler warnings on release build */
   (void)saved_bo;
   (void)saved_used;
   (void)saved_state_batch_offset;

   /* Check if the blorp op we just did would make our batch likely to fail to
    * map all the BOs into the GPU at batch exec time later.  If so, flush the
    * batch and try again with nothing else in the batch.
    */
   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
      if (!check_aperture_failed_once) {
         check_aperture_failed_once = true;
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         goto retry;
      } else {
         int ret = intel_batchbuffer_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: blorp emit exceeded available aperture space\n");
      }
   }

   if (unlikely(brw->always_flush_batch))
      intel_batchbuffer_flush(brw);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   brw->ctx.NewDriverState = ~0ull;
   brw->no_depth_or_stencil = false;
   brw->ib.type = -1;

   /* Flush the sampler cache so any texturing from the destination is
    * coherent.
    */
   brw_emit_mi_flush(brw);
}
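brw_blorp_exec() above shows the complete reserve / save / emit / verify / retry discipline in one function. Below is a condensed model of that control flow in plain C; every type and helper name is illustrative (no Mesa API is used), and the aperture check is simulated so the retry path runs exactly once:

#include <assert.h>
#include <stdbool.h>

struct batch {
   unsigned used;    /* dwords emitted so far */
   unsigned saved;   /* rewind point, as in intel_batchbuffer_save_state() */
};

static void flush(struct batch *b) { b->used = 0; }

static void emit_all(struct batch *b)   /* stand-in for genN_blorp_exec() */
{
   for (int i = 0; i < 100; i++)
      b->used++;
}

/* Stand-in for dri_bufmgr_check_aperture_space(): fail the first try. */
static bool out_of_aperture(int attempt) { return attempt == 0; }

void exec_with_retry(struct batch *b, unsigned estimate)
{
   int attempt = 0;
retry:
   b->saved = b->used;                     /* save a rewind point */
   emit_all(b);
   assert(b->used - b->saved < estimate);  /* the estimate must hold */
   if (out_of_aperture(attempt)) {
      if (attempt++ == 0) {
         b->used = b->saved;               /* reset_to_saved() analogue */
         flush(b);                         /* start an empty batch */
         goto retry;
      }
      /* Second failure: the real driver flushes anyway and warns. */
   }
}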
Example #6
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( struct gl_context *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prim,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLboolean warn = GL_FALSE;
   GLuint i;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the 
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.  
    * 
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      uint32_t hw_prim;
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
				  (sizeof(struct brw_sampler_state) +
				   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);

      hw_prim = brw_set_prim(brw, &prim[i]);
      if (brw->state.dirty.brw) {
	 brw_validate_state(brw);

	 /* Various fallback checks:  */
	 if (brw->intel.Fallback)
	    goto out;

	 /* Check that we can fit our state in with our existing batchbuffer, or
	  * flush otherwise.
	  */
	 if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
					     brw->state.validated_bo_count)) {
	    static GLboolean warned;
	    intel_batchbuffer_flush(intel);

	    /* Validate the state after we flushed the batch (which would have
	     * changed the set of dirty state).  If we still fail to
	     * check_aperture, warn of what's happening, but attempt to continue
	     * on since it may succeed anyway, and the user would probably rather
	     * see a failure and a warning than a fallback.
	     */
	    brw_validate_state(brw);
	    if (!warned &&
		dri_bufmgr_check_aperture_space(brw->state.validated_bos,
						brw->state.validated_bo_count)) {
	       warn = GL_TRUE;
	       warned = GL_TRUE;
	    }
	 }

	 intel->no_batch_wrap = GL_TRUE;
	 brw_upload_state(brw);
      }

      if (intel->gen >= 7)
	 gen7_emit_prim(brw, &prim[i], hw_prim);
      else
	 brw_emit_prim(brw, &prim[i], hw_prim);

      intel->no_batch_wrap = GL_FALSE;

      retval = GL_TRUE;
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
 out:

   brw_state_cache_check_size(brw);

   if (warn)
      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
	      "available aperture space\n");

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}
Example #7
/* Push the state into the sarea and/or texture memory.
 */
static void
i915_emit_state(struct intel_context *intel)
{
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = i915->current;
   int i;
   int ret, count;
   GLuint dirty;
   GET_CURRENT_CONTEXT(ctx);
   BATCH_LOCALS;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap between the state emits and the primitive
    * emit header.
    *
    * It might be better to talk about explicit places where
    * scheduling is allowed, rather than assume that it is whenever a
    * batchbuffer fills up.
    *
    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
    * will be emitted under.
    */
   intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8,
				   LOOP_CLIPRECTS);
   count = 0;
 again:
   dirty = get_dirty(state);

   ret = 0;
   if (dirty & I915_UPLOAD_BUFFERS) {
     ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
     if (state->depth_region)
        ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
   }

   if (dirty & I915_UPLOAD_TEX_ALL) {
     for (i = 0; i < I915_TEX_UNITS; i++)
       if (dirty & I915_UPLOAD_TEX(i)) {
	   if (state->tex_buffer[i]) {
	       ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
	   }
       }
   }
   if (ret) {
       if (count == 0) {
	   count++;
	   intel_batchbuffer_flush(intel->batch);
	   goto again;
       } else {
	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
	   assert(0);
       }
   }

   /* work out list of buffers to emit */
   
   /* Do this here as we may have flushed the batchbuffer above,
    * causing more state to be dirty!
    */
   dirty = get_dirty(state);
   state->emitted |= dirty;
   assert(get_dirty(state) == 0);

   if (INTEL_DEBUG & DEBUG_STATE)
      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);

   if (dirty & I915_UPLOAD_INVARIENT) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
      i915_emit_invarient_state(intel);
   }

   if (dirty & I915_UPLOAD_CTX) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_CTX:\n");

      emit(intel, state->Ctx, sizeof(state->Ctx));
   }

   if (dirty & I915_UPLOAD_BUFFERS) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
      BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      OUT_RELOC(state->draw_region->buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                state->draw_region->draw_offset);

      if (state->depth_region) {
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
         OUT_RELOC(state->depth_region->buffer,
                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                   state->depth_region->draw_offset);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
      ADVANCE_BATCH();
   }

   if (dirty & I915_UPLOAD_STIPPLE) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
      emit(intel, state->Stipple, sizeof(state->Stipple));
   }

   if (dirty & I915_UPLOAD_FOG) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_FOG:\n");
      emit(intel, state->Fog, sizeof(state->Fog));
   }

   /* Combine all the dirty texture state into a single command to
    * avoid lockups on I915 hardware. 
    */
   if (dirty & I915_UPLOAD_TEX_ALL) {
      int nr = 0;

      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i))
            nr++;

      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {

            if (state->tex_buffer[i]) {
               OUT_RELOC(state->tex_buffer[i],
                         DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
                         state->tex_offset[i]);
            }
            else if (state == &i915->meta) {
               assert(i == 0);
               OUT_BATCH(0);
            }
            else {
               OUT_BATCH(state->tex_offset[i]);
            }

            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
         }
      ADVANCE_BATCH();

      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
         }
      ADVANCE_BATCH();
   }
Example #8
/* Copy BitBlt
 */
bool
intelEmitCopyBlit(struct intel_context *intel,
		  GLuint cpp,
		  GLshort src_pitch,
		  drm_intel_bo *src_buffer,
		  GLuint src_offset,
		  uint32_t src_tiling,
		  GLshort dst_pitch,
		  drm_intel_bo *dst_buffer,
		  GLuint dst_offset,
		  uint32_t dst_tiling,
		  GLshort src_x, GLshort src_y,
		  GLshort dst_x, GLshort dst_y,
		  GLshort w, GLshort h,
		  GLenum logic_op)
{
   GLuint CMD, BR13, pass = 0;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   drm_intel_bo *aper_array[3];
   bool dst_y_tiled = dst_tiling == I915_TILING_Y;
   bool src_y_tiled = src_tiling == I915_TILING_Y;
   BATCH_LOCALS;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
	 return false;
   }
   if (src_tiling != I915_TILING_NONE) {
      if (src_offset & 4095)
	 return false;
   }
   if (dst_y_tiled || src_y_tiled)
      return false;

   /* do space check before going any further */
   do {
       aper_array[0] = intel->batch.bo;
       aper_array[1] = dst_buffer;
       aper_array[2] = src_buffer;

       if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
           intel_batchbuffer_flush(intel);
           pass++;
       } else
           break;
   } while (pass < 2);

   if (pass >= 2)
      return false;

   intel_batchbuffer_require_space(intel, 8 * 4);
   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
    * the low bits.
    */
   if (src_pitch % 4 != 0 || dst_pitch % 4 != 0)
      return false;

   /* For big formats (such as floating point), do the copy using 16 or 32bpp
    * and multiply the coordinates.
    */
   if (cpp > 4) {
      if (cpp % 4 == 2) {
         dst_x *= cpp / 2;
         dst_x2 *= cpp / 2;
         src_x *= cpp / 2;
         cpp = 2;
      } else {
         assert(cpp % 4 == 0);
         dst_x *= cpp / 4;
         dst_x2 *= cpp / 4;
         src_x *= cpp / 4;
         cpp = 4;
      }
   }

   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

   switch (cpp) {
   case 1:
   case 2:
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return false;
   }

   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
      return true;
   }

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH(8);

   OUT_BATCH(CMD | (8 - 2));
   OUT_BATCH(BR13 | (uint16_t)dst_pitch);
   OUT_BATCH((dst_y << 16) | dst_x);
   OUT_BATCH((dst_y2 << 16) | dst_x2);
   OUT_RELOC_FENCED(dst_buffer,
		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		    dst_offset);
   OUT_BATCH((src_y << 16) | src_x);
   OUT_BATCH((uint16_t)src_pitch);
   OUT_RELOC_FENCED(src_buffer,
		    I915_GEM_DOMAIN_RENDER, 0,
		    src_offset);

   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel);

   return true;
}
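The cpp > 4 narrowing above is pure arithmetic: a wide pixel is re-expressed as several 2- or 4-byte units and only the x coordinates are stretched. A worked check, assuming a hypothetical 16-byte pixel (say, four floats):

/* cpp == 16: 16 % 4 == 0, so each pixel becomes four 4-byte units
 * and the x coordinates scale by 16 / 4 == 4; y and h are untouched. */
unsigned cpp = 16;
GLshort src_x = 10, dst_x = 20;
int dst_x2 = 20 + 8;                /* dst_x + w */
if (cpp > 4 && cpp % 4 == 0) {
   dst_x  *= cpp / 4;               /* 20 -> 80  */
   dst_x2 *= cpp / 4;               /* 28 -> 112 */
   src_x  *= cpp / 4;               /* 10 -> 40  */
   cpp = 4;                         /* the blit proceeds at 32bpp */
}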
Example #9
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      brw->draw.gl_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;

      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      if (brw->gen < 6)
	 brw_set_prim(brw, &prims[i]);
      else
	 gen6_set_prim(brw, &prims[i]);

retry:

      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
	 brw->no_batch_wrap = true;
	 brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(brw);
	    intel_batchbuffer_flush(brw);
	    fail_next = true;
	    goto retry;
	 } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
	 }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}
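The prims[i].indirect_offset + (indexed ? 12 : 8) arithmetic above falls straight out of the ARB_draw_indirect command layouts. Written as structs, the field being pointed at is exactly the one brw_prepare_vertices would otherwise upload for gl_BaseVertex:

/* Indirect draw command layouts from ARB_draw_indirect. */
typedef struct {
   GLuint count;
   GLuint primCount;
   GLuint first;          /* byte offset 8: non-indexed draws */
   GLuint baseInstance;
} DrawArraysIndirectCommand;

typedef struct {
   GLuint count;
   GLuint primCount;
   GLuint firstIndex;
   GLint  baseVertex;     /* byte offset 12: indexed draws */
   GLuint baseInstance;
} DrawElementsIndirectCommand;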
Example #10
bool
intelEmitImmediateColorExpandBlit(struct brw_context *brw,
				  GLuint cpp,
				  GLubyte *src_bits, GLuint src_size,
				  GLuint fg_color,
				  GLshort dst_pitch,
				  struct brw_bo *dst_buffer,
				  GLuint dst_offset,
				  enum isl_tiling dst_tiling,
				  GLshort x, GLshort y,
				  GLshort w, GLshort h,
				  enum gl_logicop_mode logic_op)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   if (dst_tiling != ISL_TILING_LINEAR) {
      if (dst_offset & 4095)
	 return false;
      if (dst_tiling == ISL_TILING_Y0)
	 return false;
   }

   assert((unsigned) logic_op <= 0x0f);
   assert(dst_pitch > 0);

   if (w < 0 || h < 0)
      return true;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __func__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   unsigned xy_setup_blt_length = devinfo->gen >= 8 ? 10 : 8;
   intel_batchbuffer_require_space(brw, (xy_setup_blt_length * 4) +
                                        (3 * 4) + dwords * 4);

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
   if (dst_tiling != ISL_TILING_LINEAR) {
      opcode |= XY_DST_TILED;
      dst_pitch /= 4;
   }

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   br13 |= br13_for_cpp(cpp);

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiling != ISL_TILING_LINEAR)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
   OUT_BATCH(opcode | (xy_setup_blt_length - 2));
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   if (devinfo->gen >= 8) {
      OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
   } else {
      OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
   }
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */
   if (devinfo->gen >= 8)
      OUT_BATCH(0);

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X));
   OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X));
   ADVANCE_BATCH();

   intel_batchbuffer_data(brw, src_bits, dwords * 4);

   brw_emit_mi_flush(brw);

   return true;
}
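Unlike the rough per-primitive estimates in the draw paths, the require_space() request here is an exact byte budget. Spelled out for the gen8 case with an assumed 64-byte source payload:

/* Byte accounting for the require_space() call above, assuming gen8
 * and src_size == 64, so dwords == ALIGN(64, 8) / 4 == 16. */
unsigned xy_setup_blt_length = 10;         /* OUT_RELOC64 takes 2 dwords */
unsigned needed = xy_setup_blt_length * 4  /* 40 bytes: XY_SETUP_BLT     */
                + 3 * 4                    /* 12 bytes: immediate header */
                + 16 * 4;                  /* 64 bytes: payload dwords   */
/* needed == 116 bytes reserved before BEGIN_BATCH_BLT */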
Example #11
/* Copy BitBlt
 */
static bool
emit_copy_blit(struct brw_context *brw,
               GLuint cpp,
               int32_t src_pitch,
               struct brw_bo *src_buffer,
               GLuint src_offset,
               enum isl_tiling src_tiling,
               int32_t dst_pitch,
               struct brw_bo *dst_buffer,
               GLuint dst_offset,
               enum isl_tiling dst_tiling,
               GLshort src_x, GLshort src_y,
               GLshort dst_x, GLshort dst_y,
               GLshort w, GLshort h,
               enum gl_logicop_mode logic_op)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   GLuint CMD, BR13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   bool dst_y_tiled = dst_tiling == ISL_TILING_Y0;
   bool src_y_tiled = src_tiling == ISL_TILING_Y0;
   uint32_t src_tile_w, src_tile_h;
   uint32_t dst_tile_w, dst_tile_h;

   if ((dst_y_tiled || src_y_tiled) && devinfo->gen < 6)
      return false;

   const unsigned bo_sizes = dst_buffer->size + src_buffer->size;

   /* do space check before going any further */
   if (!brw_batch_has_aperture_space(brw, bo_sizes))
      intel_batchbuffer_flush(brw);

   if (!brw_batch_has_aperture_space(brw, bo_sizes))
      return false;

   unsigned length = devinfo->gen >= 8 ? 10 : 8;

   intel_batchbuffer_require_space(brw, length * 4);
   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __func__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   intel_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h);
   intel_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h);

   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
    * (X direction width of the Tile). This is ensured while allocating the
    * buffer object.
    */
   assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
   assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);

   /* For big formats (such as floating point), do the copy using 16 or
    * 32bpp and multiply the coordinates.
    */
   if (cpp > 4) {
      if (cpp % 4 == 2) {
         dst_x *= cpp / 2;
         dst_x2 *= cpp / 2;
         src_x *= cpp / 2;
         cpp = 2;
      } else {
         assert(cpp % 4 == 0);
         dst_x *= cpp / 4;
         dst_x2 *= cpp / 4;
         src_x *= cpp / 4;
         cpp = 4;
      }
   }

   if (!alignment_valid(brw, dst_offset, dst_tiling))
      return false;
   if (!alignment_valid(brw, src_offset, src_tiling))
      return false;

   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
    * the low bits.  Offsets must be naturally aligned.
    */
   if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
       dst_pitch % 4 != 0 || dst_offset % cpp != 0)
      return false;

   assert(cpp <= 4);
   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

   CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp);

   /* For tiled source and destination, pitch value should be specified
    * as a number of Dwords.
    */
   if (dst_tiling != ISL_TILING_LINEAR)
      dst_pitch /= 4;

   if (src_tiling != ISL_TILING_LINEAR)
      src_pitch /= 4;

   if (dst_y2 <= dst_y || dst_x2 <= dst_x)
      return true;

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
   OUT_BATCH(CMD | (length - 2));
   OUT_BATCH(BR13 | (uint16_t)dst_pitch);
   OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X));
   OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X));
   if (devinfo->gen >= 8) {
      OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
   } else {
      OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
   }
   OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X));
   OUT_BATCH((uint16_t)src_pitch);
   if (devinfo->gen >= 8) {
      OUT_RELOC64(src_buffer, 0, src_offset);
   } else {
      OUT_RELOC(src_buffer, 0, src_offset);
   }

   ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);

   brw_emit_mi_flush(brw);

   return true;
}
Example #12
void
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                  GLuint cpp,
                                  GLubyte *src_bits, GLuint src_size,
                                  GLuint fg_color,
                                  GLshort dst_pitch,
                                  struct buffer *dst_buffer,
                                  GLuint dst_offset,
                                  GLboolean dst_tiled,
                                  GLshort x, GLshort y,
                                  GLshort w, GLshort h,
                                  GLenum logic_op)
{
    struct xy_setup_blit setup;
    struct xy_text_immediate_blit text;
    int dwords = ((src_size + 7) & ~7) / 4;

    assert( logic_op - GL_CLEAR >= 0 );
    assert( logic_op - GL_CLEAR < 0x10 );

    if (w < 0 || h < 0)
        return;

    dst_pitch *= cpp;

    if (dst_tiled)
        dst_pitch /= 4;

    DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
        __FUNCTION__,
        dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

    memset(&setup, 0, sizeof(setup));

    setup.br0.client = CLIENT_2D;
    setup.br0.opcode = OPCODE_XY_SETUP_BLT;
    setup.br0.write_alpha = (cpp == 4);
    setup.br0.write_rgb = (cpp == 4);
    setup.br0.dst_tiled = dst_tiled;
    setup.br0.length = (sizeof(setup) / sizeof(int)) - 2;

    setup.br13.dest_pitch = dst_pitch;
    setup.br13.rop = translate_raster_op(logic_op);
    setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565;
    setup.br13.clipping_enable = 0;
    setup.br13.mono_source_transparency = 1;

    setup.dw2.clip_y1 = 0;
    setup.dw2.clip_x1 = 0;
    setup.dw3.clip_y2 = 100;
    setup.dw3.clip_x2 = 100;

    setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset;
    setup.background_color = 0;
    setup.foreground_color = fg_color;
    setup.pattern_base_addr = 0;

    memset(&text, 0, sizeof(text));
    text.dw0.client = CLIENT_2D;
    text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT;
    text.dw0.pad0 = 0;
    text.dw0.byte_packed = 1;	/* ?maybe? */
    text.dw0.pad1 = 0;
    text.dw0.dst_tiled = dst_tiled;
    text.dw0.pad2 = 0;
    text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords;
    text.dw1.dest_y1 = y;	/* duplicates info in setup blit */
    text.dw1.dest_x1 = x;
    text.dw2.dest_y2 = y + h;
    text.dw2.dest_x2 = x + w;

    intel_batchbuffer_require_space( intel->batch,
                                     sizeof(setup) +
                                     sizeof(text) +
                                     dwords * 4, /* payload bytes, as copied below */
                                     INTEL_BATCH_NO_CLIPRECTS );

    intel_batchbuffer_data( intel->batch,
                            &setup,
                            sizeof(setup),
                            INTEL_BATCH_NO_CLIPRECTS );

    intel_batchbuffer_data( intel->batch,
                            &text,
                            sizeof(text),
                            INTEL_BATCH_NO_CLIPRECTS );

    intel_batchbuffer_data( intel->batch,
                            src_bits,
                            dwords * 4,
                            INTEL_BATCH_NO_CLIPRECTS );
}
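This packed-struct variant derives its command length fields from the struct sizes themselves, where the later flag-based versions hard-code 8 and 3. A sanity sketch of that arithmetic (assuming the struct layouts match their dword counts):

/* xy_setup_blit carries 8 dwords, so br0.length = 8 - 2 = 6;
 * xy_text_immediate_blit carries 3 dwords plus the inline payload,
 * so dw0.length = (3 - 2) + dwords = 1 + dwords. */
assert(sizeof(struct xy_setup_blit) / sizeof(int) == 8);
assert(sizeof(struct xy_text_immediate_blit) / sizeof(int) == 3);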
Example #13
/* Push the state into the sarea and/or texture memory.
 */
static void
i915_emit_state(struct intel_context *intel)
{
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = i915->current;
   int i;
   GLuint dirty;
   BATCH_LOCALS;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap.  
    *
    * It might be better to talk about explicit places where
    * scheduling is allowed, rather than assume that it is whenever a
    * batchbuffer fills up.
    */
   intel_batchbuffer_require_space(intel->batch, get_state_size(state), 0);

   /* Do this here as we may have flushed the batchbuffer above,
    * causing more state to be dirty!
    */
   dirty = get_dirty(state);

   if (INTEL_DEBUG & DEBUG_STATE)
      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);

   if (dirty & I915_UPLOAD_INVARIENT) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
      i915_emit_invarient_state(intel);
   }

   if (dirty & I915_UPLOAD_CTX) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_CTX:\n");

      emit(intel, state->Ctx, sizeof(state->Ctx));
   }

   if (dirty & I915_UPLOAD_BUFFERS) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
      BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, 0);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      OUT_RELOC(state->draw_region->buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
                state->draw_region->draw_offset);

      if (state->depth_region) {
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
         OUT_RELOC(state->depth_region->buffer,
                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                   DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
                   state->depth_region->draw_offset);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
      ADVANCE_BATCH();
   }

   if (dirty & I915_UPLOAD_STIPPLE) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
      emit(intel, state->Stipple, sizeof(state->Stipple));
   }

   if (dirty & I915_UPLOAD_FOG) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_FOG:\n");
      emit(intel, state->Fog, sizeof(state->Fog));
   }

   /* Combine all the dirty texture state into a single command to
    * avoid lockups on I915 hardware. 
    */
   if (dirty & I915_UPLOAD_TEX_ALL) {
      int nr = 0;

      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i))
            nr++;

      BEGIN_BATCH(2 + nr * 3, 0);
      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {

            if (state->tex_buffer[i]) {
               OUT_RELOC(state->tex_buffer[i],
                         DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
                         DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
                         state->tex_offset[i]);
            }
            else {
               assert(i == 0);
               assert(state == &i915->meta);
               OUT_BATCH(0);
            }

            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
         }
      ADVANCE_BATCH();

      BEGIN_BATCH(2 + nr * 3, 0);
      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
         }
      ADVANCE_BATCH();
   }
Example #14
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prims,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index,
				     struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the 
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_state() because it
    * may flush the batchbuffer for a blit, affecting the state flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
				  (sizeof(struct brw_sampler_state) +
				   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances) {
         brw->num_instances = prims[i].num_instances;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
      if (brw->basevertex != prims[i].basevertex) {
         brw->basevertex = prims[i].basevertex;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
      if (brw->gen < 6)
	 brw_set_prim(brw, &prims[i]);
      else
	 gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
	 brw->no_batch_wrap = true;
	 brw_upload_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(brw);
	    intel_batchbuffer_flush(brw);
	    fail_next = true;
	    goto retry;
	 } else {
	    if (intel_batchbuffer_flush(brw) == -ENOSPC) {
	       static bool warned = false;

	       if (!warned) {
		  fprintf(stderr, "i965: Single primitive emit exceeded "
			  "available aperture space\n");
		  warned = true;
	       }

	       retval = false;
	    }
	 }
      }
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
Example #15
/* Copy BitBlt
 */
void
intelEmitCopyBlit(struct intel_context *intel,
		  GLuint cpp,
		  GLshort src_pitch,
		  dri_bo *src_buffer,
		  GLuint src_offset,
		  GLboolean src_tiled,
		  GLshort dst_pitch,
		  dri_bo *dst_buffer,
		  GLuint dst_offset,
		  GLboolean dst_tiled,
		  GLshort src_x, GLshort src_y,
		  GLshort dst_x, GLshort dst_y,
		  GLshort w, GLshort h,
		  GLenum logic_op)
{
   GLuint CMD, BR13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   int ret;
   BATCH_LOCALS;

   /* do space/cliprects check before going any further */
   intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
 again:
   ret = dri_bufmgr_check_aperture_space(dst_buffer);
   ret |= dri_bufmgr_check_aperture_space(src_buffer);
   if (ret) {
     intel_batchbuffer_flush(intel->batch);
     goto again;
   }

   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   src_pitch *= cpp;
   dst_pitch *= cpp;

   BR13 = translate_raster_op(logic_op) << 16;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 |= (1 << 24);
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      BR13 |= (1 << 24) | (1 << 25);
      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return;
   }

#ifndef I915
   if (dst_tiled) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
   if (src_tiled) {
      CMD |= XY_SRC_TILED;
      src_pitch /= 4;
   }
#endif

   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
      return;
   }

   /* Initial y values don't seem to work with negative pitches.  If
    * we adjust the offsets manually (below), it seems to work fine.
    *
    * On the other hand, if we always adjust, the hardware doesn't
    * know which blit directions to use, so overlapping copypixels get
    * the wrong result.
    *
    * Concretely, the negative-pitch branch below rebases the y range
    * to 0..h and folds the row offset into the relocation instead:
    * e.g. with dst_pitch == -256 and dst_y == 10, the destination
    * relocation delta becomes dst_offset - 2560.
    */
   if (dst_pitch > 0 && src_pitch > 0) {
      assert(dst_x < dst_x2);
      assert(dst_y < dst_y2);

      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
      OUT_BATCH(CMD);
      OUT_BATCH(BR13 | dst_pitch);
      OUT_BATCH((dst_y << 16) | dst_x);
      OUT_BATCH((dst_y2 << 16) | dst_x2);
      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
		dst_offset);
      OUT_BATCH((src_y << 16) | src_x);
      OUT_BATCH(src_pitch);
      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
		src_offset);
      ADVANCE_BATCH();
   }
   else {
      assert(dst_x < dst_x2);
      assert(h > 0);

      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
      OUT_BATCH(CMD);
      OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
      OUT_BATCH((0 << 16) | dst_x);
      OUT_BATCH((h << 16) | dst_x2);
      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
		dst_offset + dst_y * dst_pitch);
      OUT_BATCH((0 << 16) | src_x);
      OUT_BATCH(src_pitch);
      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
		src_offset + src_y * src_pitch);
      ADVANCE_BATCH();
   }
}
Example #16
/* Push the state into the sarea and/or texture memory.
 */
static void
i915_emit_state(struct intel_context *intel)
{
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = &i915->state;
   int i, count, aper_count;
   GLuint dirty;
   drm_intel_bo *aper_array[3 + I915_TEX_UNITS];
   GET_CURRENT_CONTEXT(ctx);
   BATCH_LOCALS;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap between the state emits and the primitive
    * emit header.
    *
    * It might be better to talk about explicit places where
    * scheduling is allowed, rather than assume that it is whenever a
    * batchbuffer fills up.
    */
   intel_batchbuffer_require_space(intel,
				   get_state_size(state) +
                                   INTEL_PRIM_EMIT_SIZE);
   count = 0;
 again:
   if (intel->batch.bo == NULL) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
      assert(0);
   }
   aper_count = 0;
   dirty = get_dirty(state);

   aper_array[aper_count++] = intel->batch.bo;
   if (dirty & I915_UPLOAD_BUFFERS) {
      if (state->draw_region)
	 aper_array[aper_count++] = state->draw_region->bo;
      if (state->depth_region)
	 aper_array[aper_count++] = state->depth_region->bo;
   }

   if (dirty & I915_UPLOAD_TEX_ALL) {
      for (i = 0; i < I915_TEX_UNITS; i++) {
	 if (dirty & I915_UPLOAD_TEX(i)) {
	    if (state->tex_buffer[i]) {
	       aper_array[aper_count++] = state->tex_buffer[i];
	    }
	 }
      }
   }

   if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
       if (count == 0) {
	   count++;
	   intel_batchbuffer_flush(intel);
	   goto again;
       } else {
	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
	   assert(0);
       }
   }

   /* work out list of buffers to emit */
   
   /* Do this here as we may have flushed the batchbuffer above,
    * causing more state to be dirty!
    */
   dirty = get_dirty(state);
   state->emitted |= dirty;
   assert(get_dirty(state) == 0);

   if (INTEL_DEBUG & DEBUG_STATE)
      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);

   if (dirty & I915_UPLOAD_INVARIENT) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
      i915_emit_invarient_state(intel);
   }

   if (dirty & I915_UPLOAD_RASTER_RULES) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n");
      emit(intel, state->RasterRules, sizeof(state->RasterRules));
   }

   if (dirty & I915_UPLOAD_CTX) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_CTX:\n");

      emit(intel, state->Ctx, sizeof(state->Ctx));
   }

   if (dirty & I915_UPLOAD_BLEND) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BLEND:\n");

      emit(intel, state->Blend, sizeof(state->Blend));
   }

   if (dirty & I915_UPLOAD_BUFFERS) {
      GLuint count;

      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");

      count = 17;
      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
         count++;

      BEGIN_BATCH(count);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      if (state->draw_region) {
	 OUT_RELOC(state->draw_region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
      } else {
	 OUT_BATCH(0);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
      if (state->depth_region) {
         OUT_RELOC(state->depth_region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
      } else {
	 OUT_BATCH(0);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);

      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
         OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);

      ADVANCE_BATCH();
   }

   if (dirty & I915_UPLOAD_STIPPLE) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
      emit(intel, state->Stipple, sizeof(state->Stipple));
   }

   /* Combine all the dirty texture state into a single command to
    * avoid lockups on I915 hardware. 
    */
   if (dirty & I915_UPLOAD_TEX_ALL) {
      int nr = 0;
      GLuint unwind;

      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i))
            nr++;

      BEGIN_BATCH(2 + nr * 3);
      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
	    OUT_RELOC(state->tex_buffer[i],
		      I915_GEM_DOMAIN_SAMPLER, 0,
		      state->tex_offset[i]);

            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
         }
      ADVANCE_BATCH();

      unwind = intel->batch.used;
      BEGIN_BATCH(2 + nr * 3);
      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
         }
      ADVANCE_BATCH();
      if (i915->last_sampler &&
	  memcmp(intel->batch.map + i915->last_sampler,
		 intel->batch.map + unwind,
		 (2 + nr*3)*sizeof(int)) == 0)
	  intel->batch.used = unwind;
      else
	  i915->last_sampler = unwind;
   }
Example #17
/**
 * Copy the back color buffer to the front color buffer. 
 * Used for SwapBuffers().
 */
void
intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
                const drm_clip_rect_t * rect)
{

   struct intel_context *intel;
   const intelScreenPrivate *intelScreen;
   int ret;

   DBG("%s\n", __FUNCTION__);

   assert(dPriv);

   intel = intelScreenContext(dPriv->driScreenPriv->private);
   if (!intel)
      return;

   intelScreen = intel->intelScreen;

   if (intel->last_swap_fence) {
      dri_fence_wait(intel->last_swap_fence);
      dri_fence_unreference(intel->last_swap_fence);
      intel->last_swap_fence = NULL;
   }
   intel->last_swap_fence = intel->first_swap_fence;
   intel->first_swap_fence = NULL;

   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
    * should work regardless.
    */
   LOCK_HARDWARE(intel);

   if (dPriv && dPriv->numClipRects) {
      struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
      struct intel_region *src, *dst;
      int nbox = dPriv->numClipRects;
      drm_clip_rect_t *pbox = dPriv->pClipRects;
      int cpp;
      int src_pitch, dst_pitch;
      unsigned short src_x, src_y;
      int BR13, CMD;
      int i;

      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);

      src_pitch = src->pitch * src->cpp;
      dst_pitch = dst->pitch * dst->cpp;

      cpp = src->cpp;

      ASSERT(intel_fb);
      ASSERT(intel_fb->Base.Name == 0);    /* Not a user-created FBO */
      ASSERT(src);
      ASSERT(dst);
      ASSERT(src->cpp == dst->cpp);

      if (cpp == 2) {
	 BR13 = (0xCC << 16) | (1 << 24);
	 CMD = XY_SRC_COPY_BLT_CMD;
      }
      else {
	 BR13 = (0xCC << 16) | (1 << 24) | (1 << 25);
	 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      }

#ifndef I915
      if (src->tiled) {
	 CMD |= XY_SRC_TILED;
	 src_pitch /= 4;
      }
      if (dst->tiled) {
	 CMD |= XY_DST_TILED;
	 dst_pitch /= 4;
      }
#endif
      /* do space/cliprects check before going any further */
      intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS);
   again:
      ret = dri_bufmgr_check_aperture_space(dst->buffer);
      ret |= dri_bufmgr_check_aperture_space(src->buffer);
      
      if (ret) {
	intel_batchbuffer_flush(intel->batch);
	goto again;
      }
      
      for (i = 0; i < nbox; i++, pbox++) {
	 drm_clip_rect_t box = *pbox;

	 if (rect) {
	    if (!intel_intersect_cliprects(&box, &box, rect))
	       continue;
	 }

	 if (box.x1 >= box.x2 ||
	     box.y1 >= box.y2)
	    continue;

	 assert(box.x1 < box.x2);
	 assert(box.y1 < box.y2);
	 src_x = box.x1 - dPriv->x + dPriv->backX;
	 src_y = box.y1 - dPriv->y + dPriv->backY;

	 BEGIN_BATCH(8, REFERENCES_CLIPRECTS);
	 OUT_BATCH(CMD);
	 OUT_BATCH(BR13 | dst_pitch);
	 OUT_BATCH((box.y1 << 16) | box.x1);
	 OUT_BATCH((box.y2 << 16) | box.x2);

	 OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
	 OUT_BATCH((src_y << 16) | src_x);
	 OUT_BATCH(src_pitch);
	 OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
	 ADVANCE_BATCH();
      }

      if (intel->first_swap_fence)
	 dri_fence_unreference(intel->first_swap_fence);
      intel_batchbuffer_flush(intel->batch);
      intel->first_swap_fence = intel->batch->last_fence;
      if (intel->first_swap_fence)
	 dri_fence_reference(intel->first_swap_fence);
   }

   UNLOCK_HARDWARE(intel);
}
Example #18
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
				     const struct gl_client_array *arrays[],
				     const struct _mesa_prim *prim,
				     GLuint nr_prims,
				     const struct _mesa_index_buffer *ib,
				     GLuint min_index,
				     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the 
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   /* Resolves must occur after updating state and finalizing textures but
    * before setting up any hardware state for this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.  
    * 
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
				  (sizeof(struct brw_sampler_state) +
				   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(intel);

      if (intel->gen < 6)
	 brw_set_prim(brw, &prim[i]);
      else
	 gen6_set_prim(brw, &prim[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
	 intel->no_batch_wrap = true;
	 brw_upload_state(brw);

	 if (unlikely(brw->intel.Fallback)) {
	    intel->no_batch_wrap = false;
	    retval = false;
	    goto out;
	 }
      }

      if (intel->gen >= 7)
	 gen7_emit_prim(brw, &prim[i], brw->primitive);
      else
	 brw_emit_prim(brw, &prim[i], brw->primitive);

      intel->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(intel);
	    intel_batchbuffer_flush(intel);
	    fail_next = true;
	    goto retry;
	 } else {
	    if (intel_batchbuffer_flush(intel) == -ENOSPC) {
	       static bool warned = false;

	       if (!warned) {
		  fprintf(stderr, "i965: Single primitive emit exceeded "
			  "available aperture space\n");
		  warned = true;
	       }

	       retval = false;
	    }
	 }
      }

      if (!_mesa_meta_in_progress(ctx))
         brw_update_primitive_count(brw, &prim[i]);
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
 out:

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
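
The reserve/save/emit/retry dance above is easy to get wrong, so it is worth seeing distilled. Below is a minimal sketch, not part of this listing, that isolates the pattern using the same batchbuffer helpers called above; the function name and the emit callback are hypothetical.

/* Hypothetical helper distilling the batch-wrap retry pattern above. */
static void
emit_with_retry(struct intel_context *intel, int estimated_size,
                void (*emit)(struct intel_context *))
{
   bool retried = false;

   /* Reserve worst-case space so the batch cannot wrap mid-emit, then
    * snapshot the batch so a failed attempt can be rolled back.
    */
   intel_batchbuffer_require_space(intel, estimated_size, false);
   intel_batchbuffer_save_state(intel);

retry:
   emit(intel);

   if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
      if (!retried) {
         /* First failure: roll back, flush everything else out, and
          * try again with this emit alone in a fresh batch.
          */
         retried = true;
         intel_batchbuffer_reset_to_saved(intel);
         intel_batchbuffer_flush(intel);
         goto retry;
      }
      /* Even alone in the batch it does not fit; flush and give up. */
      intel_batchbuffer_flush(intel);
   }
}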
Example no. 19
void
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
				  GLuint cpp,
				  GLubyte *src_bits, GLuint src_size,
				  GLuint fg_color,
				  GLshort dst_pitch,
				  dri_bo *dst_buffer,
				  GLuint dst_offset,
				  GLboolean dst_tiled,
				  GLshort x, GLshort y,
				  GLshort w, GLshort h,
				  GLenum logic_op)
{
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   assert(logic_op >= GL_CLEAR);
   assert(logic_op - GL_CLEAR < 0x10);

   if (w < 0 || h < 0)
      return;

   dst_pitch *= cpp;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __FUNCTION__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   intel_batchbuffer_require_space( intel->batch,
				    (8 * 4) +
				    (3 * 4) +
				    (dwords * 4),
				    NO_LOOP_CLIPRECTS );

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
#ifndef I915
   if (dst_tiled) {
      opcode |= XY_DST_TILED;
      dst_pitch /= 4;
   }
#endif

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   if (cpp == 2)
      br13 |= BR13_565;
   else
      br13 |= BR13_8888;

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiled)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS);
   OUT_BATCH(opcode);
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   ADVANCE_BATCH();

   intel_batchbuffer_data( intel->batch,
			   src_bits,
			   dwords * 4,
			   NO_LOOP_CLIPRECTS );
}
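
The payload size math above deserves a note: the immediate bitmap must occupy a whole number of quadwords, which is what ALIGN(src_size, 8) / 4 computes. A worked sketch follows, assuming the usual power-of-two ALIGN macro from a driver header (shown here only to make the dword arithmetic concrete).

/* Typical power-of-two ALIGN macro (the real one lives in a driver
 * header); shown only to illustrate the computation above.
 */
#define ALIGN(value, alignment) \
   (((value) + (alignment) - 1) & ~((alignment) - 1))

/* Example: src_size = 30 bytes
 *   ALIGN(30, 8) = 32 bytes  ->  dwords = 32 / 4 = 8
 * The XY_TEXT_IMMEDIATE header then carries a length of (3 - 2) + 8:
 * the hardware length field excludes the first two dwords, so a
 * 3-dword command plus 8 payload dwords encodes as 9.
 */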
Example no. 20
bool
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
				  GLuint cpp,
				  GLubyte *src_bits, GLuint src_size,
				  GLuint fg_color,
				  GLshort dst_pitch,
				  drm_intel_bo *dst_buffer,
				  GLuint dst_offset,
				  uint32_t dst_tiling,
				  GLshort x, GLshort y,
				  GLshort w, GLshort h,
				  GLenum logic_op)
{
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
	 return false;
      if (dst_tiling == I915_TILING_Y)
	 return false;
   }

   assert((logic_op >= GL_CLEAR) && (logic_op <= (GL_CLEAR + 0x0f)));
   assert(dst_pitch > 0);

   if (w < 0 || h < 0)
      return true;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __FUNCTION__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   intel_batchbuffer_require_space(intel,
				   (8 * 4) +
				   (3 * 4) +
				   dwords * 4);

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   br13 |= br13_for_cpp(cpp);

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiling != I915_TILING_NONE)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH(8 + 3);
   OUT_BATCH(opcode | (8 - 2));
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   OUT_RELOC_FENCED(dst_buffer,
		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		    dst_offset);
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   ADVANCE_BATCH();

   intel_batchbuffer_data(intel, src_bits, dwords * 4);

   intel_batchbuffer_emit_mi_flush(intel);

   return true;
}
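
br13_for_cpp() is called here but not shown in this listing. A minimal sketch consistent with its call sites and with the open-coded BR13_565 / BR13_8888 selection in example no. 19; the exact constants are assumed from the driver's register definitions.

static uint32_t
br13_for_cpp(int cpp)
{
   switch (cpp) {
   case 1:
      return BR13_8;     /* 8 bpp */
   case 2:
      return BR13_565;   /* 16 bpp, RGB565 */
   case 4:
      return BR13_8888;  /* 32 bpp, ARGB8888 */
   default:
      assert(0);
      return 0;
   }
}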
Example no. 21
/* Copy BitBlt
 */
GLboolean
intelEmitCopyBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort src_pitch,
                  drm_intel_bo *src_buffer,
                  GLuint src_offset,
                  uint32_t src_tiling,
                  GLshort dst_pitch,
                  drm_intel_bo *dst_buffer,
                  GLuint dst_offset,
                  uint32_t dst_tiling,
                  GLshort src_x, GLshort src_y,
                  GLshort dst_x, GLshort dst_y,
                  GLshort w, GLshort h,
                  GLenum logic_op)
{
    GLuint CMD, BR13, pass = 0;
    int dst_y2 = dst_y + h;
    int dst_x2 = dst_x + w;
    drm_intel_bo *aper_array[3];
    BATCH_LOCALS;

    if (dst_tiling != I915_TILING_NONE) {
        if (dst_offset & 4095)
            return GL_FALSE;
        if (dst_tiling == I915_TILING_Y)
            return GL_FALSE;
    }
    if (src_tiling != I915_TILING_NONE) {
        if (src_offset & 4095)
            return GL_FALSE;
        if (src_tiling == I915_TILING_Y)
            return GL_FALSE;
    }

    /* Do the aperture check before emitting anything: the batch plus both
     * BOs must fit together.  Flush once and re-check; a second failure
     * means they can never fit in one batch.
     */
    do {
        aper_array[0] = intel->batch.bo;
        aper_array[1] = dst_buffer;
        aper_array[2] = src_buffer;

        if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
            intel_batchbuffer_flush(intel);
            pass++;
        } else
            break;
    } while (pass < 2);

    if (pass >= 2)
        return GL_FALSE;

    intel_batchbuffer_require_space(intel, 8 * 4, true);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __FUNCTION__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
        dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

    src_pitch *= cpp;
    dst_pitch *= cpp;

    /* For big formats (such as floating point), do the copy using 32bpp and
     * multiply the coordinates.
     */
    if (cpp > 4) {
        assert(cpp % 4 == 0);
        dst_x *= cpp / 4;
        dst_x2 *= cpp / 4;
        src_x *= cpp / 4;
        cpp = 4;
    }
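    /* Worked example: with cpp = 16 (e.g. RGBA32F), each texel is copied
     * as four 32bpp pixels, so the x coordinates are scaled by 16 / 4 = 4
     * while the y coordinates and the byte pitches stay unchanged.
     */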

    BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

    switch (cpp) {
    case 1:
    case 2:
        CMD = XY_SRC_COPY_BLT_CMD;
        break;
    case 4:
        CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
        break;
    default:
        return GL_FALSE;
    }

#ifndef I915
    if (dst_tiling != I915_TILING_NONE) {
        CMD |= XY_DST_TILED;
        dst_pitch /= 4;
    }
    if (src_tiling != I915_TILING_NONE) {
        CMD |= XY_SRC_TILED;
        src_pitch /= 4;
    }
#endif

    if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
        return GL_TRUE;
    }

    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);

    BEGIN_BATCH_BLT(8);
    OUT_BATCH(CMD);
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
    OUT_BATCH((dst_y2 << 16) | dst_x2);
    OUT_RELOC_FENCED(dst_buffer,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                     dst_offset);
    OUT_BATCH((src_y << 16) | src_x);
    OUT_BATCH((uint16_t)src_pitch);
    OUT_RELOC_FENCED(src_buffer,
                     I915_GEM_DOMAIN_RENDER, 0,
                     src_offset);
    ADVANCE_BATCH();

    intel_batchbuffer_emit_mi_flush(intel);

    return GL_TRUE;
}
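
translate_raster_op(), shared by all three blit emitters above, maps a GL logic op onto the blitter's 8-bit ROP code in BR13. A partial sketch with the common cases follows; the full helper covers all sixteen GL logic ops, and the ROP values are the classic two-operand BLT encodings.

static GLuint
translate_raster_op(GLenum logicop)
{
   switch (logicop) {
   case GL_CLEAR:  return 0x00;  /* dst = 0 */
   case GL_NOOP:   return 0xAA;  /* dst unchanged */
   case GL_XOR:    return 0x66;  /* dst = src ^ dst */
   case GL_COPY:   return 0xCC;  /* dst = src (SRCCOPY) */
   case GL_SET:    return 0xFF;  /* dst = 1 */
   default:        return 0xCC;  /* sketch only: fall back to copy */
   }
}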