Example #1
static void brw_emit_prim(struct brw_context *brw,
			  const struct _mesa_prim *prim,
			  uint32_t hw_prim)
{
   struct intel_context *intel = &brw->intel;
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (intel->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the GPU besides the draw code.
    */
   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }

   BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
	     hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
	     vertex_access_type);
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = true;

   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }
}
Example #2
/**
 * Called via glUnmapBuffer().
 */
static GLboolean
intel_bufferobj_unmap(GLcontext * ctx,
                      GLenum target, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);
   assert(obj->Pointer);
   if (intel_obj->sys_buffer != NULL) {
      /* always keep the mapping around. */
   } else if (intel_obj->range_map_buffer != NULL) {
      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(intel->batch);
      free(intel_obj->range_map_buffer);
      intel_obj->range_map_buffer = NULL;
   } else if (intel_obj->range_map_bo != NULL) {
      if (intel_obj->mapped_gtt) {
	 drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo);
      } else {
	 drm_intel_bo_unmap(intel_obj->range_map_bo);
      }

      intel_emit_linear_blit(intel,
			     intel_obj->buffer, obj->Offset,
			     intel_obj->range_map_bo, 0,
			     obj->Length);

      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(intel->batch);

      drm_intel_bo_unreference(intel_obj->range_map_bo);
      intel_obj->range_map_bo = NULL;
   } else if (intel_obj->buffer != NULL) {
      if (intel_obj->mapped_gtt) {
	 drm_intel_gem_bo_unmap_gtt(intel_obj->buffer);
      } else {
	 drm_intel_bo_unmap(intel_obj->buffer);
      }
   }
   obj->Pointer = NULL;
   obj->Offset = 0;
   obj->Length = 0;

   return GL_TRUE;
}
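
The hook above runs when the application calls glUnmapBuffer(). A minimal app-side sketch that exercises the range-map path it handles (the buffer handle and sizes are illustrative; assumes a context with ARB_map_buffer_range and prototypes from a GL loader, here libepoxy's <epoxy/gl.h>):

#include <epoxy/gl.h>
#include <string.h>

/* Stream 256 bytes into an existing VBO.  glUnmapBuffer() is where the
 * driver blits the staging data back into the real BO and emits the
 * MI_FLUSH seen in intel_bufferobj_unmap() above. */
static void upload_range(GLuint vbo, const void *data)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);
   void *ptr = glMapBufferRange(GL_ARRAY_BUFFER, 0, 256,
                                GL_MAP_WRITE_BIT |
                                GL_MAP_INVALIDATE_RANGE_BIT);
   if (ptr) {
      memcpy(ptr, data, 256);
      glUnmapBuffer(GL_ARRAY_BUFFER);
   }
}
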
Example #3
static void gen7_emit_prim(struct brw_context *brw,
			   const struct _mesa_prim *prim,
			   uint32_t hw_prim)
{
   struct intel_context *intel = &brw->intel;
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   verts_per_instance = trim(prim->mode, prim->count);

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the GPU besides the draw code.
    */
   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }

   BEGIN_BATCH(7);
   OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
   OUT_BATCH(hw_prim | vertex_access_type);
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(1); // instance count
   OUT_BATCH(0); // start instance location
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }
}
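
The 3DPRIMITIVE payload written in Examples #1 and #3 is essentially the GL draw-call parameters in hardware order. A hedged sketch of that mapping using the GL 4.2 entry point (for orientation only; gen7_emit_prim above hardcodes one instance and a zero start instance, while Example #1 passes prim->num_instances and prim->base_instance; assumes an index buffer is bound):

#include <epoxy/gl.h>

/* Rough correspondence to the locals above:
 *   count          -> verts_per_instance
 *   index offset   -> start_vertex_location (via brw->ib.start_vertex_offset)
 *   basevertex     -> base_vertex_location
 *   instancecount  -> the instance-count DWord
 *   baseinstance   -> the start-instance DWord */
static void draw_indexed_instanced(GLsizei count, GLsizei instancecount,
                                   GLint basevertex, GLuint baseinstance)
{
   glDrawElementsInstancedBaseVertexBaseInstance(GL_TRIANGLES, count,
                                                 GL_UNSIGNED_INT, NULL,
                                                 instancecount, basevertex,
                                                 baseinstance);
}
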
Example #4
void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
   switch (brw->gen) {
   case 6:
      gen6_blorp_exec(brw, params);
      break;
   case 7:
      gen7_blorp_exec(brw, params);
      break;
   default:
      /* BLORP is not supported before Gen6. */
      assert(false);
      break;
   }

   if (unlikely(brw->always_flush_batch))
      intel_batchbuffer_flush(brw);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   brw->state.dirty.brw = ~0;
   brw->state.dirty.cache = ~0;
   brw->state_batch_count = 0;
   brw->batch.need_workaround_flush = true;
   brw->ib.type = -1;
   intel_batchbuffer_clear_cache(brw);

   /* Flush the sampler cache so any texturing from the destination is
    * coherent.
    */
   intel_batchbuffer_emit_mi_flush(brw);
}
Example #5
static void
intel_bufferobj_copy_subdata(struct gl_context *ctx,
			     struct gl_buffer_object *src,
			     struct gl_buffer_object *dst,
			     GLintptr read_offset, GLintptr write_offset,
			     GLsizeiptr size)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_src = intel_buffer_object(src);
   struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
   drm_intel_bo *src_bo, *dst_bo;
   GLuint src_offset;

   if (size == 0)
      return;

   /* If we're in system memory, just map and memcpy. */
   if (intel_src->sys_buffer || intel_dst->sys_buffer || intel->gen >= 6) {
      /* The same buffer may be used, but note that regions copied may
       * not overlap.
       */
      if (src == dst) {
	 char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
					       GL_MAP_READ_BIT, dst);
	 memmove(ptr + write_offset, ptr + read_offset, size);
	 intel_bufferobj_unmap(ctx, dst);
      } else {
	 const char *src_ptr;
	 char *dst_ptr;

	 src_ptr =  intel_bufferobj_map_range(ctx, 0, src->Size,
					      GL_MAP_READ_BIT, src);
	 dst_ptr =  intel_bufferobj_map_range(ctx, 0, dst->Size,
					      GL_MAP_WRITE_BIT, dst);

	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);

	 intel_bufferobj_unmap(ctx, src);
	 intel_bufferobj_unmap(ctx, dst);
      }
      return;
   }

   /* Otherwise, we have real BOs, so blit them. */

   dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
   src_bo = intel_bufferobj_source(intel, intel_src, 64, &src_offset);

   intel_emit_linear_blit(intel,
			  dst_bo, write_offset,
			  src_bo, read_offset + src_offset, size);

   /* Since we've emitted some blits to buffers that will (likely) be used
    * in rendering operations in other cache domains in this batch, emit a
    * flush.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer.
    */
   intel_batchbuffer_emit_mi_flush(intel);
}
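
intel_bufferobj_copy_subdata() backs glCopyBufferSubData(); whether the driver takes the map-and-memcpy path or the blit-plus-flush path above is invisible to the application. A minimal usage sketch (handles and sizes are illustrative; assumes a GL 3.1+ context):

#include <epoxy/gl.h>

/* Copy 1 KiB from the start of src_buf to offset 4096 in dst_buf. */
static void copy_buffer_region(GLuint src_buf, GLuint dst_buf)
{
   glBindBuffer(GL_COPY_READ_BUFFER, src_buf);
   glBindBuffer(GL_COPY_WRITE_BUFFER, dst_buf);
   glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
                       0 /* readOffset */, 4096 /* writeOffset */,
                       1024 /* size */);
}
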
Example #6
/**
 * Used to initialize the alpha value of an ARGB8888 miptree after copying
 * into it from an XRGB8888 source.
 *
 * This is very common with glCopyTexImage2D().  Note that the coordinates are
 * relative to the start of the miptree, not relative to a slice within the
 * miptree.
 */
static void
intel_miptree_set_alpha_to_one(struct brw_context *brw,
                              struct intel_mipmap_tree *mt,
                              int x, int y, int width, int height)
{
   uint32_t BR13, CMD;
   int pitch, cpp;
   drm_intel_bo *aper_array[2];

   pitch = mt->pitch;
   cpp = mt->cpp;

   DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
       __FUNCTION__, mt->bo, pitch, x, y, width, height);

   BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
   CMD = XY_COLOR_BLT_CMD;
   CMD |= XY_BLT_WRITE_ALPHA;

   if (mt->tiling != I915_TILING_NONE) {
      CMD |= XY_DST_TILED;
      pitch /= 4;
   }
   BR13 |= pitch;

   /* do space check before going any further */
   aper_array[0] = brw->batch.bo;
   aper_array[1] = mt->bo;

   if (drm_intel_bufmgr_check_aperture_space(aper_array,
					     ARRAY_SIZE(aper_array)) != 0) {
      intel_batchbuffer_flush(brw);
   }

   unsigned length = brw->gen >= 8 ? 7 : 6;
   bool dst_y_tiled = mt->tiling == I915_TILING_Y;

   BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false);
   OUT_BATCH(CMD | (length - 2));
   OUT_BATCH(BR13);
   OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X));
   OUT_BATCH(SET_FIELD(y + height, BLT_Y) | SET_FIELD(x + width, BLT_X));
   if (brw->gen >= 8) {
      OUT_RELOC64(mt->bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  0);
   } else {
      OUT_RELOC(mt->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                0);
   }
   OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
   ADVANCE_BATCH_TILED(dst_y_tiled, false);

   intel_batchbuffer_emit_mi_flush(brw);
}
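
As the function comment notes, this fix-up usually follows a glCopyTexImage2D() that reads an XRGB (alpha-less) window surface into a texture the driver stores as ARGB8888. A minimal trigger, as a sketch (assumes a bound 2D texture and a window-system read buffer without an alpha channel):

#include <epoxy/gl.h>

/* Copy a 256x256 region of the current read buffer into level 0 of the
 * bound texture.  If the source has no alpha but the texture does, the
 * driver follows up with the alpha-to-one blit shown above. */
static void grab_framebuffer_region(void)
{
   glCopyTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 0, 0, 256, 256, 0);
}
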
Example #7
static void brw_emit_prim(struct brw_context *brw,
			  const struct _mesa_prim *prim,
			  uint32_t hw_prim)
{
   struct brw_3d_primitive prim_packet;
   struct intel_context *intel = &brw->intel;

   if (INTEL_DEBUG & DEBUG_PRIMS)
      printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
		   prim->start, prim->count);

   prim_packet.header.opcode = CMD_3D_PRIM;
   prim_packet.header.length = sizeof(prim_packet)/4 - 2;
   prim_packet.header.pad = 0;
   prim_packet.header.topology = hw_prim;
   prim_packet.header.indexed = prim->indexed;

   prim_packet.verts_per_instance = trim(prim->mode, prim->count);
   prim_packet.start_vert_location = prim->start;
   if (prim->indexed)
      prim_packet.start_vert_location += brw->ib.start_vertex_offset;
   prim_packet.instance_count = 1;
   prim_packet.start_instance_location = 0;
   prim_packet.base_vert_location = prim->basevertex;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the GPU besides the draw code.
    */
   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel->batch);
   }
   if (prim_packet.verts_per_instance) {
      intel_batchbuffer_data( brw->intel.batch, &prim_packet,
			      sizeof(prim_packet));
   }
   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel->batch);
   }
}
Example #8
void
brw_end_transform_feedback(struct gl_context *ctx,
                           struct gl_transform_feedback_object *obj)
{
   /* After EndTransformFeedback, it's likely that the client program will try
    * to draw using the contents of the transform feedback buffer as vertex
    * input.  In order for this to work, we need to flush the data through at
    * least the GS stage of the pipeline, and flush out the render cache.  For
    * simplicity, just do a full flush.
    */
   struct brw_context *brw = brw_context(ctx);
   intel_batchbuffer_emit_mi_flush(brw);
}
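
On the application side, the scenario that comment describes is: finish capturing, then immediately source the captured buffer as vertex input. A sketch (handles are illustrative; assumes a GL 3.0+ context with transform-feedback capture already running and, in core profile, a vertex array object bound):

#include <epoxy/gl.h>

static void capture_then_draw(GLuint tfb_buf, GLsizei captured_verts)
{
   glEndTransformFeedback();   /* reaches brw_end_transform_feedback() */

   /* Draw straight from the buffer the SOL stage just wrote.  Without the
    * full flush above, the vertex fetcher could read stale data. */
   glBindBuffer(GL_ARRAY_BUFFER, tfb_buf);
   glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, NULL);
   glEnableVertexAttribArray(0);
   glDrawArrays(GL_POINTS, 0, captured_verts);
}
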
Example #9
void
brw_meta_resolve_color(struct brw_context *brw,
                       struct intel_mipmap_tree *mt)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint fbo, rbo;
   struct rect rect;

   intel_batchbuffer_emit_mi_flush(brw);

   _mesa_meta_begin(ctx, MESA_META_ALL);

   _mesa_GenFramebuffers(1, &fbo);
   rbo = brw_get_rb_for_slice(brw, mt, 0, 0, false);

   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
   _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER,
                                 GL_COLOR_ATTACHMENT0,
                                 GL_RENDERBUFFER, rbo);
   _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0);

   brw_fast_clear_init(brw);

   use_rectlist(brw, true);

   brw_bind_rep_write_shader(brw, (float *) fast_clear_color);

   set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);

   mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
   get_resolve_rect(brw, mt, &rect);

   brw_draw_rectlist(ctx, &rect, 1);

   set_fast_clear_op(brw, 0);
   use_rectlist(brw, false);

   _mesa_DeleteRenderbuffers(1, &rbo);
   _mesa_DeleteFramebuffers(1, &fbo);

   _mesa_meta_end(ctx);

   /* We're typically called from intel_update_state() and we're supposed to
    * return with the state all updated to what it was before
    * brw_meta_resolve_color() was called.  The meta rendering will have
    * messed up the state and we need to call _mesa_update_state() again to
    * get back to where we were supposed to be when resolve was called.
    */
   if (ctx->NewState)
      _mesa_update_state(ctx);
}
Example #10
/**
 * Called by Mesa when rendering to a texture is done.
 */
static void
intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb)
{
   struct brw_context *brw = brw_context(ctx);

   DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));

   /* Since we've (probably) rendered to the texture and will (likely) use
    * it in the texture domain later on in this batchbuffer, flush the
    * batch.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer like GEM does in the kernel.
    */
   intel_batchbuffer_emit_mi_flush(brw);
}
Example #11
/**
 * Used to initialize the alpha value of an ARGB8888 miptree after copying
 * into it from an XRGB8888 source.
 *
 * This is very common with glCopyTexImage2D().  Note that the coordinates are
 * relative to the start of the miptree, not relative to a slice within the
 * miptree.
 */
static void
intel_miptree_set_alpha_to_one(struct brw_context *brw,
                              struct intel_mipmap_tree *mt,
                              int x, int y, int width, int height)
{
   struct intel_region *region = mt->region;
   uint32_t BR13, CMD;
   int pitch, cpp;
   drm_intel_bo *aper_array[2];

   pitch = region->pitch;
   cpp = region->cpp;

   DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
       __FUNCTION__, region->bo, pitch, x, y, width, height);

   BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
   CMD = XY_COLOR_BLT_CMD;
   CMD |= XY_BLT_WRITE_ALPHA;

   if (region->tiling != I915_TILING_NONE) {
      CMD |= XY_DST_TILED;
      pitch /= 4;
   }
   BR13 |= pitch;

   /* do space check before going any further */
   aper_array[0] = brw->batch.bo;
   aper_array[1] = region->bo;

   if (drm_intel_bufmgr_check_aperture_space(aper_array,
					     ARRAY_SIZE(aper_array)) != 0) {
      intel_batchbuffer_flush(brw);
   }

   bool dst_y_tiled = region->tiling == I915_TILING_Y;

   BEGIN_BATCH_BLT_TILED(6, dst_y_tiled, false);
   OUT_BATCH(CMD | (6 - 2));
   OUT_BATCH(BR13);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + height) << 16) | (x + width));
   OUT_RELOC_FENCED(region->bo,
		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		    0);
   OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
   ADVANCE_BATCH_TILED(dst_y_tiled, false);

   intel_batchbuffer_emit_mi_flush(brw);
}
Example #12
static void
intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
	       GLenum condition, GLbitfield flags)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_sync_object *sync = (struct intel_sync_object *)s;

   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
   intel_batchbuffer_emit_mi_flush(intel->batch);

   sync->bo = intel->batch->buf;
   drm_intel_bo_reference(sync->bo);

   intelFlush(ctx);
}
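
intel_fence_sync() backs glFenceSync(): the flush plus the reference taken on the batch BO is what later lets the wait entry points poll for completion. A minimal usage sketch (assumes a context with ARB_sync):

#include <epoxy/gl.h>

static void wait_for_gpu(void)
{
   GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);

   /* Block (up to one second) until everything submitted so far retires. */
   glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT,
                    1000000000ull /* ns */);
   glDeleteSync(fence);
}
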
Example #13
/**
 * Used to initialize the alpha value of an ARGB8888 miptree after copying
 * into it from an XRGB8888 source.
 *
 * This is very common with glCopyTexImage2D().  Note that the coordinates are
 * relative to the start of the miptree, not relative to a slice within the
 * miptree.
 */
static void
intel_miptree_set_alpha_to_one(struct intel_context *intel,
                              struct intel_mipmap_tree *mt,
                              int x, int y, int width, int height)
{
   struct intel_region *region = mt->region;
   uint32_t BR13, CMD;
   int pitch, cpp;
   drm_intel_bo *aper_array[2];
   BATCH_LOCALS;

   pitch = region->pitch;
   cpp = region->cpp;

   DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
       __func__, region->bo, pitch, x, y, width, height);

   BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
   CMD = XY_COLOR_BLT_CMD;
   CMD |= XY_BLT_WRITE_ALPHA;

   BR13 |= pitch;

   /* do space check before going any further */
   aper_array[0] = intel->batch.bo;
   aper_array[1] = region->bo;

   if (drm_intel_bufmgr_check_aperture_space(aper_array,
					     ARRAY_SIZE(aper_array)) != 0) {
      intel_batchbuffer_flush(intel);
   }

   BEGIN_BATCH(6);
   OUT_BATCH(CMD | (6 - 2));
   OUT_BATCH(BR13);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + height) << 16) | (x + width));
   OUT_RELOC_FENCED(region->bo,
		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		    0);
   OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel);
}
Example #14
/**
 * Called by Mesa when rendering to a texture is done.
 */
static void
intel_finish_render_texture(GLcontext * ctx,
                            struct gl_renderbuffer_attachment *att)
{
   struct intel_context *intel = intel_context(ctx);
   struct gl_texture_object *tex_obj = att->Texture;
   struct gl_texture_image *image =
      tex_obj->Image[att->CubeMapFace][att->TextureLevel];
   struct intel_texture_image *intel_image = intel_texture_image(image);

   /* Flag that this image may now be validated into the object's miptree. */
   intel_image->used_as_render_target = GL_FALSE;

   /* Since we've (probably) rendered to the texture and will (likely) use
    * it in the texture domain later on in this batchbuffer, flush the
    * batch.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer like GEM does in the kernel.
    */
   intel_batchbuffer_emit_mi_flush(intel->batch);
}
Example #15
/**
 * Called by Mesa when rendering to a texture is done.
 */
static void
intel_finish_render_texture(struct gl_context * ctx,
                            struct gl_renderbuffer_attachment *att)
{
   struct intel_context *intel = intel_context(ctx);
   struct gl_texture_object *tex_obj = att->Texture;
   struct gl_texture_image *image =
      tex_obj->Image[att->CubeMapFace][att->TextureLevel];
   struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);

   DBG("Finish render %s texture tex=%u\n",
       _mesa_get_format_name(image->TexFormat), att->Texture->Name);

   if (irb)
      irb->tex_image = NULL;

   /* Since we've (probably) rendered to the texture and will (likely) use
    * it in the texture domain later on in this batchbuffer, flush the
    * batch.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer like GEM does in the kernel.
    */
   intel_batchbuffer_emit_mi_flush(intel);
}
Example #16
/**
 * Use blitting to clear the renderbuffers named by 'flags'.
 * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
 * since that might include software renderbuffers or renderbuffers
 * which we're clearing with triangles.
 * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
 */
GLbitfield
intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
{
   struct intel_context *intel = intel_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   GLuint clear_depth_value, clear_depth_mask;
   GLint cx, cy, cw, ch;
   GLbitfield fail_mask = 0;
   BATCH_LOCALS;

   /* Note: we don't use this function on Gen7+ hardware, so we can safely
    * ignore fast color clear issues.
    */
   assert(intel->gen < 7);

   /*
    * Compute values for clearing the buffers.
    */
   clear_depth_value = 0;
   clear_depth_mask = 0;
   if (mask & BUFFER_BIT_DEPTH) {
      clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
      clear_depth_mask = XY_BLT_WRITE_RGB;
   }
   if (mask & BUFFER_BIT_STENCIL) {
      clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24;
      clear_depth_mask |= XY_BLT_WRITE_ALPHA;
   }

   cx = fb->_Xmin;
   if (_mesa_is_winsys_fbo(fb))
      cy = ctx->DrawBuffer->Height - fb->_Ymax;
   else
      cy = fb->_Ymin;
   cw = fb->_Xmax - fb->_Xmin;
   ch = fb->_Ymax - fb->_Ymin;

   if (cw == 0 || ch == 0)
      return 0;

   /* Loop over all renderbuffers */
   mask &= (1 << BUFFER_COUNT) - 1;
   while (mask) {
      GLuint buf = ffs(mask) - 1;
      bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL;
      struct intel_renderbuffer *irb;
      int x1, y1, x2, y2;
      uint32_t clear_val;
      uint32_t BR13, CMD;
      struct intel_region *region;
      int pitch, cpp;
      drm_intel_bo *aper_array[2];

      mask &= ~(1 << buf);

      irb = intel_get_renderbuffer(fb, buf);
      if (irb && irb->mt) {
	 region = irb->mt->region;
	 assert(region);
	 assert(region->bo);
      } else {
         fail_mask |= 1 << buf;
         continue;
      }

      /* OK, clear this renderbuffer */
      x1 = cx + irb->draw_x;
      y1 = cy + irb->draw_y;
      x2 = cx + cw + irb->draw_x;
      y2 = cy + ch + irb->draw_y;

      pitch = region->pitch;
      cpp = region->cpp;

      DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
	  __FUNCTION__,
	  region->bo, pitch,
	  x1, y1, x2 - x1, y2 - y1);

      BR13 = 0xf0 << 16;
      CMD = XY_COLOR_BLT_CMD;

      /* Setup the blit command */
      if (cpp == 4) {
	 if (is_depth_stencil) {
	    CMD |= clear_depth_mask;
	 } else {
	    /* clearing RGBA */
	    CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
	 }
      }

      assert(region->tiling != I915_TILING_Y);

      BR13 |= pitch;

      if (is_depth_stencil) {
	 clear_val = clear_depth_value;
      } else {
	 uint8_t clear[4];
	 GLfloat *color = ctx->Color.ClearColor.f;

	 _mesa_unclamped_float_rgba_to_ubyte(clear, color);

	 switch (intel_rb_format(irb)) {
	 case MESA_FORMAT_ARGB8888:
	 case MESA_FORMAT_XRGB8888:
	    clear_val = PACK_COLOR_8888(clear[3], clear[0],
					clear[1], clear[2]);
	    break;
	 case MESA_FORMAT_RGB565:
	    clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]);
	    break;
	 case MESA_FORMAT_ARGB4444:
	    clear_val = PACK_COLOR_4444(clear[3], clear[0],
					clear[1], clear[2]);
	    break;
	 case MESA_FORMAT_ARGB1555:
	    clear_val = PACK_COLOR_1555(clear[3], clear[0],
					clear[1], clear[2]);
	    break;
	 case MESA_FORMAT_A8:
	    clear_val = PACK_COLOR_8888(clear[3], clear[3],
					clear[3], clear[3]);
	    break;
	 default:
	    fail_mask |= 1 << buf;
	    continue;
	 }
      }

      BR13 |= br13_for_cpp(cpp);

      assert(x1 < x2);
      assert(y1 < y2);

      /* do space check before going any further */
      aper_array[0] = intel->batch.bo;
      aper_array[1] = region->bo;

      if (drm_intel_bufmgr_check_aperture_space(aper_array,
						ARRAY_SIZE(aper_array)) != 0) {
	 intel_batchbuffer_flush(intel);
      }

      BEGIN_BATCH(6);
      OUT_BATCH(CMD | (6 - 2));
      OUT_BATCH(BR13);
      OUT_BATCH((y1 << 16) | x1);
      OUT_BATCH((y2 << 16) | x2);
      OUT_RELOC_FENCED(region->bo,
		       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		       0);
      OUT_BATCH(clear_val);
      ADVANCE_BATCH();

      if (intel->always_flush_cache)
	 intel_batchbuffer_emit_mi_flush(intel);

      if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
	 mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
   }

   return fail_mask;
}
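
The PACK_COLOR_* macros above fold the ubyte clear color into the 32-bit fill value the blitter writes. A rough sketch of the 565 case, assuming the conventional bit layout (an illustration, not Mesa's actual macro):

#include <stdint.h>

/* 5:6:5 packing: red in bits 15:11, green in 10:5, blue in 4:0. */
static uint32_t pack_565(uint8_t r, uint8_t g, uint8_t b)
{
   return ((uint32_t)(r >> 3) << 11) |
          ((uint32_t)(g >> 2) << 5)  |
          (uint32_t)(b >> 3);
}
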
Example #17
/* Copy BitBlt
 */
bool
intelEmitCopyBlit(struct intel_context *intel,
		  GLuint cpp,
		  GLshort src_pitch,
		  drm_intel_bo *src_buffer,
		  GLuint src_offset,
		  uint32_t src_tiling,
		  GLshort dst_pitch,
		  drm_intel_bo *dst_buffer,
		  GLuint dst_offset,
		  uint32_t dst_tiling,
		  GLshort src_x, GLshort src_y,
		  GLshort dst_x, GLshort dst_y,
		  GLshort w, GLshort h,
		  GLenum logic_op)
{
   GLuint CMD, BR13, pass = 0;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   drm_intel_bo *aper_array[3];
   bool dst_y_tiled = dst_tiling == I915_TILING_Y;
   bool src_y_tiled = src_tiling == I915_TILING_Y;
   BATCH_LOCALS;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
	 return false;
   }
   if (src_tiling != I915_TILING_NONE) {
      if (src_offset & 4095)
	 return false;
   }
   if (dst_y_tiled || src_y_tiled)
      return false;

   /* do space check before going any further */
   do {
       aper_array[0] = intel->batch.bo;
       aper_array[1] = dst_buffer;
       aper_array[2] = src_buffer;

       if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
           intel_batchbuffer_flush(intel);
           pass++;
       } else
           break;
   } while (pass < 2);

   if (pass >= 2)
      return false;

   intel_batchbuffer_require_space(intel, 8 * 4);
   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
    * the low bits.
    */
   if (src_pitch % 4 != 0 || dst_pitch % 4 != 0)
      return false;

   /* For big formats (such as floating point), do the copy using 16 or 32bpp
    * and multiply the coordinates.
    */
   if (cpp > 4) {
      if (cpp % 4 == 2) {
         dst_x *= cpp / 2;
         dst_x2 *= cpp / 2;
         src_x *= cpp / 2;
         cpp = 2;
      } else {
         assert(cpp % 4 == 0);
         dst_x *= cpp / 4;
         dst_x2 *= cpp / 4;
         src_x *= cpp / 4;
         cpp = 4;
      }
   }

   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

   switch (cpp) {
   case 1:
   case 2:
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return false;
   }

   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
      return true;
   }

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH(8);

   OUT_BATCH(CMD | (8 - 2));
   OUT_BATCH(BR13 | (uint16_t)dst_pitch);
   OUT_BATCH((dst_y << 16) | dst_x);
   OUT_BATCH((dst_y2 << 16) | dst_x2);
   OUT_RELOC_FENCED(dst_buffer,
		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		    dst_offset);
   OUT_BATCH((src_y << 16) | src_x);
   OUT_BATCH((uint16_t)src_pitch);
   OUT_RELOC_FENCED(src_buffer,
		    I915_GEM_DOMAIN_RENDER, 0,
		    src_offset);

   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel);

   return true;
}
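
The cpp > 4 handling is easiest to see with numbers: a 64-pixel-wide RGBA32F row is 64 * 16 = 1024 bytes, and since the blitter tops out at 32bpp the copy is reissued as a 4-byte-per-pixel blit four times as wide. A sketch that mirrors that branch (an illustrative helper, not a separate API):

#include <assert.h>

/* For cpp divisible by 4, widen the x range by cpp/4 and drop to 32bpp;
 * the byte span covered by the blit stays the same. */
static void scale_for_blitter(int *x, int *x2, int *cpp)
{
   assert(*cpp > 4 && *cpp % 4 == 0);
   *x  *= *cpp / 4;
   *x2 *= *cpp / 4;
   *cpp = 4;
}
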
Example #18
/**
 * Used to initialize the alpha value of an ARGB8888 teximage after
 * loading it from an XRGB8888 source.
 *
 * This is very common with glCopyTexImage2D().
 */
void
intel_set_teximage_alpha_to_one(struct gl_context *ctx,
                                struct intel_texture_image *intel_image)
{
    struct intel_context *intel = intel_context(ctx);
    unsigned int image_x, image_y;
    uint32_t x1, y1, x2, y2;
    uint32_t BR13, CMD;
    int pitch, cpp;
    drm_intel_bo *aper_array[2];
    struct intel_region *region = intel_image->mt->region;
    BATCH_LOCALS;

    assert(intel_image->base.TexFormat == MESA_FORMAT_ARGB8888);

    /* get dest x/y in destination texture */
    intel_miptree_get_image_offset(intel_image->mt,
                                   intel_image->level,
                                   intel_image->face,
                                   0,
                                   &image_x, &image_y);

    x1 = image_x;
    y1 = image_y;
    x2 = image_x + intel_image->base.Width;
    y2 = image_y + intel_image->base.Height;

    pitch = region->pitch;
    cpp = region->cpp;

    DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
        __FUNCTION__,
        intel_image->mt->region->buffer, (pitch * cpp),
        x1, y1, x2 - x1, y2 - y1);

    BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
    CMD = XY_COLOR_BLT_CMD;
    CMD |= XY_BLT_WRITE_ALPHA;

    assert(region->tiling != I915_TILING_Y);

#ifndef I915
    if (region->tiling != I915_TILING_NONE) {
        CMD |= XY_DST_TILED;
        pitch /= 4;
    }
#endif
    BR13 |= (pitch * cpp);

    /* do space check before going any further */
    aper_array[0] = intel->batch.bo;
    aper_array[1] = region->buffer;

    if (drm_intel_bufmgr_check_aperture_space(aper_array,
            ARRAY_SIZE(aper_array)) != 0) {
        intel_batchbuffer_flush(intel);
    }

    BEGIN_BATCH_BLT(6);
    OUT_BATCH(CMD);
    OUT_BATCH(BR13);
    OUT_BATCH((y1 << 16) | x1);
    OUT_BATCH((y2 << 16) | x2);
    OUT_RELOC_FENCED(region->buffer,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                     0);
    OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
    ADVANCE_BATCH();

    intel_batchbuffer_emit_mi_flush(intel);
}
Example #19
/**
 * When the GS is not in use, we assign the entire URB space to the VS.  When
 * the GS is in use, we split the URB space evenly between the VS and the GS.
 * This is not ideal, but it's simple.
 *
 *           URB size / 2                   URB size / 2
 *   _____________-______________   _____________-______________
 *  /                            \ /                            \
 * +-------------------------------------------------------------+
 * | Vertex Shader Entries        | Geometry Shader Entries      |
 * +-------------------------------------------------------------+
 *
 * Sandybridge GT1 has 32kB of URB space, while GT2 has 64kB.
 * (See the Sandybridge PRM, Volume 2, Part 1, Section 1.4.7: 3DSTATE_URB.)
 */
static void
gen6_upload_urb( struct brw_context *brw )
{
   struct intel_context *intel = &brw->intel;
   int nr_vs_entries, nr_gs_entries;
   int total_urb_size = brw->urb.size * 1024; /* in bytes */

   /* CACHE_NEW_VS_PROG */
   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);

   /* We use the same VUE layout for VS outputs and GS outputs (as it's what
    * the SF and Clipper expect), so we can simply make the GS URB entry size
    * the same as for the VS.  This may technically be too large in cases
    * where we have few vertex attributes and a lot of varyings, since the VS
    * size is determined by the larger of the two.  For now, it's safe.
    */
   brw->urb.gs_size = brw->urb.vs_size;

   /* Calculate how many entries fit in each stage's section of the URB */
   if (brw->gs.prog_active) {
      nr_vs_entries = (total_urb_size/2) / (brw->urb.vs_size * 128);
      nr_gs_entries = (total_urb_size/2) / (brw->urb.gs_size * 128);
   } else {
      nr_vs_entries = total_urb_size / (brw->urb.vs_size * 128);
      nr_gs_entries = 0;
   }

   /* Then clamp to the maximum allowed by the hardware */
   if (nr_vs_entries > brw->urb.max_vs_entries)
      nr_vs_entries = brw->urb.max_vs_entries;

   if (nr_gs_entries > brw->urb.max_gs_entries)
      nr_gs_entries = brw->urb.max_gs_entries;

   /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */
   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
   brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);

   assert(brw->urb.nr_vs_entries >= 24);
   assert(brw->urb.nr_vs_entries % 4 == 0);
   assert(brw->urb.nr_gs_entries % 4 == 0);
   assert(brw->urb.vs_size < 5);
   assert(brw->urb.gs_size < 5);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
   OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
   ADVANCE_BATCH();

   /* From the PRM Volume 2 part 1, section 1.4.7:
    *
    *   Because of a urb corruption caused by allocating a previous gsunit’s
    *   urb entry to vsunit software is required to send a "GS NULL
    *   Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus
    *   a dummy DRAW call before any case where VS will be taking over GS URB
    *   space.
    *
    * It is not clear exactly what this means ("URB fence" is a command that
    * doesn't exist on Gen6).  So for now we just do a full pipeline flush as
    * a workaround.
    */
   if (brw->urb.gen6_gs_previously_active && !brw->gs.prog_active)
      intel_batchbuffer_emit_mi_flush(intel);
   brw->urb.gen6_gs_previously_active = brw->gs.prog_active;
}
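
A worked instance of the split described in the comment, with illustrative numbers (a 32 kB URB and an entry size of 2 512-bit units; the real values come from the VS compile and the hardware limits):

#include <stdio.h>

/* Mirrors the arithmetic in gen6_upload_urb(). */
int main(void)
{
   const int urb_bytes = 32 * 1024;
   const int vs_size = 2, gs_size = 2;   /* 512-bit units per entry */

   int vs_with_gs = (urb_bytes / 2) / (vs_size * 128);   /* 64 entries */
   int gs_with_gs = (urb_bytes / 2) / (gs_size * 128);   /* 64 entries */
   int vs_alone   =  urb_bytes      / (vs_size * 128);   /* 128 entries */

   printf("GS active: %d VS + %d GS entries\n", vs_with_gs, gs_with_gs);
   printf("GS idle:   %d VS entries (before clamping and rounding to 4)\n",
          vs_alone);
   return 0;
}
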
Example #20
static void
upload_vs_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
   drm_intel_bo *constant_bo;
   int i;

   if (vp->use_const_buffer || nr_params == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      if (brw->vertex_program->IsNVProgram)
	 _mesa_load_tracked_matrices(ctx);

      /* Updates the ParameterValues[i] pointers for all parameters of the
       * basic type of PROGRAM_STATE_VAR.
       */
      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);

      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
				       nr_params * 4 * sizeof(float),
				       4096);
      drm_intel_gem_bo_map_gtt(constant_bo);
      for (i = 0; i < nr_params; i++) {
	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
		vp->program.Base.Parameters->ParameterValues[i],
		4 * sizeof(float));
      }
      drm_intel_gem_bo_unmap_gtt(constant_bo);

      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
		GEN6_CONSTANT_BUFFER_0_ENABLE |
		(5 - 2));
      OUT_RELOC(constant_bo,
		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
		ALIGN(nr_params, 2) / 2 - 1);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      drm_intel_bo_unreference(constant_bo);
   }

   intel_batchbuffer_emit_mi_flush(intel->batch);

   BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
   OUT_BATCH(0); /* scratch space base offset */
   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
	     GEN6_VS_STATISTICS_ENABLE);
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel->batch);
}
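
One detail worth spelling out: the reloc DWord above encodes the push-constant buffer length as ALIGN(nr_params, 2) / 2 - 1, i.e. the number of 256-bit (two-vec4) blocks minus one. With an illustrative nr_params of 17, the buffer is allocated as 17 * 4 floats and the length field becomes ALIGN(17, 2) / 2 - 1 = 8.
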
Example #21
bool
brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
                    GLbitfield buffers, bool partial_clear)
{
   struct gl_context *ctx = &brw->ctx;
   mesa_format format;
   enum { FAST_CLEAR, REP_CLEAR, PLAIN_CLEAR } clear_type;
   GLbitfield plain_clear_buffers, meta_save, rep_clear_buffers, fast_clear_buffers;
   struct rect fast_clear_rect, clear_rect;
   int layers;

   fast_clear_buffers = rep_clear_buffers = plain_clear_buffers = 0;

   /* First we loop through the color draw buffers and determine which ones
    * can be fast cleared, which ones can use the replicated write and which
    * ones have to fall back to regular color clear.
    */
   for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
      int index = fb->_ColorDrawBufferIndexes[buf];

      /* Only clear the buffers present in the provided mask */
      if (((1 << index) & buffers) == 0)
         continue;

      /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
       * the framebuffer can be complete with some attachments missing.  In
       * this case the _ColorDrawBuffers pointer will be NULL.
       */
      if (rb == NULL)
         continue;

      clear_type = FAST_CLEAR;

      /* We don't have fast clear until gen7. */
      if (brw->gen < 7)
         clear_type = REP_CLEAR;

      if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
         clear_type = REP_CLEAR;

      /* We can't do scissored fast clears because of the restrictions on the
       * fast clear rectangle size.
       */
      if (partial_clear)
         clear_type = REP_CLEAR;

      /* Fast clear is only supported for colors where all components are
       * either 0 or 1.
       */
      format = _mesa_get_render_format(ctx, irb->mt->format);
      if (!is_color_fast_clear_compatible(brw, format, &ctx->Color.ClearColor))
         clear_type = REP_CLEAR;

      /* From the SNB PRM (Vol4_Part1):
       *
       *     "Replicated data (Message Type = 111) is only supported when
       *      accessing tiled memory.  Using this Message Type to access
       *      linear (untiled) memory is UNDEFINED."
       */
      if (irb->mt->tiling == I915_TILING_NONE) {
         perf_debug("falling back to plain clear because buffers are untiled\n");
         clear_type = PLAIN_CLEAR;
      }

      /* Constant color writes ignore everything in blend and color calculator
       * state.  This is not documented.
       */
      GLubyte *color_mask = ctx->Color.ColorMask[buf];
      for (int i = 0; i < 4; i++) {
         if (_mesa_format_has_color_component(irb->mt->format, i) &&
             !color_mask[i]) {
            perf_debug("falling back to plain clear because of color mask\n");
            clear_type = PLAIN_CLEAR;
         }
      }

      /* Allocate the MCS for non MSRT surfaces now if we're doing a fast
       * clear and we don't have the MCS yet.  On failure, fall back to
       * replicated clear.
       */
      if (clear_type == FAST_CLEAR && irb->mt->mcs_mt == NULL)
         if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt))
            clear_type = REP_CLEAR;

      switch (clear_type) {
      case FAST_CLEAR:
         irb->mt->fast_clear_color_value =
            compute_fast_clear_color_bits(&ctx->Color.ClearColor);
         irb->need_downsample = true;

         /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
          * clear is redundant and can be skipped.  Only skip after we've
          * updated the fast clear color above though.
          */
         if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR)
            continue;

         /* Set fast_clear_state to RESOLVED so we don't try resolve them when
          * we draw, in case the mt is also bound as a texture.
          */
         irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
         irb->need_downsample = true;
         fast_clear_buffers |= 1 << index;
         get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
         break;

      case REP_CLEAR:
         rep_clear_buffers |= 1 << index;
         get_buffer_rect(brw, fb, irb, &clear_rect);
         break;

      case PLAIN_CLEAR:
         plain_clear_buffers |= 1 << index;
         get_buffer_rect(brw, fb, irb, &clear_rect);
         continue;
      }
   }

   if (!(fast_clear_buffers | rep_clear_buffers)) {
      if (plain_clear_buffers)
         /* If we only have plain clears, skip the meta save/restore. */
         goto out;
      else
         /* Nothing left to do.  This happens when we hit the redundant fast
          * clear case above and nothing else.
          */
         return true;
   }

   meta_save =
      MESA_META_ALPHA_TEST |
      MESA_META_BLEND |
      MESA_META_DEPTH_TEST |
      MESA_META_RASTERIZATION |
      MESA_META_SHADER |
      MESA_META_STENCIL_TEST |
      MESA_META_VERTEX |
      MESA_META_VIEWPORT |
      MESA_META_CLIP |
      MESA_META_CLAMP_FRAGMENT_COLOR |
      MESA_META_MULTISAMPLE |
      MESA_META_OCCLUSION_QUERY |
      MESA_META_DRAW_BUFFERS;

   _mesa_meta_begin(ctx, meta_save);

   if (!brw_fast_clear_init(brw)) {
      /* This is going to be hard to recover from, most likely out of memory.
       * Bail and let meta try and (probably) fail for us.
       */
      plain_clear_buffers = buffers;
      goto bail_to_meta;
   }

   /* Clears never have the color clamped. */
   if (ctx->Extensions.ARB_color_buffer_float)
      _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);

   _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
   _mesa_DepthMask(GL_FALSE);
   _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);

   use_rectlist(brw, true);

   layers = MAX2(1, fb->MaxNumLayers);
   if (fast_clear_buffers) {
      _mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers);
      brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
      set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
      brw_draw_rectlist(ctx, &fast_clear_rect, layers);
      set_fast_clear_op(brw, 0);
   }

   if (rep_clear_buffers) {
      _mesa_meta_drawbuffers_from_bitfield(rep_clear_buffers);
      brw_bind_rep_write_shader(brw, ctx->Color.ClearColor.f);
      brw_draw_rectlist(ctx, &clear_rect, layers);
   }

   /* Now set the mts we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
    * resolve them eventually.
    */
   for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
      int index = fb->_ColorDrawBufferIndexes[buf];

      if ((1 << index) & fast_clear_buffers)
         irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
   }

 bail_to_meta:
   /* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear
    * color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if
    * we render to it.
    */
   brw->NewGLState |= _NEW_BUFFERS;


   /* Set the custom state back to normal and dirty the same bits as above */
   use_rectlist(brw, false);

   _mesa_meta_end(ctx);

   /* From BSpec: Render Target Fast Clear:
    *
    *     After Render target fast clear, pipe-control with color cache
    *     write-flush must be issued before sending any DRAW commands on that
    *     render target.
    */
   intel_batchbuffer_emit_mi_flush(brw);

   /* If we had to fall back to plain clear for any buffers, clear those now
    * by calling into meta.
    */
 out:
   if (plain_clear_buffers)
      _mesa_meta_glsl_Clear(&brw->ctx, plain_clear_buffers);

   return true;
}
Example #22
static void
upload_sf_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   /* CACHE_NEW_VS_PROG */
   uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
   uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead);
   uint32_t dw1, dw2, dw3, dw4, dw16;
   int i;
   /* _NEW_BUFFER */
   GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
   int attr = 0;

   dw1 =
      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
      (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
      1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
   dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
      GEN6_SF_STATISTICS_ENABLE;
   dw3 = 0;
   dw4 = 0;
   dw16 = 0;

   /* _NEW_POLYGON */
   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
      dw2 |= GEN6_SF_WINDING_CCW;

   if (ctx->Polygon.OffsetFill)
       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;

   /* _NEW_SCISSOR */
   if (ctx->Scissor.Enabled)
      dw3 |= GEN6_SF_SCISSOR_ENABLE;

   /* _NEW_POLYGON */
   if (ctx->Polygon.CullFlag) {
      switch (ctx->Polygon.CullFaceMode) {
      case GL_FRONT:
	 dw3 |= GEN6_SF_CULL_FRONT;
	 break;
      case GL_BACK:
	 dw3 |= GEN6_SF_CULL_BACK;
	 break;
      case GL_FRONT_AND_BACK:
	 dw3 |= GEN6_SF_CULL_BOTH;
	 break;
      default:
	 assert(0);
	 break;
      }
   } else {
      dw3 |= GEN6_SF_CULL_NONE;
   }

   /* _NEW_LINE */
   dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
      GEN6_SF_LINE_WIDTH_SHIFT;
   if (ctx->Line.SmoothFlag) {
      dw3 |= GEN6_SF_LINE_AA_ENABLE;
      dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
      dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
   }

   /* _NEW_POINT */
   if (ctx->Point._Attenuated)
      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;

   dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) <<
      GEN6_SF_POINT_WIDTH_SHIFT;
   if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
      dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;

   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
      dw4 |=
	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
   } else {
      dw4 |=
	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
   }

   if (ctx->Point.PointSprite) {
       for (i = 0; i < 8; i++) { 
	   if (ctx->Point.CoordReplace[i])
	       dw16 |= (1 << i);
       }
   }

   BEGIN_BATCH(20);
   OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   OUT_BATCH(dw3);
   OUT_BATCH(dw4);
   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
   for (i = 0; i < 8; i++) {
      uint32_t attr_overrides = 0;

      for (; attr < 64; attr++) {
	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
	    attr_overrides |= get_attr_override(brw, attr);
	    attr++;
	    break;
	 }
      }

      for (; attr < 64; attr++) {
	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
	    attr_overrides |= get_attr_override(brw, attr) << 16;
	    attr++;
	    break;
	 }
      }
      OUT_BATCH(attr_overrides);
   }
   OUT_BATCH(dw16); /* point sprite texcoord bitmask */
   OUT_BATCH(0); /* constant interp bitmask */
   OUT_BATCH(0); /* wrapshortest enables 0-7 */
   OUT_BATCH(0); /* wrapshortest enables 8-15 */
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel->batch);
}
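
The line-width and point-width fields are unsigned fixed-point: U_FIXED(v, n) scales v by 2^n and truncates (assuming the usual Mesa definition). As a worked example, a 1.5-pixel line with 7 fraction bits encodes as 1.5 * 128 = 192, and a 1.0 point size with 3 fraction bits encodes as 8.
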
Example #23
/**
 * Implements fast depth clears on gen6+.
 *
 * Fast clears basically work by setting a flag in each of the subspans
 * represented in the HiZ buffer that says "When you need the depth values for
 * this subspan, it's the hardware's current clear value."  Then later rendering
 * can just use the static clear value instead of referencing memory.
 *
 * The tricky part of the implementation is that you have to have the clear
 * value that was used on the depth buffer in place for all further rendering,
 * at least until a resolve to the real depth buffer happens.
 */
static bool
brw_fast_clear_depth(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_mipmap_tree *mt = depth_irb->mt;
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];

   if (brw->gen < 6)
      return false;

   if (!intel_renderbuffer_has_hiz(depth_irb))
      return false;

   /* We only handle full buffer clears -- otherwise you'd have to track whether
    * a previous clear had happened at a different clear value and resolve it
    * first.
    */
   if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(ctx, fb)) {
      perf_debug("Failed to fast clear depth due to scissor being enabled.  "
                 "Possible 5%% performance win if avoided.\n");
      return false;
   }

   uint32_t depth_clear_value;
   switch (mt->format) {
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *      enabled (the legacy method of clearing must be performed):
       *
       *      - If the depth buffer format is D32_FLOAT_S8X24_UINT or
       *        D24_UNORM_S8_UINT.
       */
      return false;

   case MESA_FORMAT_Z_FLOAT32:
      depth_clear_value = float_as_int(ctx->Depth.Clear);
      break;

   case MESA_FORMAT_Z_UNORM16:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *      enabled (the legacy method of clearing must be performed):
       *
       *      - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
       *        width of the map (LOD0) is not multiple of 16, fast clear
       *        optimization must be disabled.
       */
      if (brw->gen == 6 &&
          (minify(mt->physical_width0,
                  depth_irb->mt_level - mt->first_level) % 16) != 0)
	 return false;
      /* FALLTHROUGH */

   default:
      if (brw->gen >= 8)
         depth_clear_value = float_as_int(ctx->Depth.Clear);
      else
         depth_clear_value = fb->_DepthMax * ctx->Depth.Clear;
      break;
   }

   /* If we're clearing to a new clear value, then we need to resolve any clear
    * flags out of the HiZ buffer into the real depth buffer.
    */
   if (mt->depth_clear_value != depth_clear_value) {
      intel_miptree_all_slices_resolve_depth(brw, mt);
      mt->depth_clear_value = depth_clear_value;
   }

   /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
    *
    *     "If other rendering operations have preceded this clear, a
    *      PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
    *      must be issued before the rectangle primitive used for the depth
    *      buffer clear operation.
    */
   intel_batchbuffer_emit_mi_flush(brw);

   if (fb->MaxNumLayers > 0) {
      for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
         intel_hiz_exec(brw, mt, depth_irb->mt_level,
                        depth_irb->mt_layer + layer,
                        GEN6_HIZ_OP_DEPTH_CLEAR);
      }
   } else {
      intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
                     GEN6_HIZ_OP_DEPTH_CLEAR);
   }

   if (brw->gen == 6) {
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be followed
       *      by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
       *      followed by Depth FLUSH'
      */
      intel_batchbuffer_emit_mi_flush(brw);
   }

   /* Now, the HiZ buffer contains data that needs to be resolved to the depth
    * buffer.
    */
   intel_renderbuffer_att_set_needs_depth_resolve(depth_att);

   return true;
}
Example #24
static void brw_emit_prim(struct brw_context *brw,
			  const struct _mesa_prim *prim,
			  uint32_t hw_prim)
{
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (brw->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the GPU besides the draw code.
    */
   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
            intel_buffer_object(indirect_buffer),
            prim->indirect_offset, 5 * sizeof(GLuint));

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      BEGIN_BATCH(15);

      OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
      OUT_BATCH(GEN7_3DPRIM_VERTEX_COUNT);
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
            prim->indirect_offset + 0);
      OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
      OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
            prim->indirect_offset + 4);
      OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
      OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
            prim->indirect_offset + 8);

      if (prim->indexed) {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
               prim->indirect_offset + 12);
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
         OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
               prim->indirect_offset + 16);
      }
      else {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
         OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
               prim->indirect_offset + 12);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_BATCH(0);
      }

      ADVANCE_BATCH();
   }
   else {
      indirect_flag = 0;
   }


   if (brw->gen >= 7) {
      BEGIN_BATCH(7);
      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   /* Only used on Sandybridge; harmless to set elsewhere. */
   brw->batch.need_workaround_flush = true;

   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }
}
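
The five MI_LOAD_REGISTER_MEM loads read the standard GL indirect-parameter block at prim->indirect_offset: offsets 0/4/8/12/16 line up with the indexed layout, and 0/4/8/12 with the non-indexed one. For reference, the parameter blocks as the ARB_draw_indirect / ARB_base_instance specs describe them (C-style layout only; the typedef names follow the specs, not the driver):

#include <GL/gl.h>   /* GLuint, GLint */

typedef struct {
   GLuint count;          /* offset 0:  vertex count   */
   GLuint instanceCount;  /* offset 4:  instance count */
   GLuint first;          /* offset 8:  start vertex   */
   GLuint baseInstance;   /* offset 12: start instance */
} DrawArraysIndirectCommand;

typedef struct {
   GLuint count;          /* offset 0:  index count    */
   GLuint instanceCount;  /* offset 4:  instance count */
   GLuint firstIndex;     /* offset 8:  first index    */
   GLint  baseVertex;     /* offset 12: base vertex    */
   GLuint baseInstance;   /* offset 16: start instance */
} DrawElementsIndirectCommand;
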
Example #25
bool
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
				  GLuint cpp,
				  GLubyte *src_bits, GLuint src_size,
				  GLuint fg_color,
				  GLshort dst_pitch,
				  drm_intel_bo *dst_buffer,
				  GLuint dst_offset,
				  uint32_t dst_tiling,
				  GLshort x, GLshort y,
				  GLshort w, GLshort h,
				  GLenum logic_op)
{
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
	 return false;
      if (dst_tiling == I915_TILING_Y)
	 return false;
   }

   assert((logic_op >= GL_CLEAR) && (logic_op <= (GL_CLEAR + 0x0f)));
   assert(dst_pitch > 0);

   if (w < 0 || h < 0)
      return true;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __FUNCTION__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   intel_batchbuffer_require_space(intel,
				   (8 * 4) +
				   (3 * 4) +
				   dwords * 4);

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   br13 |= br13_for_cpp(cpp);

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiling != I915_TILING_NONE)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH(8 + 3);
   OUT_BATCH(opcode | (8 - 2));
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   OUT_RELOC_FENCED(dst_buffer,
		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		    dst_offset);
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   ADVANCE_BATCH();

   intel_batchbuffer_data(intel, src_bits, dwords * 4);

   intel_batchbuffer_emit_mi_flush(intel);

   return true;
}
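
The immediate color-expand blit appends the monochrome source bitmap (src_bits) after the 3-dword command, padded to a qword boundary and counted in dwords. A minimal sketch of that sizing math, with align_up() standing in for the driver's ALIGN() macro:

#include <stdint.h>

/* Stand-in for the driver's ALIGN() macro: round up to a power-of-two
 * multiple (here 8 bytes, the blitter's qword padding). */
static inline uint32_t align_up(uint32_t value, uint32_t alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

/* Number of immediate-data dwords appended after the blit command,
 * matching "ALIGN(src_size, 8) / 4" above. */
static inline uint32_t immediate_dwords(uint32_t src_size)
{
   return align_up(src_size, 8) / 4;
}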
Example #26
void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t estimated_max_batch_usage = 1500;
   bool check_aperture_failed_once = false;

   /* Flush the sampler and render caches.  We definitely need to flush the
    * sampler cache so that we get updated contents from the render cache for
    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
    * docs to flush the cache between reinterpretations of the same surface
    * data with different formats, which blorp does for stencil and depth
    * data.
    */
   intel_batchbuffer_emit_mi_flush(brw);

retry:
   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
   intel_batchbuffer_save_state(brw);
   drm_intel_bo *saved_bo = brw->batch.bo;
   uint32_t saved_used = brw->batch.used;
   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;

   switch (brw->gen) {
   case 6:
      gen6_blorp_exec(brw, params);
      break;
   case 7:
      gen7_blorp_exec(brw, params);
      break;
   default:
      /* BLORP is not supported before Gen6. */
      assert(false);
      break;
   }

   /* Make sure we didn't wrap the batch unintentionally, and make sure we
    * reserved enough space that a wrap will never happen.
    */
   assert(brw->batch.bo == saved_bo);
   assert((brw->batch.used - saved_used) * 4 +
          (saved_state_batch_offset - brw->batch.state_batch_offset) <
          estimated_max_batch_usage);
   /* Shut up compiler warnings on release build */
   (void)saved_bo;
   (void)saved_used;
   (void)saved_state_batch_offset;

   /* Check if the blorp op we just did would make our batch likely to fail to
    * map all the BOs into the GPU at batch exec time later.  If so, flush the
    * batch and try again with nothing else in the batch.
    */
   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
      if (!check_aperture_failed_once) {
         check_aperture_failed_once = true;
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         goto retry;
      } else {
         int ret = intel_batchbuffer_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: blorp emit exceeded available aperture space\n");
      }
   }

   if (unlikely(brw->always_flush_batch))
      intel_batchbuffer_flush(brw);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   brw->state.dirty.brw = ~0;
   brw->state.dirty.cache = ~0;
   brw->ib.type = -1;
   intel_batchbuffer_clear_cache(brw);

   /* Flush the sampler cache so any texturing from the destination is
    * coherent.
    */
   intel_batchbuffer_emit_mi_flush(brw);
}
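
The aperture handling here is a save/emit/check/retry-once pattern: snapshot the batch, emit, and if the kernel would likely fail to map all BOs at exec time, rewind, flush, and re-emit into an empty batch before giving up with a warning. Below is a condensed, self-contained sketch of that control flow; the stub helpers are hypothetical stand-ins for the intel_batchbuffer_* and dri_bufmgr_check_aperture_space() calls, not the real API.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver calls used above. */
static void save_state(void)            { /* intel_batchbuffer_save_state()     */ }
static void restore_state(void)         { /* intel_batchbuffer_reset_to_saved() */ }
static void flush(void)                 { /* intel_batchbuffer_flush()          */ }
static void emit_op(void)               { /* gen6/gen7 blorp emission           */ }
static bool would_exceed_aperture(void) { return false; /* aperture check stub  */ }

void emit_with_aperture_retry(void)
{
   bool retried = false;

retry:
   save_state();
   emit_op();

   if (would_exceed_aperture()) {
      if (!retried) {
         retried = true;
         restore_state();   /* drop what was just emitted     */
         flush();           /* start over with an empty batch */
         goto retry;
      }
      flush();              /* already retried once: submit anyway and warn */
      fprintf(stderr, "emit exceeded available aperture space\n");
   }
}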
Example #27
/**
 * Use blitting to clear the renderbuffers named by 'flags'.
 * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
 * since that might include software renderbuffers or renderbuffers
 * which we're clearing with triangles.
 * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
 */
GLbitfield
intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
{
    struct intel_context *intel = intel_context(ctx);
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    GLuint clear_depth_value, clear_depth_mask;
    GLboolean all;
    GLint cx, cy, cw, ch;
    GLbitfield fail_mask = 0;
    BATCH_LOCALS;

    /*
     * Compute values for clearing the buffers.
     */
    clear_depth_value = 0;
    clear_depth_mask = 0;
    if (mask & BUFFER_BIT_DEPTH) {
        clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
        clear_depth_mask = XY_BLT_WRITE_RGB;
    }
    if (mask & BUFFER_BIT_STENCIL) {
        clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24;
        clear_depth_mask |= XY_BLT_WRITE_ALPHA;
    }

    cx = fb->_Xmin;
    if (fb->Name == 0)
        cy = ctx->DrawBuffer->Height - fb->_Ymax;
    else
        cy = fb->_Ymin;
    cw = fb->_Xmax - fb->_Xmin;
    ch = fb->_Ymax - fb->_Ymin;

    if (cw == 0 || ch == 0)
        return 0;

    all = (cw == fb->Width && ch == fb->Height);

    /* Loop over all renderbuffers */
    mask &= (1 << BUFFER_COUNT) - 1;
    while (mask) {
        GLuint buf = _mesa_ffs(mask) - 1;
        GLboolean is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL;
        struct intel_renderbuffer *irb;
        drm_intel_bo *write_buffer;
        int x1, y1, x2, y2;
        uint32_t clear_val;
        uint32_t BR13, CMD;
        int pitch, cpp;
        drm_intel_bo *aper_array[2];

        mask &= ~(1 << buf);

        irb = intel_get_renderbuffer(fb, buf);
        if (irb == NULL || irb->region == NULL || irb->region->buffer == NULL) {
            fail_mask |= 1 << buf;
            continue;
        }

        /* OK, clear this renderbuffer */
        write_buffer = intel_region_buffer(intel, irb->region,
                                           all ? INTEL_WRITE_FULL :
                                           INTEL_WRITE_PART);
        x1 = cx + irb->draw_x;
        y1 = cy + irb->draw_y;
        x2 = cx + cw + irb->draw_x;
        y2 = cy + ch + irb->draw_y;

        pitch = irb->region->pitch;
        cpp = irb->region->cpp;

        DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
            __FUNCTION__,
            irb->region->buffer, (pitch * cpp),
            x1, y1, x2 - x1, y2 - y1);

        BR13 = 0xf0 << 16;
        CMD = XY_COLOR_BLT_CMD;

        /* Setup the blit command */
        if (cpp == 4) {
            if (is_depth_stencil) {
                CMD |= clear_depth_mask;
            } else {
                /* clearing RGBA */
                CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
            }
        }

        assert(irb->region->tiling != I915_TILING_Y);

#ifndef I915
        if (irb->region->tiling != I915_TILING_NONE) {
            CMD |= XY_DST_TILED;
            pitch /= 4;
        }
#endif
        BR13 |= (pitch * cpp);

        if (is_depth_stencil) {
            clear_val = clear_depth_value;
        } else {
            uint8_t clear[4];
            GLclampf *color = ctx->Color.ClearColor;

            CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
            CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
            CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
            CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);

            switch (irb->Base.Format) {
            case MESA_FORMAT_ARGB8888:
            case MESA_FORMAT_XRGB8888:
                clear_val = PACK_COLOR_8888(clear[3], clear[0],
                                            clear[1], clear[2]);
                break;
            case MESA_FORMAT_RGB565:
                clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]);
                break;
            case MESA_FORMAT_ARGB4444:
                clear_val = PACK_COLOR_4444(clear[3], clear[0],
                                            clear[1], clear[2]);
                break;
            case MESA_FORMAT_ARGB1555:
                clear_val = PACK_COLOR_1555(clear[3], clear[0],
                                            clear[1], clear[2]);
                break;
            case MESA_FORMAT_A8:
                clear_val = PACK_COLOR_8888(clear[3], clear[3],
                                            clear[3], clear[3]);
                break;
            default:
                fail_mask |= 1 << buf;
                continue;
            }
        }

        BR13 |= br13_for_cpp(cpp);

        assert(x1 < x2);
        assert(y1 < y2);

        /* do space check before going any further */
        aper_array[0] = intel->batch.bo;
        aper_array[1] = write_buffer;

        if (drm_intel_bufmgr_check_aperture_space(aper_array,
                ARRAY_SIZE(aper_array)) != 0) {
            intel_batchbuffer_flush(intel);
        }

        BEGIN_BATCH_BLT(6);
        OUT_BATCH(CMD);
        OUT_BATCH(BR13);
        OUT_BATCH((y1 << 16) | x1);
        OUT_BATCH((y2 << 16) | x2);
        OUT_RELOC_FENCED(write_buffer,
                         I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                         0);
        OUT_BATCH(clear_val);
        ADVANCE_BATCH();

        if (intel->always_flush_cache)
            intel_batchbuffer_emit_mi_flush(intel);

        if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
            mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
    }

    return fail_mask;
}
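
The clear value written as the final dword of the XY_COLOR_BLT packet must be pre-packed to the renderbuffer's format. The helpers below are a self-contained sketch of the two most common packings used above, equivalent in effect to Mesa's PACK_COLOR_8888() and PACK_COLOR_565() macros (names here are illustrative):

#include <stdint.h>

/* Pack four 8-bit channels into a 32-bit ARGB8888 clear value, as the
 * PACK_COLOR_8888(a, r, g, b) call above does. */
static inline uint32_t pack_argb8888(uint8_t a, uint8_t r, uint8_t g, uint8_t b)
{
   return ((uint32_t)a << 24) | ((uint32_t)r << 16) |
          ((uint32_t)g << 8)  |  (uint32_t)b;
}

/* Pack 8-bit channels into a 16-bit RGB565 clear value, in the spirit of
 * PACK_COLOR_565(r, g, b): keep the top 5/6/5 bits of each channel. */
static inline uint16_t pack_rgb565(uint8_t r, uint8_t g, uint8_t b)
{
   return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}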
Example #28
/**
 * Implements fast depth clears on gen6+.
 *
 * Fast clears basically work by setting a flag in each of the subspans
 * represented in the HiZ buffer that says "When you need the depth values for
 * this subspan, it's the hardware's current clear value."  Then later rendering
 * can just use the static clear value instead of referencing memory.
 *
 * The tricky part of the implementation is that you have to have the clear
 * value that was used on the depth buffer in place for all further rendering,
 * at least until a resolve to the real depth buffer happens.
 */
static bool
brw_fast_clear_depth(struct gl_context *ctx)
{
   struct intel_context *intel = intel_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *depth_irb =
      intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_mipmap_tree *mt = depth_irb->mt;

   if (intel->gen < 6)
      return false;

   if (!mt->hiz_mt)
      return false;

   /* We only handle full buffer clears -- otherwise you'd have to track whether
    * a previous clear had happened at a different clear value and resolve it
    * first.
    */
   if (ctx->Scissor.Enabled && !noop_scissor(ctx, fb)) {
      perf_debug("Failed to fast clear depth due to scissor being enabled.  "
                 "Possible 5%% performance win if avoided.\n");
      return false;
   }

   /* The rendered area has to be 8x4 samples, not resolved pixels, so we look
    * at the miptree slice dimensions instead of renderbuffer size.
    */
   if (mt->level[depth_irb->mt_level].width % 8 != 0 ||
       mt->level[depth_irb->mt_level].height % 4 != 0) {
      perf_debug("Failed to fast clear depth due to width/height %d,%d not "
                 "being aligned to 8,4.  Possible 5%% performance win if "
                 "avoided\n",
                 mt->level[depth_irb->mt_level].width,
                 mt->level[depth_irb->mt_level].height);
      return false;
   }

   uint32_t depth_clear_value;
   switch (mt->format) {
   case MESA_FORMAT_Z32_FLOAT_X24S8:
   case MESA_FORMAT_S8_Z24:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *      enabled (the legacy method of clearing must be performed):
       *
       *      - If the depth buffer format is D32_FLOAT_S8X24_UINT or
       *        D24_UNORM_S8_UINT.
       */
      return false;

   case MESA_FORMAT_Z32_FLOAT:
      depth_clear_value = float_as_int(ctx->Depth.Clear);
      break;

   case MESA_FORMAT_Z16:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *      enabled (the legacy method of clearing must be performed):
       *
       *      - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
       *        width of the map (LOD0) is not multiple of 16, fast clear
       *        optimization must be disabled.
       */
      if (intel->gen == 6 && (mt->level[depth_irb->mt_level].width % 16) != 0)
	 return false;
      /* FALLTHROUGH */

   default:
      depth_clear_value = fb->_DepthMax * ctx->Depth.Clear;
      break;
   }

   /* If we're clearing to a new clear value, then we need to resolve any clear
    * flags out of the HiZ buffer into the real depth buffer.
    */
   if (mt->depth_clear_value != depth_clear_value) {
      intel_miptree_all_slices_resolve_depth(intel, mt);
      mt->depth_clear_value = depth_clear_value;
   }

   /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
    *
    *     "If other rendering operations have preceded this clear, a
    *      PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
    *      must be issued before the rectangle primitive used for the depth
    *      buffer clear operation.
    */
   intel_batchbuffer_emit_mi_flush(intel);

   intel_hiz_exec(intel, mt, depth_irb->mt_level, depth_irb->mt_layer,
		  GEN6_HIZ_OP_DEPTH_CLEAR);

   if (intel->gen == 6) {
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *     "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be followed
       *      by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
       *      followed by Depth FLUSH'
      */
      intel_batchbuffer_emit_mi_flush(intel);
   }

   /* Now, the HiZ buffer contains data that needs to be resolved to the depth
    * buffer.
    */
   intel_renderbuffer_set_needs_depth_resolve(depth_irb);

   return true;
}
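
For MESA_FORMAT_Z32_FLOAT, the hardware clear value is the raw IEEE-754 bit pattern of ctx->Depth.Clear; float_as_int() above is that bit cast. A minimal, strict-aliasing-safe sketch of such a helper (name and implementation assumed, not the driver's exact code):

#include <stdint.h>
#include <string.h>

/* Reinterpret a float's bits as a uint32_t without violating strict
 * aliasing; this is what a helper like float_as_int() boils down to. */
static inline uint32_t float_bits(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u));
   return u;
}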
Example #29
/* Copy BitBlt
 */
GLboolean
intelEmitCopyBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort src_pitch,
                  drm_intel_bo *src_buffer,
                  GLuint src_offset,
                  uint32_t src_tiling,
                  GLshort dst_pitch,
                  drm_intel_bo *dst_buffer,
                  GLuint dst_offset,
                  uint32_t dst_tiling,
                  GLshort src_x, GLshort src_y,
                  GLshort dst_x, GLshort dst_y,
                  GLshort w, GLshort h,
                  GLenum logic_op)
{
    GLuint CMD, BR13, pass = 0;
    int dst_y2 = dst_y + h;
    int dst_x2 = dst_x + w;
    drm_intel_bo *aper_array[3];
    BATCH_LOCALS;

    if (dst_tiling != I915_TILING_NONE) {
        if (dst_offset & 4095)
            return GL_FALSE;
        if (dst_tiling == I915_TILING_Y)
            return GL_FALSE;
    }
    if (src_tiling != I915_TILING_NONE) {
        if (src_offset & 4095)
            return GL_FALSE;
        if (src_tiling == I915_TILING_Y)
            return GL_FALSE;
    }

    /* do space check before going any further */
    do {
        aper_array[0] = intel->batch.bo;
        aper_array[1] = dst_buffer;
        aper_array[2] = src_buffer;

        if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
            intel_batchbuffer_flush(intel);
            pass++;
        } else
            break;
    } while (pass < 2);

    if (pass >= 2)
        return GL_FALSE;

    intel_batchbuffer_require_space(intel, 8 * 4, true);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __FUNCTION__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
        dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

    src_pitch *= cpp;
    dst_pitch *= cpp;

    /* For big formats (such as floating point), do the copy using 32bpp and
     * multiply the coordinates.
     */
    if (cpp > 4) {
        assert(cpp % 4 == 0);
        dst_x *= cpp / 4;
        dst_x2 *= cpp / 4;
        src_x *= cpp / 4;
        cpp = 4;
    }

    BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

    switch (cpp) {
    case 1:
    case 2:
        CMD = XY_SRC_COPY_BLT_CMD;
        break;
    case 4:
        CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
        break;
    default:
        return GL_FALSE;
    }

#ifndef I915
    if (dst_tiling != I915_TILING_NONE) {
        CMD |= XY_DST_TILED;
        dst_pitch /= 4;
    }
    if (src_tiling != I915_TILING_NONE) {
        CMD |= XY_SRC_TILED;
        src_pitch /= 4;
    }
#endif

    if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
        return GL_TRUE;
    }

    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);

    BEGIN_BATCH_BLT(8);
    OUT_BATCH(CMD);
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
    OUT_BATCH((dst_y2 << 16) | dst_x2);
    OUT_RELOC_FENCED(dst_buffer,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                     dst_offset);
    OUT_BATCH((src_y << 16) | src_x);
    OUT_BATCH((uint16_t)src_pitch);
    OUT_RELOC_FENCED(src_buffer,
                     I915_GEM_DOMAIN_RENDER, 0,
                     src_offset);
    ADVANCE_BATCH();

    intel_batchbuffer_emit_mi_flush(intel);

    return GL_TRUE;
}
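
The cpp > 4 branch above handles formats wider than the blitter's 32-bpp limit by treating each wide pixel as cpp/4 32-bpp pixels: only the X coordinates and cpp change, while Y and the byte pitches computed earlier stay valid. A small illustrative helper capturing that adjustment (not driver code):

#include <assert.h>
#include <stdint.h>

/* Blit a wide format (e.g. 16-byte RGBA float) as 32-bpp pixels: each
 * source pixel becomes cpp/4 pixels, so X coordinates scale by cpp/4
 * while Y and the byte pitch are untouched.  Mirrors the cpp > 4 branch
 * above; illustrative only. */
static inline void widen_blit_coords(uint32_t *cpp, int *dst_x, int *dst_x2,
                                     int *src_x)
{
   if (*cpp > 4) {
      assert(*cpp % 4 == 0);
      int scale = (int)(*cpp / 4);
      *dst_x  *= scale;
      *dst_x2 *= scale;
      *src_x  *= scale;
      *cpp = 4;
   }
}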