Example #1
int
brw_bo_map_gtt(struct brw_context *brw, drm_intel_bo *bo, const char *bo_name)
{
   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
      return drm_intel_gem_bo_map_gtt(bo);

   float start_time = get_time();

   int ret = drm_intel_gem_bo_map_gtt(bo);

   perf_debug("GTT mapping a busy %s BO stalled and took %.03f ms.\n",
              bo_name, (get_time() - start_time) * 1000);

   return ret;
}
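A minimal caller sketch (the read_query_results() helper and the "query object" name are illustrative, not from the original file), showing how the wrapper above is meant to be used so that any stall is attributed to a readable BO name in the perf_debug output:

/* Illustrative caller: map a BO through the instrumented wrapper so a stall
 * is reported as "GTT mapping a busy query object BO stalled ...". */
static void
read_query_results(struct brw_context *brw, drm_intel_bo *query_bo)
{
   if (brw_bo_map_gtt(brw, query_bo, "query object") != 0)
      return;   /* mapping failed, nothing to read */

   uint64_t *results = query_bo->virtual;
   /* ... consume results[] here ... */
   (void) results;

   drm_intel_gem_bo_unmap_gtt(query_bo);
}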
Example #2
File: intel_upload.c  Project: Sheph/mesa
/**
 * Interface for getting memory for uploading streamed data to the GPU
 *
 * In most cases, streamed data (for GPU state structures, for example) is
 * uploaded through brw_state_batch(), since that interface allows relocations
 * from the streamed space returned to other BOs.  However, that interface has
 * the restriction that the amount of space allocated has to be "small" (see
 * estimated_max_prim_size in brw_draw.c).
 *
 * This interface, on the other hand, is able to handle arbitrary sized
 * allocation requests, though it will batch small allocations into the same
 * BO for efficiency and reduced memory footprint.
 *
 * \note The returned pointer is valid only until intel_upload_finish(), which
 * will happen at batch flush or the next
 * intel_upload_space()/intel_upload_data().
 *
 * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
 * entry, and will have a reference to the new BO containing the state on
 * return.
 *
 * \param out_offset Offset within the buffer object at which the data will land.
 */
void *
intel_upload_space(struct brw_context *brw,
                   uint32_t size,
                   uint32_t alignment,
                   drm_intel_bo **out_bo,
                   uint32_t *out_offset)
{
   uint32_t offset;

   offset = ALIGN_NPOT(brw->upload.next_offset, alignment);
   if (brw->upload.bo && offset + size > brw->upload.bo->size) {
      intel_upload_finish(brw);
      offset = 0;
   }

   if (!brw->upload.bo) {
      brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data",
                                          MAX2(INTEL_UPLOAD_SIZE, size), 4096);
      if (brw->has_llc)
         drm_intel_bo_map(brw->upload.bo, true);
      else
         drm_intel_gem_bo_map_gtt(brw->upload.bo);
   }

   brw->upload.next_offset = offset + size;

   *out_offset = offset;
   if (*out_bo != brw->upload.bo) {
      drm_intel_bo_unreference(*out_bo);
      *out_bo = brw->upload.bo;
      drm_intel_bo_reference(brw->upload.bo);
   }

   return brw->upload.bo->virtual + offset;
}
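A hedged usage sketch (the upload_vertices() helper and its arguments are illustrative, not part of intel_upload.c), following the contract documented above: write through the returned pointer immediately and keep only the BO reference and offset afterwards.

/* Illustrative caller: stream a blob of data into the shared upload BO. */
static void
upload_vertices(struct brw_context *brw, const void *data, uint32_t size,
                drm_intel_bo **out_bo, uint32_t *out_offset)
{
   void *dst = intel_upload_space(brw, size, 64, out_bo, out_offset);

   memcpy(dst, data, size);
   /* *out_bo now holds a reference to the upload BO and *out_offset is where
    * the data landed; the mapping itself is only valid until the next
    * intel_upload_space()/intel_upload_data() or intel_upload_finish(). */
}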
Example #3
/* Creates a new VS constant buffer reflecting the current VS program's
 * constants, if needed by the VS program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_vs_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   /* BRW_NEW_VERTEX_PROGRAM */
   struct brw_vertex_program *vp =
      (struct brw_vertex_program *) brw->vertex_program;
   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
   int i;

   if (vp->program.IsNVProgram)
      _mesa_load_tracked_matrices(ctx);

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);

   /* CACHE_NEW_VS_PROG */
   if (!brw->vs.prog_data->nr_pull_params) {
      if (brw->vs.const_bo) {
	 drm_intel_bo_unreference(brw->vs.const_bo);
	 brw->vs.const_bo = NULL;
	 brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
	 brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo_unreference(brw->vs.const_bo);
   brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
					 brw->vs.prog_data->nr_pull_params * 4,
					 64);

   drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
   for (i = 0; i < brw->vs.prog_data->nr_pull_params; i++) {
      memcpy(brw->vs.const_bo->virtual + i * 4,
	     brw->vs.prog_data->pull_param[i],
	     4);
   }

   if (0) {
      for (i = 0; i < params->NumParameters; i++) {
	 float *row = (float *)brw->vs.const_bo->virtual + i * 4;
	 printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
		i, row[0], row[1], row[2], row[3]);
      }
   }

   drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);

   const int surf = SURF_INDEX_VERT_CONST_BUFFER;
   intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
				       params->NumParameters,
				       &brw->bind.surf_offset[surf]);

   brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
Example #4
static void *
i915_drm_buffer_map(struct i915_winsys *iws,
                     struct i915_winsys_buffer *buffer,
                     boolean write)
{
   struct i915_drm_buffer *buf = i915_drm_buffer(buffer);
   drm_intel_bo *bo = intel_bo(buffer);
   int ret = 0;

   assert(bo);

   if (buf->map_count)
      goto out;

   ret = drm_intel_gem_bo_map_gtt(bo);

   buf->ptr = bo->virtual;

   assert(ret == 0);
out:
   if (ret)
      return NULL;

   buf->map_count++;
   return buf->ptr;
}
Example #5
/* XXX: Thread safety?
 */
void *
intel_region_map(struct intel_context *intel, struct intel_region *region,
                 GLbitfield mode)
{
   /* We have the region->map_refcount controlling mapping of the BO because
    * in software fallbacks we may end up mapping the same buffer multiple
    * times on Mesa's behalf, so we refcount our mappings to make sure that
    * the pointer stays valid until the end of the unmap chain.  However, we
    * must not emit any batchbuffers between the start of mapping and the end
    * of unmapping, or further use of the map will be incoherent with the GPU
    * rendering done by that batchbuffer. Hence we assert in
    * intel_batchbuffer_flush() that that doesn't happen, which means that the
    * flush is only needed on first map of the buffer.
    */

   _DBG("%s %p\n", __FUNCTION__, region);
   if (!region->map_refcount) {
      intel_flush(&intel->ctx);

      if (region->tiling != I915_TILING_NONE)
	 drm_intel_gem_bo_map_gtt(region->bo);
      else
	 drm_intel_bo_map(region->bo, true);

      region->map = region->bo->virtual;
   }

   region->map_refcount++;

   return region->map;
}
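A hedged sketch (not part of this snippet) of the unmap side of the refcounting scheme described in the comment above: the BO is only unmapped once the last mapping reference is dropped.

void
intel_region_unmap(struct intel_context *intel, struct intel_region *region)
{
   _DBG("%s %p\n", __FUNCTION__, region);
   if (!--region->map_refcount) {
      if (region->tiling != I915_TILING_NONE)
         drm_intel_gem_bo_unmap_gtt(region->bo);
      else
         drm_intel_bo_unmap(region->bo);
      region->map = NULL;
   }
}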
Example #6
static int
map_bo(struct buffer *my_buf)
{
	if (drm_intel_gem_bo_map_gtt(my_buf->bo) != 0)
		return 0;

	my_buf->mmap = my_buf->bo->virtual;

	return 1;
}
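A matching teardown sketch (the unmap_bo() name is illustrative); drm_intel_gem_bo_unmap_gtt() is the counterpart of the GTT mapping done in map_bo() above:

static void
unmap_bo(struct buffer *my_buf)
{
	drm_intel_gem_bo_unmap_gtt(my_buf->bo);
	my_buf->mmap = NULL;
}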
Example #7
void
brw_upload_vec4_pull_constants(struct brw_context *brw,
                               GLbitfield brw_new_constbuf,
                               const struct gl_program *prog,
                               struct brw_stage_state *stage_state,
                               const struct brw_vec4_prog_data *prog_data)
{
   int i;
   uint32_t surf_index = prog_data->base.binding_table.pull_constants_start;

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(&brw->ctx, prog->Parameters);

   if (!prog_data->nr_pull_params) {
      if (stage_state->const_bo) {
	 drm_intel_bo_unreference(stage_state->const_bo);
	 stage_state->const_bo = NULL;
	 stage_state->surf_offset[surf_index] = 0;
	 brw->state.dirty.brw |= brw_new_constbuf;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo_unreference(stage_state->const_bo);
   uint32_t size = prog_data->nr_pull_params * 4;
   stage_state->const_bo = drm_intel_bo_alloc(brw->bufmgr, "vec4_const_buffer",
                                           size, 64);

   drm_intel_gem_bo_map_gtt(stage_state->const_bo);

   for (i = 0; i < prog_data->nr_pull_params; i++) {
      memcpy(stage_state->const_bo->virtual + i * 4,
	     prog_data->pull_param[i],
	     4);
   }

   if (0) {
      for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
	 float *row = (float *)stage_state->const_bo->virtual + i * 4;
	 printf("const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
		i, row[0], row[1], row[2], row[3]);
      }
   }

   drm_intel_gem_bo_unmap_gtt(stage_state->const_bo);

   brw_create_constant_surface(brw, stage_state->const_bo, 0, size,
                               &stage_state->surf_offset[surf_index],
                               false);

   brw->state.dirty.brw |= brw_new_constbuf;
}
Example #8
/**
 * Called via glMapBufferARB().
 */
static void *
intel_bufferobj_map(struct gl_context * ctx,
                    GLenum target,
                    GLenum access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   GLboolean read_only = (access == GL_READ_ONLY_ARB);
   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);

   assert(intel_obj);

   if (intel_obj->sys_buffer) {
      if (!read_only && intel_obj->source) {
	 release_buffer(intel_obj);
      }

      if (!intel_obj->buffer || intel_obj->source) {
	 obj->Pointer = intel_obj->sys_buffer;
	 obj->Length = obj->Size;
	 obj->Offset = 0;
	 return obj->Pointer;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   /* Flush any existing batchbuffer that might reference this data. */
   if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
      intel_flush(ctx);

   if (intel_obj->region)
      intel_bufferobj_cow(intel, intel_obj);

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   if (write_only) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_obj->mapped_gtt = GL_TRUE;
   } else {
      drm_intel_bo_map(intel_obj->buffer, !read_only);
      intel_obj->mapped_gtt = GL_FALSE;
   }

   obj->Pointer = intel_obj->buffer->virtual;
   obj->Length = obj->Size;
   obj->Offset = 0;

   return obj->Pointer;
}
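For context, an application-side sketch (names are illustrative) of the GL call that reaches the driver hook above; a GL_WRITE_ONLY map takes the GTT-mapping path, any map that may read takes the CPU-mapping path:

/* Illustrative application code: a write-only map ends up in
 * intel_bufferobj_map() with mapped_gtt = GL_TRUE. */
static void
fill_vbo(GLuint vbo, const float *vertices, size_t bytes)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);
   void *ptr = glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
   if (ptr) {
      memcpy(ptr, vertices, bytes);
      glUnmapBuffer(GL_ARRAY_BUFFER);
   }
}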
Example #9
void *
intel_bo_map_gtt(struct intel_bo *bo)
{
   int err;

   err = drm_intel_gem_bo_map_gtt(gem_bo(bo));
   if (err) {
      debug_error("failed to map bo");
      return NULL;
   }

   return gem_bo(bo)->virtual;
}
Example #10
void intelFlushBatch(void)
{
	dri_bo *bo;

	i965_end_batch();

	drm_intel_bo_exec(xvmc_driver->batch.buf,
			  xvmc_driver->batch.ptr - xvmc_driver->batch.init_ptr,
			  0, 0, 0);

	bo = drm_intel_bo_alloc(xvmc_driver->bufmgr,
				"batch buffer", BATCH_SIZE, 0x1000);
	if (bo != NULL && drm_intel_gem_bo_map_gtt(bo) == 0) {
		drm_intel_bo_unreference(xvmc_driver->batch.buf);
		xvmc_driver->batch.buf = bo;
	} else {
		if (bo != NULL)
			drm_intel_bo_unreference(bo);
		drm_intel_gem_bo_map_gtt(xvmc_driver->batch.buf);
	}

	reset_batch();
}
Example #11
Bool intelInitBatchBuffer(void)
{
	if ((xvmc_driver->batch.buf =
	     drm_intel_bo_alloc(xvmc_driver->bufmgr,
				"batch buffer", BATCH_SIZE, 0x1000)) == NULL) {
		fprintf(stderr, "unable to alloc batch buffer\n");
		return False;
	}

	if (drm_intel_gem_bo_map_gtt(xvmc_driver->batch.buf)) {
		drm_intel_bo_unreference(xvmc_driver->batch.buf);
		return False;
	}

	reset_batch();
	return True;
}
Example #12
/* XXX: Thread safety?
 */
GLubyte *
intel_region_map(struct intel_context *intel, struct intel_region *region)
{
   intelFlush(&intel->ctx);

   _DBG("%s %p\n", __FUNCTION__, region);
   if (!region->map_refcount++) {
      if (region->pbo)
         intel_region_cow(intel, region);

      if (region->tiling != I915_TILING_NONE &&
	  intel->intelScreen->kernel_exec_fencing)
	 drm_intel_gem_bo_map_gtt(region->buffer);
      else
	 dri_bo_map(region->buffer, GL_TRUE);
      region->map = region->buffer->virtual;
   }

   return region->map;
}
Example #13
void *
intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
{
    drm_intel_bo *bo = mt->region->bo;

    if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
        if (drm_intel_bo_busy(bo)) {
            perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
        }
    }

    intel_flush(&intel->ctx);

    if (mt->region->tiling != I915_TILING_NONE)
        drm_intel_gem_bo_map_gtt(bo);
    else
        drm_intel_bo_map(bo, true);

    return bo->virtual;
}
Example #14
void
intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
{
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);

   if (irb == NULL || irb->region == NULL)
      return;

   drm_intel_gem_bo_map_gtt(irb->region->buffer);

   rb->Data = irb->region->buffer->virtual;
   rb->RowStride = irb->region->pitch;

   /* Flip orientation if it's the window system buffer */
   if (!rb->Name) {
      rb->Data += rb->RowStride * (irb->region->height - 1) * irb->region->cpp;
      rb->RowStride = -rb->RowStride;
   }

   intel_set_span_functions(intel, rb);
}
Example #15
static void init_buffer(struct scratch_buf *buf, unsigned size)
{
	buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
	assert(buf->bo);
	buf->tiling = I915_TILING_NONE;
	buf->stride = 4096;

	sanitize_stride(buf);

	if (options.no_hw)
		buf->data = malloc(size);
	else {
		if (options.use_cpu_maps)
			drm_intel_bo_map(buf->bo, 1);
		else
			drm_intel_gem_bo_map_gtt(buf->bo);
		buf->data = buf->bo->virtual;
	}

	buf->num_tiles = options.tiles_per_buf;
}
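A hedged teardown counterpart (the fini_buffer() name is illustrative), mirroring the no_hw / CPU-map / GTT-map choice made in init_buffer() above:

static void fini_buffer(struct scratch_buf *buf)
{
	if (options.no_hw)
		free(buf->data);
	else if (options.use_cpu_maps)
		drm_intel_bo_unmap(buf->bo);
	else
		drm_intel_gem_bo_unmap_gtt(buf->bo);

	drm_intel_bo_unreference(buf->bo);
	buf->data = NULL;
}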
Example #16
static void
copy_array_to_vbo_array( struct brw_context *brw,
			 struct brw_vertex_element *element,
			 GLuint dst_stride)
{
   struct intel_context *intel = &brw->intel;
   GLuint size = element->count * dst_stride;

   get_space(brw, size, &element->bo, &element->offset);

   if (element->glarray->StrideB == 0) {
      assert(element->count == 1);
      element->stride = 0;
   } else {
      element->stride = dst_stride;
   }

   if (dst_stride == element->glarray->StrideB) {
      if (intel->intelScreen->kernel_exec_fencing) {
	 drm_intel_gem_bo_map_gtt(element->bo);
	 memcpy((char *)element->bo->virtual + element->offset,
		element->glarray->Ptr, size);
	 drm_intel_gem_bo_unmap_gtt(element->bo);
      } else {
Example #17
/**
 * Called via glMapBufferRange().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(GLcontext * ctx,
			  GLenum target, GLintptr offset, GLsizeiptr length,
			  GLbitfield access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   if (intel_obj->sys_buffer) {
      obj->Pointer = intel_obj->sys_buffer + offset;
      return obj->Pointer;
   }

   if (intel_obj->region)
      intel_bufferobj_cow(intel, intel_obj);

   /* If the mapping is synchronized with other GL operations, flush
    * the batchbuffer so that GEM knows about the buffer access for later
    * syncing.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
      intelFlush(ctx);

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the user doesn't care about existing buffer contents and mapping
    * would cause us to block, then throw out the old buffer.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      drm_intel_bo_unreference(intel_obj->buffer);
      intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
				       intel_obj->Base.Size, 64);
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
	 intel_obj->range_map_buffer = _mesa_malloc(length);
	 obj->Pointer = intel_obj->range_map_buffer;
      } else {
	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
						      "range map",
						      length, 64);
	 if (!(access & GL_MAP_READ_BIT) &&
	     intel->intelScreen->kernel_exec_fencing) {
	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
	    intel_obj->mapped_gtt = GL_TRUE;
	 } else {
	    drm_intel_bo_map(intel_obj->range_map_bo,
			     (access & GL_MAP_WRITE_BIT) != 0);
	    intel_obj->mapped_gtt = GL_FALSE;
	 }
	 obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   if (!(access & GL_MAP_READ_BIT) &&
       intel->intelScreen->kernel_exec_fencing) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_obj->mapped_gtt = GL_TRUE;
   } else {
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
      intel_obj->mapped_gtt = GL_FALSE;
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}
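An application-side sketch of the three strategies the comment above describes; the flag combinations are standard GL_ARB_map_buffer_range usage, and the helper and its arguments are illustrative:

/* Illustrative application code exercising the three driver paths. */
static void
map_range_strategies(GLintptr offset, GLsizeiptr len, GLsizeiptr buf_size)
{
   /* 1. Unsynchronized: no batch flush, no stall; the caller promises there
    *    is no conflicting GPU access. */
   void *p1 = glMapBufferRange(GL_ARRAY_BUFFER, offset, len,
                               GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);

   /* 2. Whole-buffer invalidate: a busy BO may simply be replaced instead of
    *    stalling on it. */
   void *p2 = glMapBufferRange(GL_ARRAY_BUFFER, 0, buf_size,
                               GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);

   /* 3. Range invalidate: the driver may hand back a temporary BO and blit it
    *    into the real one at unmap/FlushRange time. */
   void *p3 = glMapBufferRange(GL_ARRAY_BUFFER, offset, len,
                               GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT);

   (void) p1; (void) p2; (void) p3;
   /* ... write through the returned pointers, then glUnmapBuffer(). */
}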
Example #18
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
static void prepare_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;

   if (sz == 0) {
      brw->curbe.last_bufsz  = 0;
      return;
   }

   buf = brw->curbe.next_buf;

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      GLuint offset = brw->curbe.wm_start * 16;

      /* copy float constants */
      for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
	 buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
					 *brw->wm.prog_data->param[i]);
      }
   }


   /* The clipplanes are actually delivered to both CLIP and VS units.
    * VS uses them to calculate the outcode bitmasks.
    */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
	 buf[offset + i * 4 + 0] = fixed_plane[i][0];
	 buf[offset + i * 4 + 1] = fixed_plane[i][1];
	 buf[offset + i * 4 + 2] = fixed_plane[i][2];
	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      assert(MAX_CLIP_PLANES == 6);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
	    buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
	    buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
	    buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
	    buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
	    i++;
	 }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;
      GLuint nr = brw->vs.prog_data->nr_params / 4;

      /* Load the subset of push constants that will get used when
       * we also have a pull constant buffer.
       */
      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
	 if (brw->vs.constant_map[i] != -1) {
	    assert(brw->vs.constant_map[i] <= nr);
	    memcpy(buf + offset + brw->vs.constant_map[i] * 4,
		   vp->program.Base.Parameters->ParameterValues[i],
		   4 * sizeof(float));
	 }
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4) 
	 printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
		buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
	     brw->curbe.last_buf, buf,
	     bufsz, brw->curbe.last_bufsz,
	     brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.curbe_bo != NULL &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* constants have not changed */
   } else {
      /* Update the record of what our last set of constants was.  We
       * don't just flip the pointers because we don't fill in the
       * data in the padding between the entries.
       */
      memcpy(brw->curbe.last_buf, buf, bufsz);
      brw->curbe.last_bufsz = bufsz;

      if (brw->curbe.curbe_bo != NULL &&
	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
      {
	 drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
	 drm_intel_bo_unreference(brw->curbe.curbe_bo);
	 brw->curbe.curbe_bo = NULL;
      }

      if (brw->curbe.curbe_bo == NULL) {
	 /* Allocate a single page for CURBE entries for this batchbuffer.
	  * They're generally around 64b.
	  */
	 brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->intel.bufmgr, "CURBE",
						  4096, 1 << 6);
	 brw->curbe.curbe_next_offset = 0;
	 drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
	 assert(bufsz < 4096);
      }

      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
      brw->curbe.curbe_next_offset += bufsz;
      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);

      /* Copy data to the buffer:
       */
      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
	     buf,
	     bufsz);
   }

   brw_add_validated_bo(brw, brw->curbe.curbe_bo);

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */
}
Example #19
/* Ensure the supplied VA surface has valid storage for decoding the
   current picture */
VAStatus
avc_ensure_surface_bo(
    VADriverContextP                    ctx,
    struct decode_state                *decode_state,
    struct object_surface              *obj_surface,
    const VAPictureParameterBufferH264 *pic_param
)
{
    VAStatus va_status;
    uint32_t hw_fourcc, fourcc, subsample, chroma_format;

    /* Validate chroma format */
    switch (pic_param->seq_fields.bits.chroma_format_idc) {
    case 0: // Grayscale
        fourcc = VA_FOURCC_Y800;
        subsample = SUBSAMPLE_YUV400;
        chroma_format = VA_RT_FORMAT_YUV400;
        break;
    case 1: // YUV 4:2:0
        fourcc = VA_FOURCC_NV12;
        subsample = SUBSAMPLE_YUV420;
        chroma_format = VA_RT_FORMAT_YUV420;
        break;
    default:
        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
    }

    /* Determine the HW surface format, bound to VA config needs */
    if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
        hw_fourcc = fourcc;
    else {
        hw_fourcc = 0;
        switch (fourcc) {
        case VA_FOURCC_Y800: // Implement with an NV12 surface
            if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
                hw_fourcc = VA_FOURCC_NV12;
                subsample = SUBSAMPLE_YUV420;
            }
            break;
        }
    }
    if (!hw_fourcc)
        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;

    /* (Re-)allocate the underlying surface buffer store, if necessary */
    if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
        struct i965_driver_data * const i965 = i965_driver_data(ctx);

        i965_destroy_surface_storage(obj_surface);
        va_status = i965_check_alloc_surface_bo(ctx, obj_surface,
            i965->codec_info->has_tiled_surface, hw_fourcc, subsample);
        if (va_status != VA_STATUS_SUCCESS)
            return va_status;
    }

    /* Fake chroma components if grayscale is implemented on top of NV12 */
    if (fourcc == VA_FOURCC_Y800 && hw_fourcc == VA_FOURCC_NV12) {
        const uint32_t uv_offset = obj_surface->width * obj_surface->height;
        const uint32_t uv_size   = obj_surface->width * obj_surface->height / 2;

        drm_intel_gem_bo_map_gtt(obj_surface->bo);
        memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
        drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }
    return VA_STATUS_SUCCESS;
}
Example #20
/* Copy BitBlt
 */
GLboolean
intelEmitCopyBlit(struct intel_context *intel,
		  GLuint cpp,
		  GLshort src_pitch,
		  dri_bo *src_buffer,
		  GLuint src_offset,
		  uint32_t src_tiling,
		  GLshort dst_pitch,
		  dri_bo *dst_buffer,
		  GLuint dst_offset,
		  uint32_t dst_tiling,
		  GLshort src_x, GLshort src_y,
		  GLshort dst_x, GLshort dst_y,
		  GLshort w, GLshort h,
		  GLenum logic_op)
{
   GLuint CMD, BR13, pass = 0;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   dri_bo *aper_array[3];
   BATCH_LOCALS;

   /* Blits are in a different ringbuffer so we don't use them. */
   if (intel->gen >= 6)
      return GL_FALSE;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
	 return GL_FALSE;
      if (dst_tiling == I915_TILING_Y)
	 return GL_FALSE;
   }
   if (src_tiling != I915_TILING_NONE) {
      if (src_offset & 4095)
	 return GL_FALSE;
      if (src_tiling == I915_TILING_Y)
	 return GL_FALSE;
   }

   /* do space check before going any further */
   do {
       aper_array[0] = intel->batch->buf;
       aper_array[1] = dst_buffer;
       aper_array[2] = src_buffer;

       if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
           intel_batchbuffer_flush(intel->batch);
           pass++;
       } else
           break;
   } while (pass < 2);

   if (pass >= 2) {
      drm_intel_gem_bo_map_gtt(dst_buffer);
      drm_intel_gem_bo_map_gtt(src_buffer);
      _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
		      cpp,
		      dst_pitch,
		      dst_x, dst_y,
		      w, h,
		      (GLubyte *)src_buffer->virtual + src_offset,
		      src_pitch,
		      src_x, src_y);
      drm_intel_gem_bo_unmap_gtt(src_buffer);
      drm_intel_gem_bo_unmap_gtt(dst_buffer);

      return GL_TRUE;
   }
Example #21
static void
upload_vs_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
   drm_intel_bo *constant_bo;
   int i;

   if (vp->use_const_buffer || nr_params == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      if (brw->vertex_program->IsNVProgram)
	 _mesa_load_tracked_matrices(ctx);

      /* Updates the ParameterValues[i] pointers for all parameters of the
       * basic type of PROGRAM_STATE_VAR.
       */
      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);

      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
				       nr_params * 4 * sizeof(float),
				       4096);
      drm_intel_gem_bo_map_gtt(constant_bo);
      for (i = 0; i < nr_params; i++) {
	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
		vp->program.Base.Parameters->ParameterValues[i],
		4 * sizeof(float));
      }
      drm_intel_gem_bo_unmap_gtt(constant_bo);

      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
		GEN6_CONSTANT_BUFFER_0_ENABLE |
		(5 - 2));
      OUT_RELOC(constant_bo,
		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
		ALIGN(nr_params, 2) / 2 - 1);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      drm_intel_bo_unreference(constant_bo);
   }

   intel_batchbuffer_emit_mi_flush(intel->batch);

   BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
   OUT_BATCH(0); /* scratch space base offset */
   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
	     GEN6_VS_STATISTICS_ENABLE);
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel->batch);
}
Example #22
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      brw->curbe.last_bufsz  = 0;
      goto emit;
   }

   buf = brw->curbe.next_buf;

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      GLuint offset = brw->curbe.wm_start * 16;

      /* copy float constants */
      for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
	 buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
					 brw->wm.prog_data->param[i]);
      }
   }


   /* When using the old VS backend, the clipplanes are actually delivered to
    * both CLIP and VS units.  VS uses them to calculate the outcode bitmasks.
    *
    * When using the new VS backend, it is responsible for setting up its own
    * clipplane constants if it needs them.  This results in a slight waste of
    * curbe space, but the advantage is that the new VS backend can use its
    * general-purpose uniform layout code to store the clipplanes.
    */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
	 buf[offset + i * 4 + 0] = fixed_plane[i][0];
	 buf[offset + i * 4 + 1] = fixed_plane[i][1];
	 buf[offset + i * 4 + 2] = fixed_plane[i][2];
	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
	    buf[offset + i * 4 + 0] = clip_planes[j][0];
	    buf[offset + i * 4 + 1] = clip_planes[j][1];
	    buf[offset + i * 4 + 2] = clip_planes[j][2];
	    buf[offset + i * 4 + 3] = clip_planes[j][3];
	    i++;
	 }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;
      GLuint nr = brw->vs.prog_data->nr_params / 4;

      if (brw->vs.prog_data->uses_new_param_layout) {
	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
	    buf[offset + i] = *brw->vs.prog_data->param[i];
	 }
      } else {
	 /* Load the subset of push constants that will get used when
	  * we also have a pull constant buffer.
	  */
	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
	    if (brw->vs.constant_map[i] != -1) {
	       assert(brw->vs.constant_map[i] <= nr);
	       memcpy(buf + offset + brw->vs.constant_map[i] * 4,
		      vp->program.Base.Parameters->ParameterValues[i],
		      4 * sizeof(float));
	    }
	 }
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4) 
	 printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
		buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
	     brw->curbe.last_buf, buf,
	     bufsz, brw->curbe.last_bufsz,
	     brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.curbe_bo != NULL &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* constants have not changed */
   } else {
      /* Update the record of what our last set of constants was.  We
       * don't just flip the pointers because we don't fill in the
       * data in the padding between the entries.
       */
      memcpy(brw->curbe.last_buf, buf, bufsz);
      brw->curbe.last_bufsz = bufsz;

      if (brw->curbe.curbe_bo != NULL &&
	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
      {
	 drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
	 drm_intel_bo_unreference(brw->curbe.curbe_bo);
	 brw->curbe.curbe_bo = NULL;
      }

      if (brw->curbe.curbe_bo == NULL) {
	 /* Allocate a single page for CURBE entries for this batchbuffer.
	  * They're generally around 64b.
	  */
	 brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->intel.bufmgr, "CURBE",
						  4096, 1 << 6);
	 brw->curbe.curbe_next_offset = 0;
	 drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
	 assert(bufsz < 4096);
      }

      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
      brw->curbe.curbe_next_offset += bufsz;
      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);

      /* Copy data to the buffer:
       */
      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
	     buf,
	     bufsz);
   }

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

emit:
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
Example #23
/**
 * Called via glMapBufferRange and glMapBuffer
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
			  GLintptr offset, GLsizeiptr length,
			  GLbitfield access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   if (intel_obj->sys_buffer) {
      const bool read_only =
	 (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;

      if (!read_only && intel_obj->source)
	 release_buffer(intel_obj);

      if (!intel_obj->buffer || intel_obj->source) {
	 obj->Pointer = intel_obj->sys_buffer + offset;
	 return obj->Pointer;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the access is synchronized (like a normal buffer mapping), then get
    * things flushed out so the later mapping syncs appropriately through GEM.
    * If the user doesn't care about existing buffer contents and mapping would
    * cause us to block, then throw out the old buffer.
    *
    * If they set INVALIDATE_BUFFER, we can pitch the current contents to
    * achieve the required synchronization.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
	 if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
	    drm_intel_bo_unreference(intel_obj->buffer);
	    intel_bufferobj_alloc_buffer(intel, intel_obj);
	 } else {
            perf_debug("Stalling on the GPU for mapping a busy buffer "
                       "object\n");
	    intel_flush(ctx);
	 }
      } else if (drm_intel_bo_busy(intel_obj->buffer) &&
		 (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
	 drm_intel_bo_unreference(intel_obj->buffer);
	 intel_bufferobj_alloc_buffer(intel, intel_obj);
      }
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
	 intel_obj->range_map_buffer = malloc(length);
	 obj->Pointer = intel_obj->range_map_buffer;
      } else {
	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
						      "range map",
						      length, 64);
	 if (!(access & GL_MAP_READ_BIT)) {
	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
	 } else {
	    drm_intel_bo_map(intel_obj->range_map_bo,
			     (access & GL_MAP_WRITE_BIT) != 0);
	 }
	 obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   if (access & GL_MAP_UNSYNCHRONIZED_BIT)
      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
   else if (!(access & GL_MAP_READ_BIT)) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
   } else {
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}