Example #1
/**
 * Interface for getting memory for uploading streamed data to the GPU
 *
 * In most cases, streamed data (for GPU state structures, for example) is
 * uploaded through brw_state_batch(), since that interface allows relocations
 * from the streamed space returned to other BOs.  However, that interface has
 * the restriction that the amount of space allocated has to be "small" (see
 * estimated_max_prim_size in brw_draw.c).
 *
 * This interface, on the other hand, is able to handle arbitrarily sized
 * allocation requests, though it will batch small allocations into the same
 * BO for efficiency and reduced memory footprint.
 *
 * \note The returned pointer is valid only until intel_upload_finish(), which
 * will happen at batch flush or the next
 * intel_upload_space()/intel_upload_data().
 *
 * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
 * entry, and will have a reference to the new BO containing the state on
 * return.
 *
 * \param out_offset Offset within the buffer object at which the data will land.
 */
void *
intel_upload_space(struct brw_context *brw,
                   uint32_t size,
                   uint32_t alignment,
                   drm_intel_bo **out_bo,
                   uint32_t *out_offset)
{
   uint32_t offset;

   offset = ALIGN_NPOT(brw->upload.next_offset, alignment);
   if (brw->upload.bo && offset + size > brw->upload.bo->size) {
      intel_upload_finish(brw);
      offset = 0;
   }

   if (!brw->upload.bo) {
      brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data",
                                          MAX2(INTEL_UPLOAD_SIZE, size), 4096);
      if (brw->has_llc)
         drm_intel_bo_map(brw->upload.bo, true);
      else
         drm_intel_gem_bo_map_gtt(brw->upload.bo);
   }

   brw->upload.next_offset = offset + size;

   *out_offset = offset;
   if (*out_bo != brw->upload.bo) {
      drm_intel_bo_unreference(*out_bo);
      *out_bo = brw->upload.bo;
      drm_intel_bo_reference(brw->upload.bo);
   }

   return brw->upload.bo->virtual + offset;
}
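As a rough illustration of the out_bo/out_offset contract described in the comment above, a caller might look like the following minimal sketch; the helper name, the 64-byte alignment, and the copy-immediately pattern are assumptions for illustration, not part of the example.

/* Minimal sketch of a hypothetical caller: stream "size" bytes into the
 * shared upload BO and hand back a reference to that BO plus the offset
 * where the data landed. */
static void
upload_block(struct brw_context *brw, const void *data, uint32_t size,
             drm_intel_bo **bo_out, uint32_t *offset_out)
{
   /* *bo_out must be NULL or a valid BO on entry; on return it holds a
    * reference to the upload BO that now contains the data. */
   void *dst = intel_upload_space(brw, size, 64 /* alignment */,
                                  bo_out, offset_out);

   /* The returned pointer is only valid until the next
    * intel_upload_space()/intel_upload_data() call or batch flush,
    * so copy immediately. */
   memcpy(dst, data, size);
}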
Example #2
static __DRIimage *
intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
{
   __DRIimage *image;

   image = calloc(1, sizeof *image);
   if (image == NULL)
      return NULL;

   drm_intel_bo_reference(orig_image->bo);
   image->bo              = orig_image->bo;
   image->internal_format = orig_image->internal_format;
   image->planar_format   = orig_image->planar_format;
   image->dri_format      = orig_image->dri_format;
   image->format          = orig_image->format;
   image->offset          = orig_image->offset;
   image->width           = orig_image->width;
   image->height          = orig_image->height;
   image->pitch           = orig_image->pitch;
   image->tile_x          = orig_image->tile_x;
   image->tile_y          = orig_image->tile_y;
   image->has_depthstencil = orig_image->has_depthstencil;
   image->data            = loaderPrivate;

   memcpy(image->strides, orig_image->strides, sizeof(image->strides));
   memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets));

   return image;
}
Example #3
/**
 * Allocates a block of space in the batchbuffer for indirect state.
 *
 * We don't want to allocate separate BOs for every bit of indirect
 * state in the driver.  That would mean overallocating by a significant
 * margin (4096 bytes, even if the object is just a 20-byte surface
 * state), and having more buffers to walk and count for aperture size
 * checking.
 *
 * However, due to the restrictions imposed by the aperture size
 * checking performance hacks, we can't have the batch point at a
 * separate indirect state buffer, because once the batch points at
 * it, no more relocations can be added to it.  So, we sneak these
 * buffers in at the top of the batchbuffer.
 */
void *
brw_state_batch(struct brw_context *brw,
		int size,
		int alignment,
		drm_intel_bo **out_bo,
		uint32_t *out_offset)
{
   struct intel_batchbuffer *batch = brw->intel.batch;
   uint32_t offset;

   assert(size < batch->buf->size);
   offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);

   /* If allocating from the top would wrap below the batchbuffer, or
    * if the batch's used space (plus the reserved pad) collides with our
    * space, then flush and try again.
    */
   if (batch->state_batch_offset < size ||
       offset < batch->ptr - batch->map + batch->reserved_space) {
      intel_batchbuffer_flush(batch);
      offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
   }

   batch->state_batch_offset = offset;

   if (*out_bo != batch->buf) {
      drm_intel_bo_unreference(*out_bo);
      drm_intel_bo_reference(batch->buf);
      *out_bo = batch->buf;
   }

   *out_offset = offset;
   return batch->map + offset;
}
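For illustration, a typical user of brw_state_batch() carves a small block out of the top of the batch and records where it went. The sketch below is hypothetical (the 32-byte size, 64-byte alignment, and helper name are assumptions), but it follows the same out_bo reference-swap convention as the function above.

/* Minimal sketch of a hypothetical caller: allocate a small block of
 * indirect state at the top of the batchbuffer. */
static uint32_t *
alloc_indirect_state(struct brw_context *brw,
                     drm_intel_bo **state_bo, uint32_t *state_offset)
{
   /* *state_bo must be NULL or a valid BO on entry; brw_state_batch()
    * swaps in a reference to the batch BO and returns a CPU pointer
    * into the batch map. */
   uint32_t *blob = brw_state_batch(brw, 32, 64, state_bo, state_offset);

   memset(blob, 0, 32);   /* the real state dwords would be written here */
   return blob;
}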
Example #4
struct intel_bo *
intel_bo_ref(struct intel_bo *bo)
{
   if (bo)
      drm_intel_bo_reference(gem_bo(bo));

   return bo;
}
Example #5
static __DRIimage *
intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
{
    int width, height, offset, stride, dri_format, index;
    struct intel_image_format *f;
    uint32_t mask_x, mask_y;
    __DRIimage *image;

    if (parent == NULL || parent->planar_format == NULL)
        return NULL;

    f = parent->planar_format;

    if (plane >= f->nplanes)
        return NULL;

    width = parent->region->width >> f->planes[plane].width_shift;
    height = parent->region->height >> f->planes[plane].height_shift;
    dri_format = f->planes[plane].dri_format;
    index = f->planes[plane].buffer_index;
    offset = parent->offsets[index];
    stride = parent->strides[index];

    image = intel_allocate_image(dri_format, loaderPrivate);
    if (image == NULL)
       return NULL;

    if (offset + height * stride > parent->region->bo->size) {
       _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
       free(image);
       return NULL;
    }

    image->region = calloc(sizeof(*image->region), 1);
    if (image->region == NULL) {
       free(image);
       return NULL;
    }

    image->region->cpp = _mesa_get_format_bytes(image->format);
    image->region->width = width;
    image->region->height = height;
    image->region->pitch = stride;
    image->region->refcount = 1;
    image->region->bo = parent->region->bo;
    drm_intel_bo_reference(image->region->bo);
    image->region->tiling = parent->region->tiling;
    image->offset = offset;
    intel_setup_image_from_dimensions(image);

    intel_region_get_tile_masks(image->region, &mask_x, &mask_y, false);
    if (offset & mask_x)
       _mesa_warning(NULL,
                     "intel_create_sub_image: offset not on tile boundary");

    return image;
}
Example #6
static void
brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
{
   assert(!fence->batch_bo);
   assert(!fence->signalled);

   brw_emit_mi_flush(brw);
   fence->batch_bo = brw->batch.bo;
   drm_intel_bo_reference(fence->batch_bo);
   intel_batchbuffer_flush(brw);
}
Example #7
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct intel_context *intel,
                            drm_intel_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            int pitch,
                            uint32_t tiling)
{
    struct intel_mipmap_tree *mt;

    struct intel_region *region = calloc(1, sizeof(*region));
    if (!region)
        return NULL;

    /* Nothing will be able to use this miptree with the BO if the offset isn't
     * aligned.
     */
    if (tiling != I915_TILING_NONE)
        assert(offset % 4096 == 0);

    /* miptrees can't handle negative pitch.  If you need flipping of images,
     * that's outside of the scope of the mt.
     */
    assert(pitch >= 0);

    mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
                                     0, 0,
                                     width, height, 1,
                                     true);
    if (!mt) {
        free(region);
        return mt;
    }

    region->cpp = mt->cpp;
    region->width = width;
    region->height = height;
    region->pitch = pitch;
    region->refcount = 1;
    drm_intel_bo_reference(bo);
    region->bo = bo;
    region->tiling = tiling;

    mt->region = region;
    mt->offset = offset;

    return mt;
}
Example #8
struct pipe_fence_handle *
intel_drm_fence_create(drm_intel_bo *bo)
{
   struct intel_drm_fence *fence = CALLOC_STRUCT(intel_drm_fence);

   pipe_reference_init(&fence->reference, 1);
   /* bo is null if fence already expired */
   if (bo) {
      drm_intel_bo_reference(bo);
      fence->bo = bo;
   }

   return (struct pipe_fence_handle *)fence;
}
Example #9
static void
intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
	       GLenum condition, GLbitfield flags)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_sync_object *sync = (struct intel_sync_object *)s;

   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
   intel_batchbuffer_emit_mi_flush(intel->batch);

   sync->bo = intel->batch->buf;
   drm_intel_bo_reference(sync->bo);

   intelFlush(ctx);
}
Example #10
void intel_upload_data(struct intel_context *intel,
		       const void *ptr, GLuint size, GLuint align,
		       drm_intel_bo **return_bo,
		       GLuint *return_offset)
{
   GLuint base, delta;

   base = (intel->upload.offset + align - 1) / align * align;
   if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) {
      wrap_buffers(intel, size);
      base = 0;
   }

   drm_intel_bo_reference(intel->upload.bo);
   *return_bo = intel->upload.bo;
   *return_offset = base;

   delta = base - intel->upload.offset;
   if (intel->upload.buffer_len &&
       intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer))
   {
      drm_intel_bo_subdata(intel->upload.bo,
			   intel->upload.buffer_offset,
			   intel->upload.buffer_len,
			   intel->upload.buffer);
      intel->upload.buffer_len = 0;
   }

   if (size < sizeof(intel->upload.buffer))
   {
      if (intel->upload.buffer_len == 0)
	 intel->upload.buffer_offset = base;
      else
	 intel->upload.buffer_len += delta;

      memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size);
      intel->upload.buffer_len += size;
   }
   else
   {
      drm_intel_bo_subdata(intel->upload.bo, base, size, ptr);
   }

   intel->upload.offset = base + size;
}
Example #11
static __DRIimage *
intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
{
    int width, height, offset, stride, dri_format, index;
    struct intel_image_format *f;
    __DRIimage *image;

    if (parent == NULL || parent->planar_format == NULL)
        return NULL;

    f = parent->planar_format;

    if (plane >= f->nplanes)
        return NULL;

    width = parent->width >> f->planes[plane].width_shift;
    height = parent->height >> f->planes[plane].height_shift;
    dri_format = f->planes[plane].dri_format;
    index = f->planes[plane].buffer_index;
    offset = parent->offsets[index];
    stride = parent->strides[index];

    image = intel_allocate_image(dri_format, loaderPrivate);
    if (image == NULL)
       return NULL;

    if (offset + height * stride > parent->bo->size) {
       _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
       free(image);
       return NULL;
    }

    image->bo = parent->bo;
    drm_intel_bo_reference(parent->bo);

    image->width = width;
    image->height = height;
    image->pitch = stride;
    image->offset = offset;

    intel_image_warn_if_unaligned(image, __func__);

    return image;
}
Example #12
void intel_upload_unmap(struct intel_context *intel,
			const void *ptr, GLuint size, GLuint align,
			drm_intel_bo **return_bo,
			GLuint *return_offset)
{
   GLuint base;

   base = (intel->upload.offset + align - 1) / align * align;
   if (size > sizeof(intel->upload.buffer)) {
      drm_intel_bo_subdata(intel->upload.bo, base, size, ptr);
      free((void*)ptr);
   }

   drm_intel_bo_reference(intel->upload.bo);
   *return_bo = intel->upload.bo;
   *return_offset = base;

   intel->upload.offset = base + size;
}
Example #13
void
intel_upload_data(struct brw_context *brw,
                  const void *ptr, GLuint size, GLuint align,
                  drm_intel_bo **return_bo,
                  GLuint *return_offset)
{
   GLuint base, delta;

   base = (brw->upload.offset + align - 1) / align * align;
   if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
      wrap_buffers(brw, size);
      base = 0;
   }

   drm_intel_bo_reference(brw->upload.bo);
   *return_bo = brw->upload.bo;
   *return_offset = base;

   delta = base - brw->upload.offset;
   if (brw->upload.buffer_len &&
       brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) {
      drm_intel_bo_subdata(brw->upload.bo,
                           brw->upload.buffer_offset,
                           brw->upload.buffer_len,
                           brw->upload.buffer);
      brw->upload.buffer_len = 0;
   }

   if (size < sizeof(brw->upload.buffer)) {
      if (brw->upload.buffer_len == 0)
         brw->upload.buffer_offset = base;
      else
         brw->upload.buffer_len += delta;

      memcpy(brw->upload.buffer + brw->upload.buffer_len, ptr, size);
      brw->upload.buffer_len += size;
   } else {
      drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
   }

   brw->upload.offset = base + size;
}
Example #14
static __DRIimage *
intel_create_sub_image(__DRIimage *parent,
                       int width, int height, int dri_format,
                       int offset, int pitch, void *loaderPrivate)
{
    __DRIimage *image;
    int cpp;
    uint32_t mask_x, mask_y;

    image = intel_allocate_image(dri_format, loaderPrivate);
    if (image == NULL)
       return NULL;

    cpp = _mesa_get_format_bytes(image->format);
    if (offset + height * cpp * pitch > parent->region->bo->size) {
       _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
       FREE(image);
       return NULL;
    }

    image->region = calloc(sizeof(*image->region), 1);
    if (image->region == NULL) {
       FREE(image);
       return NULL;
    }

    image->region->cpp = _mesa_get_format_bytes(image->format);
    image->region->width = width;
    image->region->height = height;
    image->region->pitch = pitch;
    image->region->refcount = 1;
    image->region->bo = parent->region->bo;
    drm_intel_bo_reference(image->region->bo);
    image->region->tiling = parent->region->tiling;
    image->region->screen = parent->region->screen;
    image->offset = offset;

    intel_region_get_tile_masks(image->region, &mask_x, &mask_y);
    if (offset & mask_x)
       _mesa_warning(NULL,
                     "intel_create_sub_image: offset not on tile boundary");

    return image;
}
Example #15
/**
 * Sets up a DRIImage structure to point to a slice out of a miptree.
 */
static void
intel_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image,
                                   struct intel_mipmap_tree *mt, GLuint level,
                                   GLuint zoffset)
{
   intel_miptree_make_shareable(brw, mt);

   intel_miptree_check_level_layer(mt, level, zoffset);

   image->width = minify(mt->physical_width0, level - mt->first_level);
   image->height = minify(mt->physical_height0, level - mt->first_level);
   image->pitch = mt->pitch;

   image->offset = intel_miptree_get_tile_offsets(mt, level, zoffset,
                                                  &image->tile_x,
                                                  &image->tile_y);

   drm_intel_bo_unreference(image->bo);
   image->bo = mt->bo;
   drm_intel_bo_reference(mt->bo);
}
Example #16
static __DRIimage *
intel_create_image_from_renderbuffer(__DRIcontext *context,
				     int renderbuffer, void *loaderPrivate)
{
   __DRIimage *image;
   struct brw_context *brw = context->driverPrivate;
   struct gl_context *ctx = &brw->ctx;
   struct gl_renderbuffer *rb;
   struct intel_renderbuffer *irb;

   rb = _mesa_lookup_renderbuffer(ctx, renderbuffer);
   if (!rb) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
      return NULL;
   }

   irb = intel_renderbuffer(rb);
   intel_miptree_make_shareable(brw, irb->mt);
   image = calloc(1, sizeof *image);
   if (image == NULL)
      return NULL;

   image->internal_format = rb->InternalFormat;
   image->format = rb->Format;
   image->offset = 0;
   image->data = loaderPrivate;
   drm_intel_bo_unreference(image->bo);
   image->bo = irb->mt->bo;
   drm_intel_bo_reference(irb->mt->bo);
   image->width = rb->Width;
   image->height = rb->Height;
   image->pitch = irb->mt->pitch;
   image->dri_format = driGLFormatToImageFormat(image->format);
   image->has_depthstencil = irb->mt->stencil_mt ? true : false;

   rb->NeedsFinishRenderTexture = true;
   return image;
}
Example #17
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      brw->draw.gl_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;

      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      if (brw->gen < 6)
	 brw_set_prim(brw, &prims[i]);
      else
	 gen6_set_prim(brw, &prims[i]);

retry:

      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
       * that the only state updated within the loop, outside of this block, is
       * the state touched by *_set_prim or intel_batchbuffer_flush(), which
       * only impacts brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
	 brw->no_batch_wrap = true;
	 brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
	 if (!fail_next) {
	    intel_batchbuffer_reset_to_saved(brw);
	    intel_batchbuffer_flush(brw);
	    fail_next = true;
	    goto retry;
	 } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
	 }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}
Example #18
/* Recalculate all state from scratch.  Perhaps not the most
 * efficient, but this has gotten complex enough that we need
 * something which is understandable and reliable.
 */
static bool
i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
   struct gl_context *ctx = &intel->ctx;
   struct i830_context *i830 = i830_context(ctx);
   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *tObj = tUnit->_Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct gl_texture_image *firstImage;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   GLuint *state = i830->state.Tex[unit], format, pitch;
   GLint lodbias;
   GLubyte border[4];
   GLuint dst_x, dst_y;

   memset(state, 0, sizeof(*state));

   /* We need to refcount these. */

   if (i830->state.tex_buffer[unit] != NULL) {
       drm_intel_bo_unreference(i830->state.tex_buffer[unit]);
       i830->state.tex_buffer[unit] = NULL;
   }

   if (!intel_finalize_mipmap_tree(intel, unit))
      return false;

   /* Get first image here, since intelObj->firstLevel will get set in
    * the intel_finalize_mipmap_tree() call above.
    */
   firstImage = tObj->Image[0][tObj->BaseLevel];

   intel_miptree_get_image_offset(intelObj->mt, tObj->BaseLevel, 0,
				  &dst_x, &dst_y);

   drm_intel_bo_reference(intelObj->mt->region->bo);
   i830->state.tex_buffer[unit] = intelObj->mt->region->bo;
   pitch = intelObj->mt->region->pitch;

   /* XXX: This calculation is probably broken for tiled images with
    * a non-page-aligned offset.
    */
   i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch;

   format = translate_texture_format(firstImage->TexFormat);

   state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
                               (LOAD_TEXTURE_MAP0 << unit) | 4);

   state[I830_TEXREG_TM0S1] =
      (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) |
       ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format);

   if (intelObj->mt->region->tiling != I915_TILING_NONE) {
      state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE;
      if (intelObj->mt->region->tiling == I915_TILING_Y)
	 state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK;
   }

   state[I830_TEXREG_TM0S2] =
      ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);

   {
      if (tObj->Target == GL_TEXTURE_CUBE_MAP)
         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) |
                                    CUBE_NEGX_ENABLE |
                                    CUBE_POSX_ENABLE |
                                    CUBE_NEGY_ENABLE |
                                    CUBE_POSY_ENABLE |
                                    CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE);
      else
         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit));
   }




   {
      GLuint minFilt, mipFilt, magFilt;
      float maxlod;
      uint32_t minlod_fixed, maxlod_fixed;

      switch (sampler->MinFilter) {
      case GL_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_NEAREST_MIPMAP_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_LINEAR_MIPMAP_NEAREST:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_NEAREST_MIPMAP_LINEAR:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_LINEAR;
         break;
      case GL_LINEAR_MIPMAP_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_LINEAR;
         break;
      default:
         return false;
      }

      if (sampler->MaxAnisotropy > 1.0) {
         minFilt = FILTER_ANISOTROPIC;
         magFilt = FILTER_ANISOTROPIC;
      }
      else {
         switch (sampler->MagFilter) {
         case GL_NEAREST:
            magFilt = FILTER_NEAREST;
            break;
         case GL_LINEAR:
            magFilt = FILTER_LINEAR;
            break;
         default:
            return false;
         }
      }

      lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0);
      if (lodbias < -64)
          lodbias = -64;
      if (lodbias > 63)
          lodbias = 63;
      
      state[I830_TEXREG_TM0S3] = ((lodbias << TM0S3_LOD_BIAS_SHIFT) & 
                                  TM0S3_LOD_BIAS_MASK);
#if 0
      /* YUV conversion:
       */
      if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
          firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
         state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION;
#endif

      /* We get one field with fraction bits for the maximum
       * addressable (smallest resolution) LOD.  Use it to cover both
       * MAX_LEVEL and MAX_LOD.
       */
      minlod_fixed = U_FIXED(CLAMP(sampler->MinLod, 0.0, 11), 4);
      maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
      if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM ||
	  intel->intelScreen->deviceID == PCI_CHIP_I865_G) {
	 maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2);
	 maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 3) >> 2);
	 state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT;
	 state[I830_TEXREG_TM0S2] |= TM0S2_LOD_PRECLAMP;
      } else {
Example #19
void
intel_bo_reference(struct intel_bo *bo)
{
   drm_intel_bo_reference(gem_bo(bo));
}
Example #20
static void brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *old_bo = brw->ib.bo;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = _mesa_sizeof_type(index_buffer->type);
   ib_size = ib_type_size * index_buffer->count;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {
      /* Get new bufferobj, offset:
       */
      intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
			&brw->ib.bo, &offset);
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
      if ((ib_type_size - 1) & offset) {
         perf_debug("copying index buffer to a temporary to work around "
                    "misaligned offset %d\n", offset);

         GLubyte *map = ctx->Driver.MapBufferRange(ctx,
                                                   offset,
                                                   ib_size,
                                                   GL_MAP_READ_BIT,
                                                   bufferobj,
                                                   MAP_INTERNAL);

         intel_upload_data(brw, map, ib_size, ib_type_size,
                           &brw->ib.bo, &offset);

         ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
      } else {
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
                                   offset, ib_size);
         if (bo != brw->ib.bo) {
            drm_intel_bo_unreference(brw->ib.bo);
            brw->ib.bo = bo;
            drm_intel_bo_reference(bo);
         }
      }
   }

   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
    * the index buffer state when we're just moving the start index
    * of our drawing.
    */
   brw->ib.start_vertex_offset = offset / ib_type_size;

   if (brw->ib.bo != old_bo)
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   }
}
Example #21
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                           ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);
	 int k;

	 /* If we have a VB set to be uploaded for this buffer object
	  * already, reuse that VB state so that we emit fewer
	  * relocations.
	  */
	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
	    buffer->offset = (uintptr_t)glarray->Ptr;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;

            uint32_t offset, size;
            if (glarray->InstanceDivisor) {
               offset = buffer->offset;
               size = (buffer->stride * ((brw->num_instances /
                                          glarray->InstanceDivisor) - 1) +
                       glarray->_ElementSize);
            } else {
               if (min_index == -1) {
                  offset = 0;
                  size = intel_buffer->Base.Size;
               } else {
                  offset = buffer->offset + min_index * buffer->stride;
                  size = (buffer->stride * (max_index - min_index) +
                          glarray->_ElementSize);
               }
            }
            buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
                                                offset, size);
            drm_intel_bo_reference(buffer->bo);

	    input->buffer = j++;
	    input->offset = 0;
	 }

	 /* This is a common place to reach if the user mistakenly supplies
	  * a pointer in place of a VBO offset.  If we just let it go through,
	  * we may end up dereferencing a pointer beyond the bounds of the
	  * GTT.  We would hope that the VBO's max_index would save us, but
	  * Mesa appears to hand us min/max values not clipped to the
	  * array object's _MaxElement, and _MaxElement frequently appears
	  * to be wrong anyway.
	  *
	  * The VBO spec allows application termination in this case, and it's
	  * probably a service to the poor programmer to do so rather than
	  * trying to just not render.
	  */
	 assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
	 {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
Example #22
static void brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *bo = NULL;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = get_size(index_buffer->type);
   ib_size = ib_type_size * index_buffer->count;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {

      /* Get new bufferobj, offset:
       */
      intel_upload_data(&brw->intel, index_buffer->ptr, ib_size, ib_type_size,
			&bo, &offset);
      brw->ib.start_vertex_offset = offset / ib_type_size;
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
       if ((get_size(index_buffer->type) - 1) & offset) {
           GLubyte *map = ctx->Driver.MapBufferRange(ctx,
						     offset,
						     ib_size,
						     GL_MAP_WRITE_BIT,
						     bufferobj);

	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
			     &bo, &offset);
	   brw->ib.start_vertex_offset = offset / ib_type_size;

           ctx->Driver.UnmapBuffer(ctx, bufferobj);
       } else {
	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
	   * the index buffer state when we're just moving the start index
	   * of our drawing.
	   */
	  brw->ib.start_vertex_offset = offset / ib_type_size;

	  bo = intel_bufferobj_source(intel,
				      intel_buffer_object(bufferobj),
				      ib_type_size,
				      &offset);
	  drm_intel_bo_reference(bo);

	  brw->ib.start_vertex_offset += offset / ib_type_size;
       }
   }

   if (brw->ib.bo != bo) {
      drm_intel_bo_unreference(brw->ib.bo);
      brw->ib.bo = bo;

      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   } else {
      drm_intel_bo_unreference(bo);
   }

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   }
}
Example #23
/* Recalculate all state from scratch.  Perhaps not the most
 * efficient, but this has gotten complex enough that we need
 * something which is understandable and reliable.
 */
static GLboolean
i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
   struct gl_context *ctx = &intel->ctx;
   struct i915_context *i915 = i915_context(ctx);
   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *tObj = tUnit->_Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct gl_texture_image *firstImage;
   GLuint *state = i915->state.Tex[unit], format, pitch;
   GLint lodbias, aniso = 0;
   GLubyte border[4];
   GLfloat maxlod;

   memset(state, 0, sizeof(state));

   /* We need to refcount these. */

   if (i915->state.tex_buffer[unit] != NULL) {
       drm_intel_bo_unreference(i915->state.tex_buffer[unit]);
       i915->state.tex_buffer[unit] = NULL;
   }

   if (!intel_finalize_mipmap_tree(intel, unit))
      return GL_FALSE;

   /* Get first image here, since intelObj->firstLevel will get set in
    * the intel_finalize_mipmap_tree() call above.
    */
   firstImage = tObj->Image[0][intelObj->firstLevel];

   drm_intel_bo_reference(intelObj->mt->region->buffer);
   i915->state.tex_buffer[unit] = intelObj->mt->region->buffer;
   i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */

   format = translate_texture_format(firstImage->TexFormat,
				     firstImage->InternalFormat,
				     tObj->DepthMode);
   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;

   state[I915_TEXREG_MS3] =
      (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
       ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format);

   if (intelObj->mt->region->tiling != I915_TILING_NONE) {
      state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
      if (intelObj->mt->region->tiling == I915_TILING_Y)
	 state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
   }

   /* We get one field with fraction bits for the maximum addressable
    * (lowest resolution) LOD.  Use it to cover both MAX_LEVEL and
    * MAX_LOD.
    */
   maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
   state[I915_TEXREG_MS4] =
      ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) |
       MS4_CUBE_FACE_ENA_MASK |
       (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) |
       ((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));


   {
      GLuint minFilt, mipFilt, magFilt;

      switch (tObj->MinFilter) {
      case GL_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_NEAREST_MIPMAP_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_LINEAR_MIPMAP_NEAREST:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_NEAREST_MIPMAP_LINEAR:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_LINEAR;
         break;
      case GL_LINEAR_MIPMAP_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_LINEAR;
         break;
      default:
         return GL_FALSE;
      }

      if (tObj->MaxAnisotropy > 1.0) {
         minFilt = FILTER_ANISOTROPIC;
         magFilt = FILTER_ANISOTROPIC;
         if (tObj->MaxAnisotropy > 2.0)
            aniso = SS2_MAX_ANISO_4;
         else
            aniso = SS2_MAX_ANISO_2;
      }
      else {
         switch (tObj->MagFilter) {
         case GL_NEAREST:
            magFilt = FILTER_NEAREST;
            break;
         case GL_LINEAR:
            magFilt = FILTER_LINEAR;
            break;
         default:
            return GL_FALSE;
         }
      }

      lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0);
      if (lodbias < -256)
          lodbias = -256;
      if (lodbias > 255)
          lodbias = 255;
      state[I915_TEXREG_SS2] = ((lodbias << SS2_LOD_BIAS_SHIFT) & 
                                SS2_LOD_BIAS_MASK);

      /* YUV conversion:
       */
      if (firstImage->TexFormat == MESA_FORMAT_YCBCR ||
          firstImage->TexFormat == MESA_FORMAT_YCBCR_REV)
         state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION;

      /* Shadow:
       */
      if (tObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
          tObj->Target != GL_TEXTURE_3D) {
         if (tObj->Target == GL_TEXTURE_1D) 
            return GL_FALSE;

         state[I915_TEXREG_SS2] |=
            (SS2_SHADOW_ENABLE |
             intel_translate_shadow_compare_func(tObj->CompareFunc));

         minFilt = FILTER_4X4_FLAT;
         magFilt = FILTER_4X4_FLAT;
      }

      state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
                                 (mipFilt << SS2_MIP_FILTER_SHIFT) |
                                 (magFilt << SS2_MAG_FILTER_SHIFT) |
                                 aniso);
   }

   {
      GLenum ws = tObj->WrapS;
      GLenum wt = tObj->WrapT;
      GLenum wr = tObj->WrapR;
      float minlod;

      /* We program 1D textures as 2D textures, so the 2D texcoord could
       * result in sampling border values if we don't set the T wrap to
       * repeat.
       */
      if (tObj->Target == GL_TEXTURE_1D)
	 wt = GL_REPEAT;

      /* 3D textures don't seem to respect the border color.
       * Fallback if there's ever a danger that they might refer to
       * it.  
       * 
       * Effectively this means fallback on 3D clamp or
       * clamp_to_border.
       */
      if (tObj->Target == GL_TEXTURE_3D &&
          (tObj->MinFilter != GL_NEAREST ||
           tObj->MagFilter != GL_NEAREST) &&
          (ws == GL_CLAMP ||
           wt == GL_CLAMP ||
           wr == GL_CLAMP ||
           ws == GL_CLAMP_TO_BORDER ||
           wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER))
         return GL_FALSE;

      /* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not 
       * used) when using cube map texture coordinates
       */
      if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
          (((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) ||
           ((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE))))
          return GL_FALSE;

      state[I915_TEXREG_SS3] = ss3;     /* SS3_NORMALIZED_COORDS */

      state[I915_TEXREG_SS3] |=
         ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) |
          (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) |
          (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));

      minlod = MIN2(tObj->MinLod, tObj->_MaxLevel - tObj->BaseLevel);
      state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
      state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) <<
				 SS3_MIN_LOD_SHIFT);

   }

   /* convert border color from float to ubyte */
   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]);
   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]);
   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]);
   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]);

   if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
      /* GL specs that border color for depth textures is taken from the
       * R channel, while the hardware uses A.  Spam R into all the channels
       * for safety.
       */
      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0],
					       border[0],
					       border[0],
					       border[0]);
   } else {
      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3],
					       border[0],
					       border[1],
					       border[2]);
   }


   I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE);
   /* memcmp was already disabled, but definitely won't work as the
    * region might now change and that wouldn't be detected:
    */
   I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));


#if 0
   DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]);
   DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]);
   DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]);
   DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]);
   DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]);
   DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]);
#endif

   return GL_TRUE;
}
Example #24
/* Recalculate all state from scratch.  Perhaps not the most
 * efficient, but this has gotten complex enough that we need
 * something which is understandable and reliable.
 */
static GLboolean
i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
   GLcontext *ctx = &intel->ctx;
   struct i830_context *i830 = i830_context(ctx);
   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *tObj = tUnit->_Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct gl_texture_image *firstImage;
   GLuint *state = i830->state.Tex[unit], format, pitch;
   GLint lodbias;
   GLubyte border[4];
   GLuint dst_x, dst_y;

   memset(state, 0, sizeof(state));

   /* We need to refcount these. */

   if (i830->state.tex_buffer[unit] != NULL) {
       drm_intel_bo_unreference(i830->state.tex_buffer[unit]);
       i830->state.tex_buffer[unit] = NULL;
   }

   if (!intel_finalize_mipmap_tree(intel, unit))
      return GL_FALSE;

   /* Get first image here, since intelObj->firstLevel will get set in
    * the intel_finalize_mipmap_tree() call above.
    */
   firstImage = tObj->Image[0][intelObj->firstLevel];

   intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0,
				  &dst_x, &dst_y);

   drm_intel_bo_reference(intelObj->mt->region->buffer);
   i830->state.tex_buffer[unit] = intelObj->mt->region->buffer;
   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;

   /* XXX: This calculation is probably broken for tiled images with
    * a non-page-aligned offset.
    */
   i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch;

   format = translate_texture_format(firstImage->TexFormat,
				     firstImage->InternalFormat);

   state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
                               (LOAD_TEXTURE_MAP0 << unit) | 4);

   state[I830_TEXREG_TM0S1] =
      (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) |
       ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format);

   if (intelObj->mt->region->tiling != I915_TILING_NONE) {
      state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE;
      if (intelObj->mt->region->tiling == I915_TILING_Y)
	 state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK;
   }

   state[I830_TEXREG_TM0S2] =
      ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);

   {
      if (tObj->Target == GL_TEXTURE_CUBE_MAP)
         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) |
                                    CUBE_NEGX_ENABLE |
                                    CUBE_POSX_ENABLE |
                                    CUBE_NEGY_ENABLE |
                                    CUBE_POSY_ENABLE |
                                    CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE);
      else
         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit));
   }




   {
      GLuint minFilt, mipFilt, magFilt;

      switch (tObj->MinFilter) {
      case GL_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_NEAREST_MIPMAP_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_LINEAR_MIPMAP_NEAREST:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_NEAREST_MIPMAP_LINEAR:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_LINEAR;
         break;
      case GL_LINEAR_MIPMAP_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_LINEAR;
         break;
      default:
         return GL_FALSE;
      }

      if (tObj->MaxAnisotropy > 1.0) {
         minFilt = FILTER_ANISOTROPIC;
         magFilt = FILTER_ANISOTROPIC;
      }
      else {
         switch (tObj->MagFilter) {
         case GL_NEAREST:
            magFilt = FILTER_NEAREST;
            break;
         case GL_LINEAR:
            magFilt = FILTER_LINEAR;
            break;
         default:
            return GL_FALSE;
         }
      }

      lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0);
      if (lodbias < -64)
          lodbias = -64;
      if (lodbias > 63)
          lodbias = 63;
      
      state[I830_TEXREG_TM0S3] = ((lodbias << TM0S3_LOD_BIAS_SHIFT) & 
                                  TM0S3_LOD_BIAS_MASK);
#if 0
      /* YUV conversion:
       */
      if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
          firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
         state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION;
#endif

      state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel -
                                    intelObj->firstLevel) *
                                   4) << TM0S3_MIN_MIP_SHIFT;

      state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) |
                                   (mipFilt << TM0S3_MIP_FILTER_SHIFT) |
                                   (magFilt << TM0S3_MAG_FILTER_SHIFT));
   }

   {
      GLenum ws = tObj->WrapS;
      GLenum wt = tObj->WrapT;


      /* 3D textures not available on i830
       */
      if (tObj->Target == GL_TEXTURE_3D)
         return GL_FALSE;

      state[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
                                MAP_UNIT(unit) |
                                ENABLE_TEXCOORD_PARAMS |
                                ss3 |
                                ENABLE_ADDR_V_CNTL |
                                TEXCOORD_ADDR_V_MODE(translate_wrap_mode(wt))
                                | ENABLE_ADDR_U_CNTL |
                                TEXCOORD_ADDR_U_MODE(translate_wrap_mode
                                                     (ws)));
   }

   /* convert border color from float to ubyte */
   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]);
   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]);
   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]);
   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]);

   state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3],
					      border[0],
					      border[1],
					      border[2]);

   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE);
   /* memcmp was already disabled, but definitely won't work as the
    * region might now change and that wouldn't be detected:
    */
   I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
   return GL_TRUE;
}
Example #25
/* Recalculate all state from scratch.  Perhaps not the most
 * efficient, but this has gotten complex enough that we need
 * something which is understandable and reliable.
 */
static bool
i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
   struct gl_context *ctx = &intel->ctx;
   struct i915_context *i915 = i915_context(ctx);
   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *tObj = tUnit->_Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct gl_texture_image *firstImage;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   GLuint *state = i915->state.Tex[unit], format, pitch;
   GLint lodbias, aniso = 0;
   GLubyte border[4];
   GLfloat maxlod;

   memset(state, 0, sizeof(state));

   /* We need to refcount these. */

   if (i915->state.tex_buffer[unit] != NULL) {
       drm_intel_bo_unreference(i915->state.tex_buffer[unit]);
       i915->state.tex_buffer[unit] = NULL;
   }

   if (!intel_finalize_mipmap_tree(intel, unit))
      return false;

   /* Get first image here, since intelObj->firstLevel will get set in
    * the intel_finalize_mipmap_tree() call above.
    */
   firstImage = tObj->Image[0][tObj->BaseLevel];

   drm_intel_bo_reference(intelObj->mt->region->bo);
   i915->state.tex_buffer[unit] = intelObj->mt->region->bo;
   i915->state.tex_offset[unit] = intelObj->mt->offset;

   format = translate_texture_format(firstImage->TexFormat,
				     tObj->DepthMode);
   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;

   state[I915_TEXREG_MS3] =
      (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
       ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format);

   if (intelObj->mt->region->tiling != I915_TILING_NONE) {
      state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
      if (intelObj->mt->region->tiling == I915_TILING_Y)
	 state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
   }

   /* We get one field with fraction bits for the maximum addressable
    * (lowest resolution) LOD.  Use it to cover both MAX_LEVEL and
    * MAX_LOD.
    */
   maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
   state[I915_TEXREG_MS4] =
      ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) |
       MS4_CUBE_FACE_ENA_MASK |
       (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) |
       ((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
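
   /* Illustrative only (assuming U_FIXED(v, n) packs v as unsigned fixed
    * point with n fraction bits, i.e. (unsigned)(v * (1 << n))): a texture
    * with BaseLevel 0, _MaxLevel 5 and a large MaxLod gives maxlod = 5.0,
    * so U_FIXED(5.0, 2) = 20 lands in the MAX_LOD field and sampling is
    * clamped to mip level 5.
    */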


   {
      GLuint minFilt, mipFilt, magFilt;

      switch (sampler->MinFilter) {
      case GL_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_NEAREST_MIPMAP_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_LINEAR_MIPMAP_NEAREST:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_NEAREST_MIPMAP_LINEAR:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_LINEAR;
         break;
      case GL_LINEAR_MIPMAP_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_LINEAR;
         break;
      default:
         return false;
      }

      if (sampler->MaxAnisotropy > 1.0) {
         minFilt = FILTER_ANISOTROPIC;
         magFilt = FILTER_ANISOTROPIC;
         if (sampler->MaxAnisotropy > 2.0)
            aniso = SS2_MAX_ANISO_4;
         else
            aniso = SS2_MAX_ANISO_2;
      }
      else {
         switch (sampler->MagFilter) {
         case GL_NEAREST:
            magFilt = FILTER_NEAREST;
            break;
         case GL_LINEAR:
            magFilt = FILTER_LINEAR;
            break;
         default:
            return false;
         }
      }

      lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0);
      if (lodbias < -256)
          lodbias = -256;
      if (lodbias > 255)
          lodbias = 255;
      state[I915_TEXREG_SS2] = ((lodbias << SS2_LOD_BIAS_SHIFT) & 
                                SS2_LOD_BIAS_MASK);
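
      /* Illustrative only: the bias is signed fixed point with 4 fraction
       * bits, so a combined bias of +1.5 becomes (int)(1.5 * 16.0) = 24,
       * and the clamp to [-256, 255] corresponds to roughly
       * [-16.0, +15.94] in LOD units.
       */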

      /* YUV conversion:
       */
      if (firstImage->TexFormat == MESA_FORMAT_YCBCR ||
          firstImage->TexFormat == MESA_FORMAT_YCBCR_REV)
         state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION;

      /* Shadow:
       */
      if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
          tObj->Target != GL_TEXTURE_3D) {
         if (tObj->Target == GL_TEXTURE_1D) 
            return false;

         state[I915_TEXREG_SS2] |=
            (SS2_SHADOW_ENABLE |
             intel_translate_shadow_compare_func(sampler->CompareFunc));

         minFilt = FILTER_4X4_FLAT;
         magFilt = FILTER_4X4_FLAT;
      }

      state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
                                 (mipFilt << SS2_MIP_FILTER_SHIFT) |
                                 (magFilt << SS2_MAG_FILTER_SHIFT) |
                                 aniso);
   }

   {
      GLenum ws = sampler->WrapS;
      GLenum wt = sampler->WrapT;
      GLenum wr = sampler->WrapR;
      float minlod;

      /* We program 1D textures as 2D textures, so the 2D texcoord could
       * result in sampling border values if we don't set the T wrap to
       * repeat.
       */
      if (tObj->Target == GL_TEXTURE_1D)
	 wt = GL_REPEAT;

      /* 3D textures don't seem to respect the border color.
       * Fallback if there's ever a danger that they might refer to
       * it.  
       * 
       * Effectively this means fallback on 3D clamp or
       * clamp_to_border.
       */
      if (tObj->Target == GL_TEXTURE_3D &&
          (sampler->MinFilter != GL_NEAREST ||
           sampler->MagFilter != GL_NEAREST) &&
          (ws == GL_CLAMP ||
           wt == GL_CLAMP ||
           wr == GL_CLAMP ||
           ws == GL_CLAMP_TO_BORDER ||
           wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER))
         return false;

      /* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not 
       * used) when using cube map texture coordinates
       */
      if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
          (((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) ||
           ((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE))))
          return false;

      /*
       * According to 3DSTATE_MAP_STATE on page 104 of the Bspec,
       * Vol3d 3D Instructions:
       *   [DevGDG and DevAlv]: Must be a power of 2 for cube maps.
       *   [DevLPT, DevCST and DevBLB]: If not a power of 2, cube maps
       *      must have all faces enabled.
       *
       * However, as tested on Pineview (DevBLB-derived), rendering of NPOT
       * cube maps is still wrong (some fragments are not sampled
       * correctly).  The texture layout appears to be at fault: aligning
       * the width and height to 4 (although this doesn't make much sense)
       * fixes this case but breaks others.  The Bspec says nothing about
       * layout or alignment for NPOT cube maps, so it presumably assumes
       * a POT cube map.
       *
       * Thus, we likely need to do this for other platforms as well.
       */
      if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
          !is_power_of_two(firstImage->Height))
         return false;
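
      /* A minimal sketch of what the is_power_of_two() helper used above
       * is assumed to do (the real helper is not shown in this snippet):
       *
       *    static inline bool is_power_of_two(uint32_t n)
       *    {
       *       return n != 0 && (n & (n - 1)) == 0;
       *    }
       */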

      state[I915_TEXREG_SS3] = ss3;     /* SS3_NORMALIZED_COORDS */

      state[I915_TEXREG_SS3] |=
         ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) |
          (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) |
          (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));

      minlod = MIN2(sampler->MinLod, tObj->_MaxLevel - tObj->BaseLevel);
      state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
      state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) <<
				 SS3_MIN_LOD_SHIFT);

   }

   /* convert border color from float to ubyte */
   CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]);
   CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]);
   CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]);
   CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]);

   if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
      /* GL specs that border color for depth textures is taken from the
       * R channel, while the hardware uses A.  Spam R into all the channels
       * for safety.
       */
      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0],
					       border[0],
					       border[0],
					       border[0]);
   } else {
      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3],
					       border[0],
					       border[1],
					       border[2]);
   }


   I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), true);
   /* memcmp was already disabled, but definitely won't work as the
    * region might now change and that wouldn't be detected:
    */
   I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));


#if 0
   DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]);
   DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]);
   DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]);
   DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]);
   DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]);
   DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]);
#endif

   return true;
}
Example #26
0
static void brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0, total_size = 0;
   unsigned int min_index = brw->vb.min_index;
   unsigned int max_index = brw->vb.max_index;
   int delta, i, j;
   GLboolean can_merge_uploads = GL_TRUE;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read
    */
   if (0)
      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      if (input->glarray->Size && get_size(input->glarray->Type))
         brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      goto prepare;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;
      int type_size = get_size(glarray->Type);

      input->element_size = type_size * glarray->Size;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);
	 int k;

	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
            buffer->bo = intel_bufferobj_source(intel,
                                                intel_buffer, type_size,
						&buffer->offset);
	    drm_intel_bo_reference(buffer->bo);
	    buffer->offset += (uintptr_t)glarray->Ptr;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;

	    input->buffer = j++;
	    input->offset = 0;
	 }

	 /* This is a common place to reach if the user mistakenly supplies
	  * a pointer in place of a VBO offset.  If we just let it go through,
	  * we may end up dereferencing a pointer beyond the bounds of the
	  * GTT.  We would hope that the VBO's max_index would save us, but
	  * Mesa appears to hand us min/max values not clipped to the
	  * array object's _MaxElement, and _MaxElement frequently appears
	  * to be wrong anyway.
	  *
	  * The VBO spec allows application termination in this case, and it's
	  * probably a service to the poor programmer to do so rather than
	  * trying to just not render.
	  */
	 assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
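
	 /* Illustrative only: with a VBO bound, the final argument of
	  * glVertexAttribPointer() is a byte offset cast to a pointer,
	  * e.g. (void *)(uintptr_t)16, not the address of client memory
	  * such as &vertices[0]; passing the latter is the mistake the
	  * assert above is meant to catch early.
	  */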
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    /* Position array not properly enabled:
	     */
	    if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) {
               intel->Fallback = true; /* boolean, not bitfield */
               return;
            }

	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
		  (uintptr_t)(glarray->Ptr - ptr) > interleaved)
	 {
	    interleaved = 0;
	 }
	 else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size -1))
	 {
	    /* enforce natural alignment (for doubles) */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;

	 total_size = ALIGN(total_size, type_size);
	 total_size += input->element_size;

         if (glarray->InstanceDivisor != 0) {
            can_merge_uploads = GL_FALSE;
         }
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
   if (delta && !brw->intel.intelScreen->relaxed_relocations)
      min_index = delta = 0;
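
   /* Illustrative only: if every enabled array is being uploaded and the
    * draw uses indices 100..103, only those 4 elements are copied and
    * start_vertex_bias becomes -100, so vertex 100 fetches element 0 of
    * the uploaded buffer.
    */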

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved && interleaved <= 2*total_size) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
      else if ((total_size < 2048) && can_merge_uploads) {
	 /* Upload non-interleaved arrays into a single interleaved array */
	 struct brw_vertex_buffer *buffer;
	 int count = MAX2(max_index - min_index + 1, 1);
	 int offset;
	 char *map;

	 map = intel_upload_map(&brw->intel, total_size * count, total_size);
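
	 /* Illustrative only: for two uploads A (8 bytes) and B (12 bytes)
	  * of 4-byte components, total_size is 20 and each vertex v ends up
	  * occupying map[v*20 .. v*20+19] as | A(v) | B(v) |, i.e. the
	  * separate client arrays are packed into one interleaved vertex
	  * buffer of stride total_size.
	  */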
	 for (i = offset = 0; i < nr_uploads; i++) {
	    const unsigned char *src = upload[i]->glarray->Ptr;
	    int size = upload[i]->element_size;
	    int stride = upload[i]->glarray->StrideB;
	    char *dst;
	    int n;

	    offset = ALIGN(offset, get_size(upload[i]->glarray->Type));
	    dst = map + offset;
	    src += min_index * stride;

	    for (n = 0; n < count; n++) {
	       memcpy(dst, src, size);
	       src += stride;
	       dst += total_size;
	    }

	    upload[i]->offset = offset;
	    upload[i]->buffer = j;

	    offset += size;
	 }
	 assert(offset == total_size);
	 buffer = &brw->vb.buffers[j++];
	 intel_upload_unmap(&brw->intel, map, offset * count, offset,
			    &buffer->bo, &buffer->offset);
	 buffer->stride = offset;
	 buffer->step_rate = 0;
	 buffer->offset -= delta * offset;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->element_size);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
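
         /* Illustrative only: with 10 instances and an InstanceDivisor of
          * 4, instances 0-3, 4-7 and 8-9 read elements 0, 1 and 2, so
          * instanced_attr_max_index = (10 - 1) / 4 = 2 and elements 0..2
          * are uploaded.
          */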
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->element_size);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   /* can we simply extend the current vb? */
   if (j == brw->vb.nr_current_buffers) {
      int delta = 0;
      for (i = 0; i < j; i++) {
	 int d;

	 if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
	     brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
	     brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
	    break;

	 d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
	 if (d < 0)
	    break;
	 if (i == 0)
	    delta = d / brw->vb.current_buffers[i].stride;
	 if (delta * brw->vb.current_buffers[i].stride != d)
	    break;
      }

      if (i == j) {
	 brw->vb.start_vertex_bias += delta;
	 while (--j >= 0)
	    drm_intel_bo_unreference(brw->vb.buffers[j].bo);
	 j = 0;
      }
   }

   brw->vb.nr_buffers = j;

prepare:
   brw_prepare_query_begin(brw);
}
Example #27
0
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                           ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint index = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[index];

      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }
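
         /* Illustrative only: a non-instanced array at offset 0 with a
          * 16-byte stride, 12-byte elements and valid index bounds 5..20
          * gives start = 5 * 16 = 80 and range = 16 * 15 + 12 = 252, the
          * minimal [start, start + range) window the draw can touch.
          */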

	 /* If we have a VB set to be uploaded for this buffer object
	  * already, reuse that VB state so that we emit fewer
	  * relocations.
	  */
	 unsigned k;
	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
	    buffer->offset = offset;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

	    input->buffer = j++;
	    input->offset = 0;
	 }
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
	 {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 32, &data[0]);
             */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}