void
GalliumContext::Invalidate(uint32 width, uint32 height)
{
	CALLED();

	assert(fContext[fCurrentContext]);

	// Update st_context dimensions 
	fContext[fCurrentContext]->width = width;
	fContext[fCurrentContext]->height = height;

	// Is this the best way to invalidate?
	p_atomic_inc(&fContext[fCurrentContext]->read->stfbi->stamp);
	p_atomic_inc(&fContext[fCurrentContext]->draw->stfbi->stamp);
}
Example #2
void
_mesa_reference_shader_program_data(struct gl_context *ctx,
                                    struct gl_shader_program_data **ptr,
                                    struct gl_shader_program_data *data)
{
   if (*ptr == data)
      return;

   if (*ptr) {
      struct gl_shader_program_data *oldData = *ptr;

      assert(oldData->RefCount > 0);

      if (p_atomic_dec_zero(&oldData->RefCount)) {
         assert(ctx);
         ralloc_free(oldData);
      }

      *ptr = NULL;
   }

   if (data)
      p_atomic_inc(&data->RefCount);

   *ptr = data;
}
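The example above shows the pairing these snippets rely on: p_atomic_inc takes a reference and p_atomic_dec_zero releases one, freeing the object when the count reaches zero. Below is a minimal, self-contained sketch of that pattern using the same util/u_atomic.h helpers; the my_object type and my_object_reference function are illustrative names, not Mesa API.

#include <stdint.h>
#include <stdlib.h>

#include "util/u_atomic.h"   /* p_atomic_inc(), p_atomic_dec_zero() */

struct my_object {
   int32_t refcount;   /* touched only through the p_atomic_* helpers */
   /* ... payload ... */
};

/* Make *ptr point at obj, releasing whatever reference *ptr held before. */
static void
my_object_reference(struct my_object **ptr, struct my_object *obj)
{
   if (*ptr == obj)
      return;

   /* Drop the old reference; free the object if we held the last one. */
   if (*ptr && p_atomic_dec_zero(&(*ptr)->refcount))
      free(*ptr);

   /* Take a reference on the new object before publishing it. */
   if (obj)
      p_atomic_inc(&obj->refcount);

   *ptr = obj;
}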
Example #3
static struct radeon_winsys_cs *
amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
                 enum ring_type ring_type,
                 void (*flush)(void *ctx, unsigned flags,
                               struct pipe_fence_handle **fence),
                 void *flush_ctx,
                 struct radeon_winsys_cs_handle *trace_buf)
{
   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
   struct amdgpu_cs *cs;

   cs = CALLOC_STRUCT(amdgpu_cs);
   if (!cs) {
      return NULL;
   }

   cs->ctx = ctx;
   cs->flush_cs = flush;
   cs->flush_data = flush_ctx;
   cs->base.ring_type = ring_type;

   if (!amdgpu_init_cs_context(cs, ring_type)) {
      FREE(cs);
      return NULL;
   }

   if (!amdgpu_get_new_ib(cs)) {
      amdgpu_destroy_cs_context(cs);
      FREE(cs);
      return NULL;
   }

   p_atomic_inc(&ctx->ws->num_cs);
   return &cs->base;
}
Example #4
static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_queued, 0);
    pipe_semaphore_init(&cs->flush_completed, 0);

    cs->ws = ws;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;

    p_atomic_inc(&ws->num_cs);
    if (cs->ws->num_cpus > 1 && debug_get_option_thread())
        cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs);
    return &cs->base;
}
Example #5
static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
        enum ring_type ring_type,
        struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 0);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}
Example #6
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(cs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        cs->cst->flags[0] = 0;
        cs->cst->flags[1] = RADEON_CS_RING_GFX;
        cs->cst->cs.num_chunks = 2;
        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->cs.num_chunks = 3;
        }
        if (cs->ws->info.r600_virtual_address) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
            cs->cst->cs.num_chunks = 3;
        }
        if (flags & RADEON_FLUSH_COMPUTE) {
            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
            cs->cst->cs.num_chunks = 3;
        }

        if (cs->thread &&
            (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            pipe_semaphore_signal(&cs->flush_queued);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
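The flush above also shows the second common use of p_atomic_inc in these winsys files: an in-flight counter. Before the command stream is handed off, num_active_ioctls is bumped for every buffer it references, and the submission side drops it again once the ioctl has completed, so other threads can tell whether a buffer is still used by a pending submission. The sketch below illustrates just that counter protocol with hypothetical names; the decrement and wait sides are not part of the excerpts on this page.

#include <stdbool.h>
#include <stdint.h>

#include "util/u_atomic.h"   /* p_atomic_inc(), p_atomic_dec(), p_atomic_read() */

struct my_buffer {
   int32_t num_active_ioctls;   /* pending submissions referencing this BO */
};

/* Producer: mark every buffer as referenced before queuing the submission. */
static void
my_cs_mark_buffers_busy(struct my_buffer *const *buffers, unsigned count)
{
   for (unsigned i = 0; i < count; i++)
      p_atomic_inc(&buffers[i]->num_active_ioctls);
}

/* Submission thread: release the buffers once the ioctl has completed. */
static void
my_cs_mark_buffers_idle(struct my_buffer *const *buffers, unsigned count)
{
   for (unsigned i = 0; i < count; i++)
      p_atomic_dec(&buffers[i]->num_active_ioctls);
}

/* Any thread: cheap check for whether a buffer is busy in a pending ioctl. */
static bool
my_buffer_is_busy(struct my_buffer *buf)
{
   return p_atomic_read(&buf->num_active_ioctls) != 0;
}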
static inline void
drisw_invalidate_drawable(__DRIdrawable *dPriv)
{
   struct dri_drawable *drawable = dri_drawable(dPriv);

   drawable->texture_stamp = dPriv->lastStamp - 1;

   p_atomic_inc(&drawable->base.stamp);
}
Example #8
static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
                                 struct amdgpu_winsys_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
   struct amdgpu_cs_buffer *buffer;
   unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
   int i = -1;

   assert(priority < 64);
   *added_domains = 0;

   i = amdgpu_lookup_buffer(cs, bo);

   if (i >= 0) {
      buffer = &cs->buffers[i];
      buffer->priority_usage |= 1llu << priority;
      buffer->usage |= usage;
      *added_domains = domains & ~buffer->domains;
      buffer->domains |= domains;
      cs->flags[i] = MAX2(cs->flags[i], priority / 4);
      return i;
   }

   /* New buffer, check if the backing array is large enough. */
   if (cs->num_buffers >= cs->max_num_buffers) {
      uint32_t size;
      cs->max_num_buffers += 10;

      size = cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer);
      cs->buffers = realloc(cs->buffers, size);

      size = cs->max_num_buffers * sizeof(amdgpu_bo_handle);
      cs->handles = realloc(cs->handles, size);

      cs->flags = realloc(cs->flags, cs->max_num_buffers);
   }

   /* Initialize the new buffer. */
   cs->buffers[cs->num_buffers].bo = NULL;
   amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
   cs->handles[cs->num_buffers] = bo->bo;
   cs->flags[cs->num_buffers] = priority / 4;
   p_atomic_inc(&bo->num_cs_references);
   buffer = &cs->buffers[cs->num_buffers];
   buffer->bo = bo;
   buffer->priority_usage = 1llu << priority;
   buffer->usage = usage;
   buffer->domains = domains;

   cs->buffer_indices_hashlist[hash] = cs->num_buffers;

   *added_domains = domains;
   return cs->num_buffers++;
}
Example #9
static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs,
                                                 struct radeon_bo *bo)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = -1;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * that the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.r600_has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->num_relocs >= csc->max_relocs) {
        uint32_t size;
        csc->max_relocs = MAX2(csc->max_relocs + 16, (unsigned)(csc->max_relocs * 1.3));

        size = csc->max_relocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->num_relocs].bo = NULL;
    csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
    radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->num_relocs];
    reloc->handle = bo->handle;
    reloc->read_domains = 0;
    reloc->write_domain = 0;
    reloc->flags = 0;

    csc->reloc_indices_hashlist[hash] = csc->num_relocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    return csc->num_relocs++;
}
Example #10
static void
dri2_invalidate_drawable(__DRIdrawable *dPriv)
{
   struct dri_drawable *drawable = dri_drawable(dPriv);

   dri2InvalidateDrawable(dPriv);
   drawable->dPriv->lastStamp = drawable->dPriv->dri2.stamp;

   p_atomic_inc(&drawable->base.stamp);
}
Example #11
static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
                                 struct amdgpu_winsys_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
   struct amdgpu_cs_buffer *reloc;
   unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
   int i = -1;

   priority = MIN2(priority, 15);
   *added_domains = 0;

   i = amdgpu_get_reloc(cs, bo);

   if (i >= 0) {
      reloc = &cs->buffers[i];
      reloc->usage |= usage;
      *added_domains = domains & ~reloc->domains;
      reloc->domains |= domains;
      cs->flags[i] = MAX2(cs->flags[i], priority);
      return i;
   }

   /* New relocation, check if the backing array is large enough. */
   if (cs->num_buffers >= cs->max_num_buffers) {
      uint32_t size;
      cs->max_num_buffers += 10;

      size = cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer);
      cs->buffers = realloc(cs->buffers, size);

      size = cs->max_num_buffers * sizeof(amdgpu_bo_handle);
      cs->handles = realloc(cs->handles, size);

      cs->flags = realloc(cs->flags, cs->max_num_buffers);
   }

   /* Initialize the new relocation. */
   cs->buffers[cs->num_buffers].bo = NULL;
   amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
   cs->handles[cs->num_buffers] = bo->bo;
   cs->flags[cs->num_buffers] = priority;
   p_atomic_inc(&bo->num_cs_references);
   reloc = &cs->buffers[cs->num_buffers];
   reloc->bo = bo;
   reloc->usage = usage;
   reloc->domains = domains;

   cs->buffer_indices_hashlist[hash] = cs->num_buffers;

   *added_domains = domains;
   return cs->num_buffers++;
}
static void
egl_g3d_invalid_surface(struct native_display *ndpy,
                        struct native_surface *nsurf,
                        unsigned int seq_num)
{
   /* XXX not thread safe? */
   struct egl_g3d_surface *gsurf = egl_g3d_surface(nsurf->user_data);

   if (gsurf && gsurf->stfbi)
      p_atomic_inc(&gsurf->stfbi->stamp);
}
static int
thread_function(void *thread_data)
{
   int thread_id = *((int *) thread_data);

   LOG("thread %d starting\n", thread_id);
   os_time_sleep(thread_id * 100 * 1000);
   LOG("thread %d before barrier\n", thread_id);

   CHECK(p_atomic_read(&proceeded) == 0);
   p_atomic_inc(&waiting);

   pipe_barrier_wait(&barrier);

   CHECK(p_atomic_read(&waiting) == NUM_THREADS);

   p_atomic_inc(&proceeded);

   LOG("thread %d exiting\n", thread_id);

   return 0;
}
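For completeness, a driver for the barrier test above might look like the sketch below. It assumes the legacy Gallium os_thread.h wrappers (pipe_barrier_init, pipe_barrier_destroy, pipe_thread_create, pipe_thread_wait) and the test's own barrier, proceeded, CHECK and LOG symbols; the wrapper names changed in later Mesa releases, so treat this as an outline rather than the actual test harness.

#define NUM_THREADS 16

static pipe_barrier barrier;           /* shared with thread_function() above */
static int thread_ids[NUM_THREADS];
static pipe_thread threads[NUM_THREADS];

int
main(void)
{
   int i;

   pipe_barrier_init(&barrier, NUM_THREADS);

   for (i = 0; i < NUM_THREADS; i++) {
      thread_ids[i] = i;
      threads[i] = pipe_thread_create(thread_function, &thread_ids[i]);
   }

   for (i = 0; i < NUM_THREADS; i++)
      pipe_thread_wait(threads[i]);

   /* All threads must have passed the barrier exactly once. */
   CHECK(p_atomic_read(&proceeded) == NUM_THREADS);

   pipe_barrier_destroy(&barrier);
   return 0;
}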
Example #14
void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs)
{
retry:
    pipe_mutex_lock(ws->cs_stack_lock);
    if (p_atomic_read(&ws->ncs) >= RING_LAST) {
        /* no room left for a flush */
        pipe_mutex_unlock(ws->cs_stack_lock);
        goto retry;
    }
    ws->cs_stack[p_atomic_read(&ws->ncs)] = cs;
    p_atomic_inc(&ws->ncs);
    pipe_mutex_unlock(ws->cs_stack_lock);
    pipe_semaphore_signal(&ws->cs_queued);
}
Example #15
static void
vmw_swc_shader_relocation(struct svga_winsys_context *swc,
			  uint32 *shid,
			  uint32 *mobid,
			  uint32 *offset,
			  struct svga_winsys_gb_shader *shader,
                          unsigned flags)
{
   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
   struct vmw_winsys_screen *vws = vswc->vws;
   struct vmw_svga_winsys_shader *vshader;
   struct vmw_ctx_validate_item *ishader;

   if(!shader) {
      *shid = SVGA3D_INVALID_ID;
      return;
   }

   vshader = vmw_svga_winsys_shader(shader);

   if (!vws->base.have_vgpu10) {
      assert(vswc->shader.staged < vswc->shader.reserved);
      ishader = util_hash_table_get(vswc->hash, vshader);

      if (ishader == NULL) {
         ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
         vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
         ishader->referenced = FALSE;
         /*
          * Note that a failure here may just fall back to unhashed behavior
          * and potentially cause unnecessary flushing, so ignore the
          * return code.
          */
         (void) util_hash_table_set(vswc->hash, vshader, ishader);
         ++vswc->shader.staged;
      }

      if (!ishader->referenced) {
         ishader->referenced = TRUE;
         p_atomic_inc(&vshader->validated);
      }
   }

   if (shid)
      *shid = vshader->shid;

   if (vshader->buf)
      vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf,
			     0, SVGA_RELOC_READ);
}
Example #16
static struct pipe_fence_handle *
amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
                    unsigned ip_instance, unsigned ring)
{
   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);

   fence->reference.count = 1;
   fence->ctx = ctx;
   fence->fence.context = ctx->ctx;
   fence->fence.ip_type = ip_type;
   fence->fence.ip_instance = ip_instance;
   fence->fence.ring = ring;
   p_atomic_inc(&ctx->refcount);
   return (struct pipe_fence_handle *)fence;
}
/**
 * Add the buffer to the fenced list.
 *
 * Reference count should be incremented before calling this function.
 */
static INLINE void
fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
                         struct fenced_buffer *fenced_buf)
{
   assert(pipe_is_referenced(&fenced_buf->base.base.reference));
   assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
   assert(fenced_buf->fence);

   p_atomic_inc(&fenced_buf->base.base.reference.count);

   LIST_DEL(&fenced_buf->head);
   assert(fenced_mgr->num_unfenced);
   --fenced_mgr->num_unfenced;
   LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced);
   ++fenced_mgr->num_fenced;
}
Example #18
static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs,
                                            struct radeon_bo *bo)
{
    struct radeon_cs_context *csc = cs->csc;
    unsigned hash;
    struct radeon_bo_item *item;
    int idx;
    int real_idx;

    idx = radeon_lookup_buffer(csc, bo);
    if (idx >= 0)
        return idx;

    real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);

    /* Check if the backing array is large enough. */
    if (csc->num_slab_buffers >= csc->max_slab_buffers) {
        unsigned new_max = MAX2(csc->max_slab_buffers + 16,
                                (unsigned)(csc->max_slab_buffers * 1.3));
        struct radeon_bo_item *new_buffers =
            REALLOC(csc->slab_buffers,
                    csc->max_slab_buffers * sizeof(*new_buffers),
                    new_max * sizeof(*new_buffers));
        if (!new_buffers) {
            fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
            return -1;
        }

        csc->max_slab_buffers = new_max;
        csc->slab_buffers = new_buffers;
    }

    /* Initialize the new relocation. */
    idx = csc->num_slab_buffers++;
    item = &csc->slab_buffers[idx];

    item->bo = NULL;
    item->u.slab.real_idx = real_idx;
    radeon_bo_reference(&item->bo, bo);
    p_atomic_inc(&bo->num_cs_references);

    hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    csc->reloc_indices_hashlist[hash] = idx;

    return idx;
}
Example #19
boolean
util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size,
                     struct pipe_context *ctx, struct pipe_resource *pt,
                     unsigned level, unsigned layer,
                     struct pipe_surface **res)
{
   struct pipe_surface *ps;

   if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
   {    /* or 2D array */
      if(!us->u.hash)
         us->u.hash = cso_hash_create();

      ps = cso_hash_iter_data(cso_hash_find(us->u.hash, (layer << 8) | level));
   }
   else
   {
      if(!us->u.array)
         us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
      ps = us->u.array[level];
   }

   if(ps && ps->context == ctx)
   {
      p_atomic_inc(&ps->reference.count);
      *res = ps;
      return FALSE;
   }

   ps = (struct pipe_surface *)CALLOC(1, surface_struct_size);
   if(!ps)
   {
      *res = NULL;
      return FALSE;
   }

   pipe_surface_init(ctx, ps, pt, level, layer);

   if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
      cso_hash_insert(us->u.hash, (layer << 8) | level, ps);
   else
      us->u.array[level] = ps;

   *res = ps;
   return TRUE;
}
Example #20
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    radeon_drm_cs_sync_flush(cs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty, emit it in a separate thread. */
    if (cs->base.cdw) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->cs.num_chunks = 3;
            cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS;
        } else {
            cs->cst->cs.num_chunks = 2;
        }

        if (cs->thread &&
            (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            pipe_semaphore_signal(&cs->flush_queued);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
Example #21
static void virgl_vtest_add_res(struct virgl_vtest_winsys *vtws,
                                struct virgl_vtest_cmd_buf *cbuf,
                                struct virgl_hw_res *res)
{
   unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1);

   if (cbuf->cres > cbuf->nres) {
      fprintf(stderr,"failure to add relocation\n");
      return;
   }

   cbuf->res_bo[cbuf->cres] = NULL;
   virgl_vtest_resource_reference(vtws, &cbuf->res_bo[cbuf->cres], res);
   cbuf->is_handle_added[hash] = TRUE;

   cbuf->reloc_indices_hashlist[hash] = cbuf->cres;
   p_atomic_inc(&res->num_cs_references);
   cbuf->cres++;
}
Example #22
static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;
    cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}
Example #23
static void si_alloc_separate_cmask(struct si_screen *sscreen,
				    struct si_texture *tex)
{
	if (tex->cmask_buffer || !tex->surface.cmask_size)
                return;

	tex->cmask_buffer =
		si_aligned_buffer_create(&sscreen->b,
					 SI_RESOURCE_FLAG_UNMAPPABLE,
					 PIPE_USAGE_DEFAULT,
					 tex->surface.cmask_size,
					 tex->surface.cmask_alignment);
	if (tex->cmask_buffer == NULL)
		return;

	tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8;
	tex->cb_color_info |= S_028C70_FAST_CLEAR(1);

	p_atomic_inc(&sscreen->compressed_colortex_counter);
}
Example #24
static struct radeon_cmdbuf *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     bool stop_exec_on_failure)
{
    struct radeon_drm_winsys *ws = ((struct radeon_ctx*)ctx)->ws;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    util_queue_fence_init(&cs->flush_completed);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
    cs->ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}
Example #25
static void
vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
				uint32 *where,
				struct vmw_svga_winsys_surface *vsurf,
				unsigned flags)
{
   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
   struct vmw_ctx_validate_item *isrf;

   assert(vswc->surface.staged < vswc->surface.reserved);
   isrf = util_hash_table_get(vswc->hash, vsurf);

   if (isrf == NULL) {
      isrf = &vswc->surface.items[vswc->surface.used + vswc->surface.staged];
      vmw_svga_winsys_surface_reference(&isrf->vsurf, vsurf);
      isrf->referenced = FALSE;
      /*
       * Note that a failure here may just fall back to unhashed behavior
       * and potentially cause unnecessary flushing, so ignore the
       * return code.
       */
      (void) util_hash_table_set(vswc->hash, vsurf, isrf);
      ++vswc->surface.staged;

      vswc->seen_surfaces += vsurf->size;
      if ((swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) &&
          vswc->seen_surfaces >=
            vswc->vws->ioctl.max_surface_memory / VMW_MAX_SURF_MEM_FACTOR)
         vswc->preemptive_flush = TRUE;
   }

   if (!(flags & SVGA_RELOC_INTERNAL) && !isrf->referenced) {
      isrf->referenced = TRUE;
      p_atomic_inc(&vsurf->validated);
   }

   if (where)
      *where = vsurf->sid;
}
struct pipe_surface *
util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
{
   struct pipe_surface *ps;

   if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
   {    /* or 2D array */
      if(!us->u.hash)
         us->u.hash = cso_hash_create();

      ps = cso_hash_iter_data(cso_hash_find(us->u.hash, ((zslice + face) << 8) | level));
   }
   else
   {
      if(!us->u.array)
         us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
      ps = us->u.array[level];
   }

   if(ps)
   {
      p_atomic_inc(&ps->reference.count);
      return ps;
   }

   ps = (struct pipe_surface *)CALLOC(1, surface_struct_size);
   if(!ps)
      return NULL;

   pipe_surface_init(ps, pt, face, level, zslice, flags);
   ps->offset = ~0;

   if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
      cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps);
   else
      us->u.array[level] = ps;

   return ps;
}
Example #27
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
         * r6xx, requires at least 4 dw alignment to avoid a hw bug.
         */
        if (flags & RADEON_FLUSH_COMPUTE) {
            if (cs->ws->info.chip_class <= SI) {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
            } else {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
            }
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        }
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            radeon_drm_ws_queue_cs(cs->ws, cs);
        } else {
            pipe_mutex_lock(cs->ws->cs_stack_lock);
            if (cs->ws->thread) {
                while (p_atomic_read(&cs->ws->ncs)) {
                    pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock);
                }
            }
            pipe_mutex_unlock(cs->ws->cs_stack_lock);
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
Example #28
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    *added_domains = 0;
    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            /* On the DMA ring we need to emit as many relocations as there are
             * uses of the BO, so each time this function is called we must add
             * the BO to the relocation buffer again.
             *
             * Do not update the hash table for the DMA ring, so that the hash
             * always points to the BO's first relocation, which is the one the
             * kernel uses for memory placement. The following relocations are
             * ignored for placement, but the kernel still uses them to patch
             * the command stream with the proper buffer offsets.
             */
            update_hash = FALSE;
            update_reloc_domains(reloc, rd, wd, added_domains);
            if (cs->base.ring_type != RING_DMA) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = 0;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}
Example #29
static void *si_create_compute_state(
	struct pipe_context *ctx,
	const struct pipe_compute_state *cso)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_screen *sscreen = (struct si_screen *)ctx->screen;
	struct si_compute *program = CALLOC_STRUCT(si_compute);
	struct si_shader *shader = &program->shader;


	program->ir_type = cso->ir_type;
	program->local_size = cso->req_local_mem;
	program->private_size = cso->req_private_mem;
	program->input_size = cso->req_input_mem;
	program->use_code_object_v2 = HAVE_LLVM >= 0x0400 &&
					cso->ir_type == PIPE_SHADER_IR_NATIVE;


	if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
		struct si_shader_selector sel;
		bool scratch_enabled;

		memset(&sel, 0, sizeof(sel));

		sel.tokens = tgsi_dup_tokens(cso->prog);
		if (!sel.tokens) {
			FREE(program);
			return NULL;
		}

		tgsi_scan_shader(cso->prog, &sel.info);
		sel.type = PIPE_SHADER_COMPUTE;
		sel.local_size = cso->req_local_mem;

		p_atomic_inc(&sscreen->b.num_shaders_created);

		program->shader.selector = &sel;

		if (si_shader_create(sscreen, sctx->tm, &program->shader,
		                     &sctx->b.debug)) {
			FREE(sel.tokens);
			FREE(program);
			return NULL;
		}

		scratch_enabled = shader->config.scratch_bytes_per_wave > 0;

		shader->config.rsrc1 =
			   S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
			   S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
			   S_00B848_DX10_CLAMP(1) |
			   S_00B848_FLOAT_MODE(shader->config.float_mode);

		shader->config.rsrc2 = S_00B84C_USER_SGPR(SI_CS_NUM_USER_SGPR) |
			   S_00B84C_SCRATCH_EN(scratch_enabled) |
			   S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
			   S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
			   S_00B84C_LDS_SIZE(shader->config.lds_size);

		FREE(sel.tokens);
	} else {
		const struct pipe_llvm_program_header *header;
		const char *code;
		header = cso->prog;
		code = cso->prog + sizeof(struct pipe_llvm_program_header);

		radeon_elf_read(code, header->num_bytes, &program->shader.binary);
		if (program->use_code_object_v2) {
			const amd_kernel_code_t *code_object =
				si_compute_get_code_object(program, 0);
			code_object_to_config(code_object, &program->shader.config);
		} else {
			si_shader_binary_read_config(&program->shader.binary,
				     &program->shader.config, 0);
		}
		si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
			       PIPE_SHADER_COMPUTE, stderr);
		si_shader_binary_upload(sctx->screen, &program->shader);
	}

	return program;
}
Example #30
/**
 * Notify the binding context to validate the buffer.
 */
void
xmesa_notify_invalid_buffer(XMesaBuffer b)
{
   p_atomic_inc(&b->stfb->stamp);
}
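Several of the examples on this page (GalliumContext::Invalidate, drisw_invalidate_drawable, dri2_invalidate_drawable, egl_g3d_invalid_surface and xmesa_notify_invalid_buffer above) use p_atomic_inc the same way: the window-system side bumps an atomic stamp on the framebuffer, and the rendering side later compares that stamp with the value it saw at its last validation to decide whether the buffers need to be reacquired. A minimal sketch of both sides, with hypothetical my_drawable names rather than any real Mesa structure:

#include <stdint.h>

#include "util/u_atomic.h"   /* p_atomic_inc(), p_atomic_read() */

struct my_drawable {
   int32_t stamp;        /* bumped with p_atomic_inc() on invalidation */
   int32_t last_stamp;   /* value observed at the last validation */
};

/* Producer side (window system): mark the drawable as out of date. */
static void
my_drawable_invalidate(struct my_drawable *draw)
{
   p_atomic_inc(&draw->stamp);
}

/* Consumer side (driver): revalidate only if the stamp moved. */
static void
my_drawable_validate_if_needed(struct my_drawable *draw)
{
   int32_t current = p_atomic_read(&draw->stamp);

   if (current != draw->last_stamp) {
      /* Buffers were resized or replaced: reacquire them here. */
      draw->last_stamp = current;
   }
}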