void
GalliumContext::Invalidate(uint32 width, uint32 height)
{
   CALLED();

   assert(fContext[fCurrentContext]);

   // Update st_context dimensions
   fContext[fCurrentContext]->width = width;
   fContext[fCurrentContext]->height = height;

   // Is this the best way to invalidate?
   p_atomic_inc(&fContext[fCurrentContext]->read->stfbi->stamp);
   p_atomic_inc(&fContext[fCurrentContext]->draw->stfbi->stamp);
}
void
_mesa_reference_shader_program_data(struct gl_context *ctx,
                                    struct gl_shader_program_data **ptr,
                                    struct gl_shader_program_data *data)
{
   if (*ptr == data)
      return;

   if (*ptr) {
      struct gl_shader_program_data *oldData = *ptr;

      assert(oldData->RefCount > 0);

      if (p_atomic_dec_zero(&oldData->RefCount)) {
         assert(ctx);
         ralloc_free(oldData);
      }

      *ptr = NULL;
   }

   if (data)
      p_atomic_inc(&data->RefCount);

   *ptr = data;
}
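/*
 * A minimal sketch (not taken from any of the files excerpted here) of the
 * atomic reference-counting pattern that _mesa_reference_shader_program_data()
 * above and several snippets below follow: p_atomic_inc() when a new holder
 * takes a reference, p_atomic_dec_zero() when one is dropped, freeing on the
 * last release.  The struct, the function names and the "util/u_atomic.h"
 * include path are illustrative assumptions, not part of the excerpts.
 */
#include <stdlib.h>
#include "util/u_atomic.h"   /* assumed location of the p_atomic_* helpers */

struct refcounted_obj {
   int refcount;   /* touched only through p_atomic_* */
};

static struct refcounted_obj *obj_create(void)
{
   struct refcounted_obj *obj = calloc(1, sizeof(*obj));
   if (obj)
      p_atomic_set(&obj->refcount, 1);   /* creator holds the first reference */
   return obj;
}

static void obj_reference(struct refcounted_obj **dst, struct refcounted_obj *src)
{
   /* Same shape as the function above: drop the old pointer's reference,
    * then take one on the new pointer. */
   if (*dst == src)
      return;

   if (*dst && p_atomic_dec_zero(&(*dst)->refcount))
      free(*dst);

   if (src)
      p_atomic_inc(&src->refcount);

   *dst = src;
}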
static struct radeon_winsys_cs *
amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
                 enum ring_type ring_type,
                 void (*flush)(void *ctx, unsigned flags,
                               struct pipe_fence_handle **fence),
                 void *flush_ctx,
                 struct radeon_winsys_cs_handle *trace_buf)
{
   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
   struct amdgpu_cs *cs;

   cs = CALLOC_STRUCT(amdgpu_cs);
   if (!cs) {
      return NULL;
   }

   cs->ctx = ctx;
   cs->flush_cs = flush;
   cs->flush_data = flush_ctx;
   cs->base.ring_type = ring_type;

   if (!amdgpu_init_cs_context(cs, ring_type)) {
      FREE(cs);
      return NULL;
   }

   if (!amdgpu_get_new_ib(cs)) {
      amdgpu_destroy_cs_context(cs);
      FREE(cs);
      return NULL;
   }

   p_atomic_inc(&ctx->ws->num_cs);
   return &cs->base;
}
static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_drm_cs *cs;

   cs = CALLOC_STRUCT(radeon_drm_cs);
   if (!cs) {
      return NULL;
   }
   pipe_semaphore_init(&cs->flush_queued, 0);
   pipe_semaphore_init(&cs->flush_completed, 0);

   cs->ws = ws;

   if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
      FREE(cs);
      return NULL;
   }
   if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
      radeon_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   /* Set the first command buffer as current. */
   cs->csc = &cs->csc1;
   cs->cst = &cs->csc2;
   cs->base.buf = cs->csc->buf;

   p_atomic_inc(&ws->num_cs);
   if (cs->ws->num_cpus > 1 && debug_get_option_thread())
      cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs);

   return &cs->base;
}
static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys *rws,
                     enum ring_type ring_type,
                     struct radeon_winsys_cs_handle *trace_buf)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_drm_cs *cs;

   cs = CALLOC_STRUCT(radeon_drm_cs);
   if (!cs) {
      return NULL;
   }
   pipe_semaphore_init(&cs->flush_completed, 0);

   cs->ws = ws;
   cs->trace_buf = (struct radeon_bo*)trace_buf;

   if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
      FREE(cs);
      return NULL;
   }
   if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
      radeon_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   /* Set the first command buffer as current. */
   cs->csc = &cs->csc1;
   cs->cst = &cs->csc2;
   cs->base.buf = cs->csc->buf;
   cs->base.ring_type = ring_type;

   p_atomic_inc(&ws->num_cs);
   return &cs->base;
}
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct radeon_cs_context *tmp;

   if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
      fprintf(stderr, "radeon: command stream overflowed\n");
   }

   radeon_drm_cs_sync_flush(cs);

   /* Flip command streams. */
   tmp = cs->csc;
   cs->csc = cs->cst;
   cs->cst = tmp;

   /* If the CS is not empty or overflowed, emit it in a separate thread. */
   if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS) {
      unsigned i, crelocs = cs->cst->crelocs;

      cs->cst->chunks[0].length_dw = cs->base.cdw;

      for (i = 0; i < crelocs; i++) {
         /* Update the number of active asynchronous CS ioctls for the buffer. */
         p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
      }

      cs->cst->flags[0] = 0;
      cs->cst->flags[1] = RADEON_CS_RING_GFX;
      cs->cst->cs.num_chunks = 2;
      if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
         cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
         cs->cst->cs.num_chunks = 3;
      }
      if (cs->ws->info.r600_virtual_address) {
         cs->cst->flags[0] |= RADEON_CS_USE_VM;
         cs->cst->cs.num_chunks = 3;
      }
      if (flags & RADEON_FLUSH_COMPUTE) {
         cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
         cs->cst->cs.num_chunks = 3;
      }

      if (cs->thread && (flags & RADEON_FLUSH_ASYNC)) {
         cs->flush_started = 1;
         pipe_semaphore_signal(&cs->flush_queued);
      } else {
         radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
      }
   } else {
      radeon_cs_context_cleanup(cs->cst);
   }

   /* Prepare a new CS. */
   cs->base.buf = cs->csc->buf;
   cs->base.cdw = 0;
}
static inline void
drisw_invalidate_drawable(__DRIdrawable *dPriv)
{
   struct dri_drawable *drawable = dri_drawable(dPriv);

   drawable->texture_stamp = dPriv->lastStamp - 1;

   p_atomic_inc(&drawable->base.stamp);
}
static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
                                  struct amdgpu_winsys_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
   struct amdgpu_cs_buffer *buffer;
   unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
   int i = -1;

   assert(priority < 64);
   *added_domains = 0;

   i = amdgpu_lookup_buffer(cs, bo);

   if (i >= 0) {
      buffer = &cs->buffers[i];
      buffer->priority_usage |= 1llu << priority;
      buffer->usage |= usage;
      *added_domains = domains & ~buffer->domains;
      buffer->domains |= domains;
      cs->flags[i] = MAX2(cs->flags[i], priority / 4);
      return i;
   }

   /* New buffer, check if the backing array is large enough. */
   if (cs->num_buffers >= cs->max_num_buffers) {
      uint32_t size;
      cs->max_num_buffers += 10;

      size = cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer);
      cs->buffers = realloc(cs->buffers, size);

      size = cs->max_num_buffers * sizeof(amdgpu_bo_handle);
      cs->handles = realloc(cs->handles, size);

      cs->flags = realloc(cs->flags, cs->max_num_buffers);
   }

   /* Initialize the new buffer. */
   cs->buffers[cs->num_buffers].bo = NULL;
   amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
   cs->handles[cs->num_buffers] = bo->bo;
   cs->flags[cs->num_buffers] = priority / 4;
   p_atomic_inc(&bo->num_cs_references);
   buffer = &cs->buffers[cs->num_buffers];
   buffer->bo = bo;
   buffer->priority_usage = 1llu << priority;
   buffer->usage = usage;
   buffer->domains = domains;

   cs->buffer_indices_hashlist[hash] = cs->num_buffers;

   *added_domains = domains;
   return cs->num_buffers++;
}
static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs,
                                                 struct radeon_bo *bo)
{
   struct radeon_cs_context *csc = cs->csc;
   struct drm_radeon_cs_reloc *reloc;
   unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
   int i = -1;

   i = radeon_lookup_buffer(csc, bo);

   if (i >= 0) {
      /* For async DMA, every add_buffer call must add a buffer to the list
       * no matter how many duplicates there are. This is due to the fact that
       * the DMA CS checker doesn't use NOP packets for offset patching,
       * but always uses the i-th buffer from the list to patch the i-th
       * offset. If there are N offsets in a DMA CS, there must also be N
       * buffers in the relocation list.
       *
       * This doesn't have to be done if virtual memory is enabled,
       * because there is no offset patching with virtual memory.
       */
      if (cs->ring_type != RING_DMA || cs->ws->info.r600_has_virtual_memory) {
         return i;
      }
   }

   /* New relocation, check if the backing array is large enough. */
   if (csc->num_relocs >= csc->max_relocs) {
      uint32_t size;
      csc->max_relocs = MAX2(csc->max_relocs + 16,
                             (unsigned)(csc->max_relocs * 1.3));

      size = csc->max_relocs * sizeof(csc->relocs_bo[0]);
      csc->relocs_bo = realloc(csc->relocs_bo, size);

      size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
      csc->relocs = realloc(csc->relocs, size);

      csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
   }

   /* Initialize the new relocation. */
   csc->relocs_bo[csc->num_relocs].bo = NULL;
   csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
   radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
   p_atomic_inc(&bo->num_cs_references);
   reloc = &csc->relocs[csc->num_relocs];
   reloc->handle = bo->handle;
   reloc->read_domains = 0;
   reloc->write_domain = 0;
   reloc->flags = 0;

   csc->reloc_indices_hashlist[hash] = csc->num_relocs;

   csc->chunks[1].length_dw += RELOC_DWORDS;

   return csc->num_relocs++;
}
static void
dri2_invalidate_drawable(__DRIdrawable *dPriv)
{
   struct dri_drawable *drawable = dri_drawable(dPriv);

   dri2InvalidateDrawable(dPriv);
   drawable->dPriv->lastStamp = drawable->dPriv->dri2.stamp;

   p_atomic_inc(&drawable->base.stamp);
}
static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
                                 struct amdgpu_winsys_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
   struct amdgpu_cs_buffer *reloc;
   unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
   int i = -1;

   priority = MIN2(priority, 15);
   *added_domains = 0;

   i = amdgpu_get_reloc(cs, bo);

   if (i >= 0) {
      reloc = &cs->buffers[i];
      reloc->usage |= usage;
      *added_domains = domains & ~reloc->domains;
      reloc->domains |= domains;
      cs->flags[i] = MAX2(cs->flags[i], priority);
      return i;
   }

   /* New relocation, check if the backing array is large enough. */
   if (cs->num_buffers >= cs->max_num_buffers) {
      uint32_t size;
      cs->max_num_buffers += 10;

      size = cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer);
      cs->buffers = realloc(cs->buffers, size);

      size = cs->max_num_buffers * sizeof(amdgpu_bo_handle);
      cs->handles = realloc(cs->handles, size);

      cs->flags = realloc(cs->flags, cs->max_num_buffers);
   }

   /* Initialize the new relocation. */
   cs->buffers[cs->num_buffers].bo = NULL;
   amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
   cs->handles[cs->num_buffers] = bo->bo;
   cs->flags[cs->num_buffers] = priority;
   p_atomic_inc(&bo->num_cs_references);
   reloc = &cs->buffers[cs->num_buffers];
   reloc->bo = bo;
   reloc->usage = usage;
   reloc->domains = domains;

   cs->buffer_indices_hashlist[hash] = cs->num_buffers;

   *added_domains = domains;
   return cs->num_buffers++;
}
static void
egl_g3d_invalid_surface(struct native_display *ndpy,
                        struct native_surface *nsurf,
                        unsigned int seq_num)
{
   /* XXX not thread safe? */
   struct egl_g3d_surface *gsurf = egl_g3d_surface(nsurf->user_data);

   if (gsurf && gsurf->stfbi)
      p_atomic_inc(&gsurf->stfbi->stamp);
}
static int
thread_function(void *thread_data)
{
   int thread_id = *((int *) thread_data);

   LOG("thread %d starting\n", thread_id);
   os_time_sleep(thread_id * 100 * 1000);
   LOG("thread %d before barrier\n", thread_id);

   CHECK(p_atomic_read(&proceeded) == 0);
   p_atomic_inc(&waiting);
   pipe_barrier_wait(&barrier);
   CHECK(p_atomic_read(&waiting) == NUM_THREADS);
   p_atomic_inc(&proceeded);

   LOG("thread %d exiting\n", thread_id);

   return 0;
}
void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs)
{
retry:
   pipe_mutex_lock(ws->cs_stack_lock);
   if (p_atomic_read(&ws->ncs) >= RING_LAST) {
      /* no room left for a flush */
      pipe_mutex_unlock(ws->cs_stack_lock);
      goto retry;
   }
   ws->cs_stack[p_atomic_read(&ws->ncs)] = cs;
   p_atomic_inc(&ws->ncs);
   pipe_mutex_unlock(ws->cs_stack_lock);
   pipe_semaphore_signal(&ws->cs_queued);
}
static void
vmw_swc_shader_relocation(struct svga_winsys_context *swc,
                          uint32 *shid,
                          uint32 *mobid,
                          uint32 *offset,
                          struct svga_winsys_gb_shader *shader,
                          unsigned flags)
{
   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
   struct vmw_winsys_screen *vws = vswc->vws;
   struct vmw_svga_winsys_shader *vshader;
   struct vmw_ctx_validate_item *ishader;

   if (!shader) {
      *shid = SVGA3D_INVALID_ID;
      return;
   }

   vshader = vmw_svga_winsys_shader(shader);

   if (!vws->base.have_vgpu10) {
      assert(vswc->shader.staged < vswc->shader.reserved);
      ishader = util_hash_table_get(vswc->hash, vshader);

      if (ishader == NULL) {
         ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
         vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
         ishader->referenced = FALSE;
         /*
          * Note that a failure here may just fall back to unhashed behavior
          * and potentially cause unnecessary flushing, so ignore the
          * return code.
          */
         (void) util_hash_table_set(vswc->hash, vshader, ishader);
         ++vswc->shader.staged;
      }

      if (!ishader->referenced) {
         ishader->referenced = TRUE;
         p_atomic_inc(&vshader->validated);
      }
   }

   if (shid)
      *shid = vshader->shid;

   if (vshader->buf)
      vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf,
                             0, SVGA_RELOC_READ);
}
static struct pipe_fence_handle *
amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
                    unsigned ip_instance, unsigned ring)
{
   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);

   fence->reference.count = 1;
   fence->ctx = ctx;
   fence->fence.context = ctx->ctx;
   fence->fence.ip_type = ip_type;
   fence->fence.ip_instance = ip_instance;
   fence->fence.ring = ring;
   p_atomic_inc(&ctx->refcount);
   return (struct pipe_fence_handle *)fence;
}
/**
 * Add the buffer to the fenced list.
 *
 * Reference count should be incremented before calling this function.
 */
static INLINE void
fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
                         struct fenced_buffer *fenced_buf)
{
   assert(pipe_is_referenced(&fenced_buf->base.base.reference));
   assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
   assert(fenced_buf->fence);

   p_atomic_inc(&fenced_buf->base.base.reference.count);

   LIST_DEL(&fenced_buf->head);
   assert(fenced_mgr->num_unfenced);
   --fenced_mgr->num_unfenced;
   LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced);
   ++fenced_mgr->num_fenced;
}
static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs,
                                            struct radeon_bo *bo)
{
   struct radeon_cs_context *csc = cs->csc;
   unsigned hash;
   struct radeon_bo_item *item;
   int idx;
   int real_idx;

   idx = radeon_lookup_buffer(csc, bo);
   if (idx >= 0)
      return idx;

   real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);

   /* Check if the backing array is large enough. */
   if (csc->num_slab_buffers >= csc->max_slab_buffers) {
      unsigned new_max = MAX2(csc->max_slab_buffers + 16,
                              (unsigned)(csc->max_slab_buffers * 1.3));
      struct radeon_bo_item *new_buffers =
            REALLOC(csc->slab_buffers,
                    csc->max_slab_buffers * sizeof(*new_buffers),
                    new_max * sizeof(*new_buffers));
      if (!new_buffers) {
         fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
         return -1;
      }

      csc->max_slab_buffers = new_max;
      csc->slab_buffers = new_buffers;
   }

   /* Initialize the new relocation. */
   idx = csc->num_slab_buffers++;
   item = &csc->slab_buffers[idx];

   item->bo = NULL;
   item->u.slab.real_idx = real_idx;
   radeon_bo_reference(&item->bo, bo);
   p_atomic_inc(&bo->num_cs_references);

   hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
   csc->reloc_indices_hashlist[hash] = idx;

   return idx;
}
boolean
util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size,
                     struct pipe_context *ctx, struct pipe_resource *pt,
                     unsigned level, unsigned layer,
                     struct pipe_surface **res)
{
   struct pipe_surface *ps;

   if (pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) {
      /* or 2D array */
      if (!us->u.hash)
         us->u.hash = cso_hash_create();

      ps = cso_hash_iter_data(cso_hash_find(us->u.hash, (layer << 8) | level));
   } else {
      if (!us->u.array)
         us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
      ps = us->u.array[level];
   }

   if (ps && ps->context == ctx) {
      p_atomic_inc(&ps->reference.count);
      *res = ps;
      return FALSE;
   }

   ps = (struct pipe_surface *)CALLOC(1, surface_struct_size);
   if (!ps) {
      *res = NULL;
      return FALSE;
   }

   pipe_surface_init(ctx, ps, pt, level, layer);

   if (pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
      cso_hash_insert(us->u.hash, (layer << 8) | level, ps);
   else
      us->u.array[level] = ps;

   *res = ps;
   return TRUE;
}
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct radeon_cs_context *tmp;

   radeon_drm_cs_sync_flush(cs);

   /* Flip command streams. */
   tmp = cs->csc;
   cs->csc = cs->cst;
   cs->cst = tmp;

   /* If the CS is not empty, emit it in a separate thread. */
   if (cs->base.cdw) {
      unsigned i, crelocs = cs->cst->crelocs;

      cs->cst->chunks[0].length_dw = cs->base.cdw;

      for (i = 0; i < crelocs; i++) {
         /* Update the number of active asynchronous CS ioctls for the buffer. */
         p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
      }

      if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
         cs->cst->cs.num_chunks = 3;
         cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS;
      } else {
         cs->cst->cs.num_chunks = 2;
      }

      if (cs->thread && (flags & RADEON_FLUSH_ASYNC)) {
         cs->flush_started = 1;
         pipe_semaphore_signal(&cs->flush_queued);
      } else {
         radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
      }
   } else {
      radeon_cs_context_cleanup(cs->cst);
   }

   /* Prepare a new CS. */
   cs->base.buf = cs->csc->buf;
   cs->base.cdw = 0;
}
static void virgl_vtest_add_res(struct virgl_vtest_winsys *vtws,
                                struct virgl_vtest_cmd_buf *cbuf,
                                struct virgl_hw_res *res)
{
   unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1);

   if (cbuf->cres > cbuf->nres) {
      fprintf(stderr, "failure to add relocation\n");
      return;
   }

   cbuf->res_bo[cbuf->cres] = NULL;
   virgl_vtest_resource_reference(vtws, &cbuf->res_bo[cbuf->cres], res);
   cbuf->is_handle_added[hash] = TRUE;

   cbuf->reloc_indices_hashlist[hash] = cbuf->cres;
   p_atomic_inc(&res->num_cs_references);
   cbuf->cres++;
}
static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct radeon_winsys_cs_handle *trace_buf)
{
   struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
   struct radeon_drm_cs *cs;

   cs = CALLOC_STRUCT(radeon_drm_cs);
   if (!cs) {
      return NULL;
   }
   pipe_semaphore_init(&cs->flush_completed, 1);

   cs->ws = ws;
   cs->flush_cs = flush;
   cs->flush_data = flush_ctx;
   cs->trace_buf = (struct radeon_bo*)trace_buf;

   if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
      FREE(cs);
      return NULL;
   }
   if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
      radeon_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   /* Set the first command buffer as current. */
   cs->csc = &cs->csc1;
   cs->cst = &cs->csc2;
   cs->base.buf = cs->csc->buf;
   cs->base.ring_type = ring_type;
   cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);

   p_atomic_inc(&ws->num_cs);
   return &cs->base;
}
static void si_alloc_separate_cmask(struct si_screen *sscreen,
                                    struct si_texture *tex)
{
   if (tex->cmask_buffer || !tex->surface.cmask_size)
      return;

   tex->cmask_buffer =
      si_aligned_buffer_create(&sscreen->b,
                               SI_RESOURCE_FLAG_UNMAPPABLE,
                               PIPE_USAGE_DEFAULT,
                               tex->surface.cmask_size,
                               tex->surface.cmask_alignment);
   if (tex->cmask_buffer == NULL)
      return;

   tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8;
   tex->cb_color_info |= S_028C70_FAST_CLEAR(1);

   p_atomic_inc(&sscreen->compressed_colortex_counter);
}
static struct radeon_cmdbuf *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     bool stop_exec_on_failure)
{
   struct radeon_drm_winsys *ws = ((struct radeon_ctx*)ctx)->ws;
   struct radeon_drm_cs *cs;

   cs = CALLOC_STRUCT(radeon_drm_cs);
   if (!cs) {
      return NULL;
   }
   util_queue_fence_init(&cs->flush_completed);

   cs->ws = ws;
   cs->flush_cs = flush;
   cs->flush_data = flush_ctx;

   if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
      FREE(cs);
      return NULL;
   }
   if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
      radeon_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   /* Set the first command buffer as current. */
   cs->csc = &cs->csc1;
   cs->cst = &cs->csc2;
   cs->base.current.buf = cs->csc->buf;
   cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
   cs->ring_type = ring_type;

   p_atomic_inc(&ws->num_cs);
   return &cs->base;
}
static void
vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
                                uint32 *where,
                                struct vmw_svga_winsys_surface *vsurf,
                                unsigned flags)
{
   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
   struct vmw_ctx_validate_item *isrf;

   assert(vswc->surface.staged < vswc->surface.reserved);
   isrf = util_hash_table_get(vswc->hash, vsurf);

   if (isrf == NULL) {
      isrf = &vswc->surface.items[vswc->surface.used + vswc->surface.staged];
      vmw_svga_winsys_surface_reference(&isrf->vsurf, vsurf);
      isrf->referenced = FALSE;
      /*
       * Note that a failure here may just fall back to unhashed behavior
       * and potentially cause unnecessary flushing, so ignore the
       * return code.
       */
      (void) util_hash_table_set(vswc->hash, vsurf, isrf);
      ++vswc->surface.staged;

      vswc->seen_surfaces += vsurf->size;
      if ((swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) &&
          vswc->seen_surfaces >=
            vswc->vws->ioctl.max_surface_memory / VMW_MAX_SURF_MEM_FACTOR)
         vswc->preemptive_flush = TRUE;
   }

   if (!(flags & SVGA_RELOC_INTERNAL) && !isrf->referenced) {
      isrf->referenced = TRUE;
      p_atomic_inc(&vsurf->validated);
   }

   if (where)
      *where = vsurf->sid;
}
struct pipe_surface *
util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size,
                     struct pipe_screen *pscreen, struct pipe_resource *pt,
                     unsigned face, unsigned level, unsigned zslice,
                     unsigned flags)
{
   struct pipe_surface *ps;

   if (pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) {
      /* or 2D array */
      if (!us->u.hash)
         us->u.hash = cso_hash_create();

      ps = cso_hash_iter_data(cso_hash_find(us->u.hash,
                                            ((zslice + face) << 8) | level));
   } else {
      if (!us->u.array)
         us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *));
      ps = us->u.array[level];
   }

   if (ps) {
      p_atomic_inc(&ps->reference.count);
      return ps;
   }

   ps = (struct pipe_surface *)CALLOC(1, surface_struct_size);
   if (!ps)
      return NULL;

   pipe_surface_init(ps, pt, face, level, zslice, flags);
   ps->offset = ~0;

   if (pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)
      cso_hash_insert(us->u.hash, ((zslice + face) << 8) | level, ps);
   else
      us->u.array[level] = ps;

   return ps;
}
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct radeon_cs_context *tmp;

   switch (cs->base.ring_type) {
   case RING_DMA:
      /* pad DMA ring to 8 DWs */
      if (cs->ws->info.chip_class <= SI) {
         while (rcs->cdw & 7)
            OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
      } else {
         while (rcs->cdw & 7)
            OUT_CS(&cs->base, 0x00000000); /* NOP packet */
      }
      break;
   case RING_GFX:
      /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
       * r6xx, requires at least 4 dw alignment to avoid a hw bug.
       */
      if (flags & RADEON_FLUSH_COMPUTE) {
         if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
               OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
         } else {
            while (rcs->cdw & 7)
               OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
         }
      } else {
         while (rcs->cdw & 7)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
      }
      break;
   }

   if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
      fprintf(stderr, "radeon: command stream overflowed\n");
   }

   radeon_drm_cs_sync_flush(rcs);

   /* Flip command streams. */
   tmp = cs->csc;
   cs->csc = cs->cst;
   cs->cst = tmp;

   cs->cst->cs_trace_id = cs_trace_id;

   /* If the CS is not empty or overflowed, emit it in a separate thread. */
   if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS &&
       !debug_get_option_noop()) {
      unsigned i, crelocs = cs->cst->crelocs;

      cs->cst->chunks[0].length_dw = cs->base.cdw;

      for (i = 0; i < crelocs; i++) {
         /* Update the number of active asynchronous CS ioctls for the buffer. */
         p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
      }

      switch (cs->base.ring_type) {
      case RING_DMA:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_DMA;
         cs->cst->cs.num_chunks = 3;
         if (cs->ws->info.r600_virtual_address) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
         }
         break;

      case RING_UVD:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_UVD;
         cs->cst->cs.num_chunks = 3;
         break;

      default:
      case RING_GFX:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_GFX;
         cs->cst->cs.num_chunks = 2;
         if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->cs.num_chunks = 3;
         }
         if (cs->ws->info.r600_virtual_address) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
            cs->cst->cs.num_chunks = 3;
         }
         if (flags & RADEON_FLUSH_END_OF_FRAME) {
            cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
            cs->cst->cs.num_chunks = 3;
         }
         if (flags & RADEON_FLUSH_COMPUTE) {
            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
            cs->cst->cs.num_chunks = 3;
         }
         break;
      }

      if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) {
         cs->flush_started = 1;
         radeon_drm_ws_queue_cs(cs->ws, cs);
      } else {
         pipe_mutex_lock(cs->ws->cs_stack_lock);
         if (cs->ws->thread) {
            while (p_atomic_read(&cs->ws->ncs)) {
               pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock);
            }
         }
         pipe_mutex_unlock(cs->ws->cs_stack_lock);
         radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
      }
   } else {
      radeon_cs_context_cleanup(cs->cst);
   }

   /* Prepare a new CS. */
   cs->base.buf = cs->csc->buf;
   cs->base.cdw = 0;
}
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
   struct radeon_cs_context *csc = cs->csc;
   struct drm_radeon_cs_reloc *reloc;
   unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
   enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
   enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
   bool update_hash = TRUE;
   int i;

   *added_domains = 0;
   if (csc->is_handle_added[hash]) {
      i = csc->reloc_indices_hashlist[hash];
      reloc = &csc->relocs[i];
      if (reloc->handle != bo->handle) {
         /* Hash collision, look for the BO in the list of relocs linearly. */
         for (i = csc->crelocs - 1; i >= 0; i--) {
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
               /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
               break;
            }
         }
      }

      if (i >= 0) {
         /* On the DMA ring we need to emit as many relocations as there are
          * uses of the bo, so each time this function is called we should
          * add the bo to the relocation buffer again.
          *
          * Do not update the hash table if it's the DMA ring, so that the
          * first hash entry always points to the first relocation of the bo,
          * which is the one used by the kernel. Following relocations are
          * ignored by the kernel's memory placement (but are still used by
          * the kernel to patch the command stream with the proper buffer
          * offset).
          */
         update_hash = FALSE;
         update_reloc_domains(reloc, rd, wd, added_domains);
         if (cs->base.ring_type != RING_DMA) {
            csc->reloc_indices_hashlist[hash] = i;
            return i;
         }
      }
   }

   /* New relocation, check if the backing array is large enough. */
   if (csc->crelocs >= csc->nrelocs) {
      uint32_t size;
      csc->nrelocs += 10;

      size = csc->nrelocs * sizeof(struct radeon_bo*);
      csc->relocs_bo = realloc(csc->relocs_bo, size);

      size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
      csc->relocs = realloc(csc->relocs, size);

      csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
   }

   /* Initialize the new relocation. */
   csc->relocs_bo[csc->crelocs] = NULL;
   radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
   p_atomic_inc(&bo->num_cs_references);
   reloc = &csc->relocs[csc->crelocs];
   reloc->handle = bo->handle;
   reloc->read_domains = rd;
   reloc->write_domain = wd;
   reloc->flags = 0;

   csc->is_handle_added[hash] = TRUE;
   if (update_hash) {
      csc->reloc_indices_hashlist[hash] = csc->crelocs;
   }

   csc->chunks[1].length_dw += RELOC_DWORDS;

   *added_domains = rd | wd;
   return csc->crelocs++;
}
static void *si_create_compute_state(
   struct pipe_context *ctx,
   const struct pipe_compute_state *cso)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_screen *sscreen = (struct si_screen *)ctx->screen;
   struct si_compute *program = CALLOC_STRUCT(si_compute);
   struct si_shader *shader = &program->shader;

   program->ir_type = cso->ir_type;
   program->local_size = cso->req_local_mem;
   program->private_size = cso->req_private_mem;
   program->input_size = cso->req_input_mem;
   program->use_code_object_v2 = HAVE_LLVM >= 0x0400 &&
                                 cso->ir_type == PIPE_SHADER_IR_NATIVE;

   if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
      struct si_shader_selector sel;
      bool scratch_enabled;

      memset(&sel, 0, sizeof(sel));
      sel.tokens = tgsi_dup_tokens(cso->prog);
      if (!sel.tokens) {
         FREE(program);
         return NULL;
      }

      tgsi_scan_shader(cso->prog, &sel.info);
      sel.type = PIPE_SHADER_COMPUTE;
      sel.local_size = cso->req_local_mem;

      p_atomic_inc(&sscreen->b.num_shaders_created);

      program->shader.selector = &sel;

      if (si_shader_create(sscreen, sctx->tm, &program->shader,
                           &sctx->b.debug)) {
         FREE(sel.tokens);
         FREE(program);
         return NULL;
      }

      scratch_enabled = shader->config.scratch_bytes_per_wave > 0;

      shader->config.rsrc1 =
         S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
         S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
         S_00B848_DX10_CLAMP(1) |
         S_00B848_FLOAT_MODE(shader->config.float_mode);

      shader->config.rsrc2 = S_00B84C_USER_SGPR(SI_CS_NUM_USER_SGPR) |
         S_00B84C_SCRATCH_EN(scratch_enabled) |
         S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
         S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
         S_00B84C_LDS_SIZE(shader->config.lds_size);

      FREE(sel.tokens);
   } else {
      const struct pipe_llvm_program_header *header;
      const char *code;
      header = cso->prog;
      code = cso->prog + sizeof(struct pipe_llvm_program_header);

      radeon_elf_read(code, header->num_bytes, &program->shader.binary);
      if (program->use_code_object_v2) {
         const amd_kernel_code_t *code_object =
            si_compute_get_code_object(program, 0);
         code_object_to_config(code_object, &program->shader.config);
      } else {
         si_shader_binary_read_config(&program->shader.binary,
                                      &program->shader.config, 0);
      }
      si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
                     PIPE_SHADER_COMPUTE, stderr);
      si_shader_binary_upload(sctx->screen, &program->shader);
   }

   return program;
}
/**
 * Notify the binding context to validate the buffer.
 */
void
xmesa_notify_invalid_buffer(XMesaBuffer b)
{
   p_atomic_inc(&b->stfb->stamp);
}