static void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   int i;

   /* Unmap the buffer from the GPU virtual address space, then release
    * the VA range and the kernel buffer handle. */
   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->va_handle);
   amdgpu_bo_free(bo->bo);

   /* Drop any fences still attached to this buffer. */
   for (i = 0; i < RING_LAST; i++)
      amdgpu_fence_reference(&bo->fence[i], NULL);

   /* Update the winsys memory accounting. */
   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->allocated_vram -= align(bo->base.size, bo->rws->gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->rws->allocated_gtt -= align(bo->base.size, bo->rws->gart_page_size);
   FREE(bo);
}
void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   int i;

   /* Remove the buffer from the winsys-wide buffer list under its lock. */
   pipe_mutex_lock(bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->global_list_item);
   bo->ws->num_buffers--;
   pipe_mutex_unlock(bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->va_handle);
   amdgpu_bo_free(bo->bo);

   for (i = 0; i < RING_LAST; i++)
      amdgpu_fence_reference(&bo->fence[i], NULL);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size,
                                        bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size,
                                       bo->ws->info.gart_page_size);
   FREE(bo);
}
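/* [Sketch] The registration counterpart to the LIST_DEL above: each buffer
 * is added to the winsys-wide list on creation so amdgpu_bo_destroy can
 * remove it again. This is a hedged reconstruction -- the helper name and
 * the global_bo_list head field are assumptions inferred from the fields
 * used above, not taken from this section. */
static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   pipe_mutex_lock(ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   pipe_mutex_unlock(ws->global_bo_list_lock);
}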
static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
                                    struct pipe_fence_handle **out_fence)
{
   struct amdgpu_winsys *ws = cs->ctx->ws;
   struct pipe_fence_handle *fence;
   int i, j, r;

   /* Create a fence. */
   fence = amdgpu_fence_create(cs->ctx,
                               cs->request.ip_type,
                               cs->request.ip_instance,
                               cs->request.ring);
   if (out_fence)
      amdgpu_fence_reference(out_fence, fence);

   cs->request.number_of_dependencies = 0;

   /* Since the kernel driver doesn't synchronize execution between different
    * rings automatically, we have to add fence dependencies manually. */
   pipe_mutex_lock(ws->bo_fence_lock);
   for (i = 0; i < cs->num_buffers; i++) {
      for (j = 0; j < RING_LAST; j++) {
         struct amdgpu_cs_fence *dep;
         unsigned idx;

         struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence[j];
         if (!bo_fence)
            continue;

         /* Fences from the same context and ring are ordered by the ring
          * itself, so no explicit dependency is needed. */
         if (bo_fence->ctx == cs->ctx &&
             bo_fence->fence.ip_type == cs->request.ip_type &&
             bo_fence->fence.ip_instance == cs->request.ip_instance &&
             bo_fence->fence.ring == cs->request.ring)
            continue;

         /* Skip fences that have already signalled. */
         if (amdgpu_fence_wait((void *)bo_fence, 0, false))
            continue;

         /* Grow the dependency array if needed. */
         idx = cs->request.number_of_dependencies++;
         if (idx >= cs->max_dependencies) {
            unsigned size;

            cs->max_dependencies = idx + 8;
            size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
            cs->request.dependencies = realloc(cs->request.dependencies, size);
         }

         dep = &cs->request.dependencies[idx];
         memcpy(dep, &bo_fence->fence, sizeof(*dep));
      }
   }

   cs->request.fence_info.handle = NULL;
   if (cs->request.ip_type != AMDGPU_HW_IP_UVD &&
       cs->request.ip_type != AMDGPU_HW_IP_VCE) {
      cs->request.fence_info.handle = cs->ctx->user_fence_bo;
      cs->request.fence_info.offset = cs->base.ring_type;
   }

   r = amdgpu_cs_submit(cs->ctx->ctx, 0, &cs->request, 1);
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
      else
         fprintf(stderr, "amdgpu: The CS has been rejected, "
                 "see dmesg for more information.\n");

      amdgpu_fence_signalled(fence);
   } else {
      /* Success. */
      uint64_t *user_fence = NULL;
      if (cs->request.ip_type != AMDGPU_HW_IP_UVD &&
          cs->request.ip_type != AMDGPU_HW_IP_VCE)
         user_fence = cs->ctx->user_fence_cpu_address_base +
                      cs->request.fence_info.offset;
      amdgpu_fence_submitted(fence, &cs->request, user_fence);

      /* Attach the new fence to every buffer referenced by the CS. */
      for (i = 0; i < cs->num_buffers; i++)
         amdgpu_fence_reference(&cs->buffers[i].bo->fence[cs->base.ring_type],
                                fence);
   }
   pipe_mutex_unlock(ws->bo_fence_lock);
   amdgpu_fence_reference(&fence, NULL);
}
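/* [Sketch] How a flush path might drive amdgpu_cs_do_submission and the
 * fence it returns. Illustrative only: the wrapper below is hypothetical,
 * while amdgpu_fence_wait and amdgpu_fence_reference are the helpers used
 * in this file and PIPE_TIMEOUT_INFINITE is the standard Gallium constant. */
static void submit_and_wait(struct amdgpu_cs *cs)
{
   struct pipe_fence_handle *fence = NULL;

   amdgpu_cs_do_submission(cs, &fence);

   if (fence) {
      /* Relative (non-absolute) infinite timeout: block until the CS has
       * retired, then drop our reference. */
      amdgpu_fence_wait(fence, PIPE_TIMEOUT_INFINITE, false);
      amdgpu_fence_reference(&fence, NULL);
   }
}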
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->rws;
   int i;

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle. */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      /* Timeout == 0 is quite simple. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         if (bo->fence[i]) {
            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
               /* Release the idle fence to avoid checking it again later. */
               amdgpu_fence_reference(&bo->fence[i], NULL);
            } else {
               pipe_mutex_unlock(ws->bo_fence_lock);
               return false;
            }
         }
      pipe_mutex_unlock(ws->bo_fence_lock);
      return true;

   } else {
      struct pipe_fence_handle *fence[RING_LAST] = {};
      bool fence_idle[RING_LAST] = {};
      bool buffer_idle = true;
      int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Take references to all fences, so that we can wait for them
       * without the lock. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         amdgpu_fence_reference(&fence[i], bo->fence[i]);
      pipe_mutex_unlock(ws->bo_fence_lock);

      /* Now wait for the fences. */
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i]) {
            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
               fence_idle[i] = true;
            else
               buffer_idle = false;
         }
      }

      /* Release idle fences to avoid checking them again later. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i] == bo->fence[i] && fence_idle[i])
            amdgpu_fence_reference(&bo->fence[i], NULL);

         amdgpu_fence_reference(&fence[i], NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}
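/* [Sketch] How a CPU-map path might use amdgpu_bo_wait: timeout == 0 gives
 * a non-blocking busy query, while PIPE_TIMEOUT_INFINITE blocks until the
 * buffer is idle. The wrapper and the actual_cpu_map helper below are
 * hypothetical; amdgpu_bo_wait, RADEON_USAGE_READWRITE and
 * PIPE_TRANSFER_DONTBLOCK are the existing winsys/Gallium names. */
static void *map_for_cpu_access(struct pb_buffer *buf, unsigned usage)
{
   if (usage & PIPE_TRANSFER_DONTBLOCK) {
      /* Fail the map instead of stalling if the GPU still uses the buffer. */
      if (!amdgpu_bo_wait(buf, 0, RADEON_USAGE_READWRITE))
         return NULL;
   } else {
      /* Block until all GPU work referencing the buffer has completed. */
      amdgpu_bo_wait(buf, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
   }
   return actual_cpu_map(buf); /* hypothetical helper */
}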