static struct pb_buffer * amdgpu_bo_create(struct radeon_winsys *rws, unsigned size, unsigned alignment, boolean use_reusable_pool, enum radeon_bo_domain domain, enum radeon_bo_flag flags) { struct amdgpu_winsys *ws = amdgpu_winsys(rws); struct amdgpu_winsys_bo *bo; unsigned usage = 0; /* Don't use VRAM if the GPU doesn't have much. This is only the initial * domain. The kernel is free to move the buffer if it wants to. * * 64MB means no VRAM by todays standards. */ if (domain & RADEON_DOMAIN_VRAM && ws->info.vram_size <= 64*1024*1024) { domain = RADEON_DOMAIN_GTT; flags = RADEON_FLAG_GTT_WC; } /* Align size to page size. This is the minimum alignment for normal * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. */ size = align(size, ws->gart_page_size); /* Only set one usage bit each for domains and flags, or the cache manager * might consider different sets of domains / flags compatible */ if (domain == RADEON_DOMAIN_VRAM_GTT) usage = 1 << 2; else usage = domain >> 1; assert(flags < sizeof(usage) * 8 - 3); usage |= 1 << (flags + 3); /* Get a buffer from the cache. */ if (use_reusable_pool) { bo = (struct amdgpu_winsys_bo*) pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage); if (bo) return &bo->base; } /* Create a new one. */ bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags); if (!bo) { /* Clear the cache and try again. */ pb_cache_release_all_buffers(&ws->bo_cache); bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags); if (!bo) return NULL; } bo->use_reusable_pool = use_reusable_pool; return &bo->base; }
static struct pb_buffer * amdgpu_bo_create(struct radeon_winsys *rws, unsigned size, unsigned alignment, boolean use_reusable_pool, enum radeon_bo_domain domain, enum radeon_bo_flag flags) { struct amdgpu_winsys *ws = amdgpu_winsys(rws); struct amdgpu_bo_desc desc; struct pb_manager *provider; struct pb_buffer *buffer; /* Don't use VRAM if the GPU doesn't have much. This is only the initial * domain. The kernel is free to move the buffer if it wants to. * * 64MB means no VRAM by todays standards. */ if (domain & RADEON_DOMAIN_VRAM && ws->info.vram_size <= 64*1024*1024) { domain = RADEON_DOMAIN_GTT; flags = RADEON_FLAG_GTT_WC; } memset(&desc, 0, sizeof(desc)); desc.base.alignment = alignment; /* Align size to page size. This is the minimum alignment for normal * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. */ size = align(size, ws->gart_page_size); /* Only set one usage bit each for domains and flags, or the cache manager * might consider different sets of domains / flags compatible */ if (domain == RADEON_DOMAIN_VRAM_GTT) desc.base.usage = 1 << 2; else desc.base.usage = domain >> 1; assert(flags < sizeof(desc.base.usage) * 8 - 3); desc.base.usage |= 1 << (flags + 3); desc.initial_domain = domain; desc.flags = flags; /* Assign a buffer manager. */ if (use_reusable_pool) provider = ws->cman; else provider = ws->kman; buffer = provider->create_buffer(provider, size, &desc.base); if (!buffer) return NULL; return (struct pb_buffer*)buffer; }
static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, void *pointer, uint64_t size) { struct amdgpu_winsys *ws = amdgpu_winsys(rws); amdgpu_bo_handle buf_handle; struct amdgpu_winsys_bo *bo; uint64_t va; amdgpu_va_handle va_handle; bo = CALLOC_STRUCT(amdgpu_winsys_bo); if (!bo) return NULL; if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle)) goto error; if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, 1 << 12, 0, &va, &va_handle, 0)) goto error_va_alloc; if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP)) goto error_va_map; /* Initialize it. */ pipe_reference_init(&bo->base.reference, 1); bo->bo = buf_handle; bo->base.alignment = 0; bo->base.size = size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; bo->ws = ws; bo->user_ptr = pointer; bo->va = va; bo->va_handle = va_handle; bo->initial_domain = RADEON_DOMAIN_GTT; bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size); amdgpu_add_buffer_to_global_list(bo); return (struct pb_buffer*)bo; error_va_map: amdgpu_va_range_free(va_handle); error_va_alloc: amdgpu_bo_free(buf_handle); error: FREE(bo); return NULL; }
static struct pb_buffer * amdgpu_bo_create(struct radeon_winsys *rws, uint64_t size, unsigned alignment, enum radeon_bo_domain domain, enum radeon_bo_flag flags) { struct amdgpu_winsys *ws = amdgpu_winsys(rws); struct amdgpu_winsys_bo *bo; unsigned usage = 0; /* Align size to page size. This is the minimum alignment for normal * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. */ size = align64(size, ws->info.gart_page_size); alignment = align(alignment, ws->info.gart_page_size); /* Only set one usage bit each for domains and flags, or the cache manager * might consider different sets of domains / flags compatible */ if (domain == RADEON_DOMAIN_VRAM_GTT) usage = 1 << 2; else usage = domain >> 1; assert(flags < sizeof(usage) * 8 - 3); usage |= 1 << (flags + 3); /* Get a buffer from the cache. */ bo = (struct amdgpu_winsys_bo*) pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage); if (bo) return &bo->base; /* Create a new one. */ bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags); if (!bo) { /* Clear the cache and try again. */ pb_cache_release_all_buffers(&ws->bo_cache); bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags); if (!bo) return NULL; } bo->use_reusable_pool = true; return &bo->base; }
static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws) { struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx); int r; struct amdgpu_bo_alloc_request alloc_buffer = {}; amdgpu_bo_handle buf_handle; ctx->ws = amdgpu_winsys(ws); ctx->refcount = 1; r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx); if (r) { fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r); FREE(ctx); return NULL; } alloc_buffer.alloc_size = 4 * 1024; alloc_buffer.phys_alignment = 4 *1024; alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; r = amdgpu_bo_alloc(ctx->ws->dev, &alloc_buffer, &buf_handle); if (r) { fprintf(stderr, "amdgpu: amdgpu_bo_alloc failed. (%i)\n", r); amdgpu_cs_ctx_free(ctx->ctx); FREE(ctx); return NULL; } r = amdgpu_bo_cpu_map(buf_handle, (void**)&ctx->user_fence_cpu_address_base); if (r) { fprintf(stderr, "amdgpu: amdgpu_bo_cpu_map failed. (%i)\n", r); amdgpu_bo_free(buf_handle); amdgpu_cs_ctx_free(ctx->ctx); FREE(ctx); return NULL; } memset(ctx->user_fence_cpu_address_base, 0, alloc_buffer.alloc_size); ctx->user_fence_bo = buf_handle; return (struct radeon_winsys_ctx*)ctx; }
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, struct winsys_handle *whandle, unsigned *stride) { struct amdgpu_winsys *ws = amdgpu_winsys(rws); struct amdgpu_winsys_bo *bo; enum amdgpu_bo_handle_type type; struct amdgpu_bo_import_result result = {0}; uint64_t va; amdgpu_va_handle va_handle; struct amdgpu_bo_info info = {0}; enum radeon_bo_domain initial = 0; int r; /* Initialize the structure. */ bo = CALLOC_STRUCT(amdgpu_winsys_bo); if (!bo) { return NULL; } switch (whandle->type) { case DRM_API_HANDLE_TYPE_SHARED: type = amdgpu_bo_handle_type_gem_flink_name; break; case DRM_API_HANDLE_TYPE_FD: type = amdgpu_bo_handle_type_dma_buf_fd; break; default: return NULL; } r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result); if (r) goto error; /* Get initial domains. */ r = amdgpu_bo_query_info(result.buf_handle, &info); if (r) goto error_query; r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0, &va, &va_handle, 0); if (r) goto error_query; r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP); if (r) goto error_va_map; if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM) initial |= RADEON_DOMAIN_VRAM; if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT) initial |= RADEON_DOMAIN_GTT; pipe_reference_init(&bo->base.reference, 1); bo->base.alignment = info.phys_alignment; bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; bo->bo = result.buf_handle; bo->base.size = result.alloc_size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; bo->rws = ws; bo->va = va; bo->va_handle = va_handle; bo->initial_domain = initial; bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); bo->is_shared = true; if (stride) *stride = whandle->stride; if (bo->initial_domain & RADEON_DOMAIN_VRAM) ws->allocated_vram += align(bo->base.size, ws->gart_page_size); else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align(bo->base.size, ws->gart_page_size); return &bo->base; error_va_map: amdgpu_va_range_free(va_handle); error_query: amdgpu_bo_free(result.buf_handle); error: FREE(bo); return NULL; }