static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
                                    struct pipe_fence_handle **out_fence)
{
   struct amdgpu_winsys *ws = cs->ctx->ws;
   struct pipe_fence_handle *fence;
   int i, j, r;

   /* Create a fence. */
   fence = amdgpu_fence_create(cs->ctx,
                               cs->request.ip_type,
                               cs->request.ip_instance,
                               cs->request.ring);
   if (out_fence)
      amdgpu_fence_reference(out_fence, fence);

   cs->request.number_of_dependencies = 0;

   /* Since the kernel driver doesn't synchronize execution between different
    * rings automatically, we have to add fence dependencies manually. */
   pipe_mutex_lock(ws->bo_fence_lock);
   for (i = 0; i < cs->num_buffers; i++) {
      for (j = 0; j < RING_LAST; j++) {
         struct amdgpu_cs_fence *dep;
         unsigned idx;

         struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence[j];
         if (!bo_fence)
            continue;

         if (bo_fence->ctx == cs->ctx &&
             bo_fence->fence.ip_type == cs->request.ip_type &&
             bo_fence->fence.ip_instance == cs->request.ip_instance &&
             bo_fence->fence.ring == cs->request.ring)
            continue;

         if (amdgpu_fence_wait((void *)bo_fence, 0, false))
            continue;

         idx = cs->request.number_of_dependencies++;
         if (idx >= cs->max_dependencies) {
            unsigned size;

            cs->max_dependencies = idx + 8;
            size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
            cs->request.dependencies = realloc(cs->request.dependencies, size);
         }

         dep = &cs->request.dependencies[idx];
         memcpy(dep, &bo_fence->fence, sizeof(*dep));
      }
   }

   cs->request.fence_info.handle = NULL;
   if (cs->request.ip_type != AMDGPU_HW_IP_UVD &&
       cs->request.ip_type != AMDGPU_HW_IP_VCE) {
      cs->request.fence_info.handle = cs->ctx->user_fence_bo;
      cs->request.fence_info.offset = cs->base.ring_type;
   }

   r = amdgpu_cs_submit(cs->ctx->ctx, 0, &cs->request, 1);
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
      else
         fprintf(stderr, "amdgpu: The CS has been rejected, "
                 "see dmesg for more information.\n");

      amdgpu_fence_signalled(fence);
   } else {
      /* Success. */
      uint64_t *user_fence = NULL;
      if (cs->request.ip_type != AMDGPU_HW_IP_UVD &&
          cs->request.ip_type != AMDGPU_HW_IP_VCE)
         user_fence = cs->ctx->user_fence_cpu_address_base +
                      cs->request.fence_info.offset;
      amdgpu_fence_submitted(fence, &cs->request, user_fence);

      for (i = 0; i < cs->num_buffers; i++)
         amdgpu_fence_reference(&cs->buffers[i].bo->fence[cs->base.ring_type],
                                fence);
   }
   pipe_mutex_unlock(ws->bo_fence_lock);
   amdgpu_fence_reference(&fence, NULL);
}
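The dependency array above grows in chunks of 8 entries via an unchecked realloc(). Below is a minimal standalone sketch of that growth pattern; the names (struct dep, dep_list_add) are hypothetical, and unlike the original it checks the realloc() result so an allocation failure doesn't overwrite (and leak) the old pointer.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for struct amdgpu_cs_fence; only the growth
 * pattern matters here, not the field layout. */
struct dep {
   unsigned ip_type, ip_instance, ring;
};

struct dep_list {
   struct dep *deps;
   unsigned count;
   unsigned max; /* allocated capacity, grown in chunks of 8 */
};

/* Append one dependency, growing the array the way the submission
 * path above does. */
static int dep_list_add(struct dep_list *list, const struct dep *d)
{
   if (list->count >= list->max) {
      unsigned new_max = list->count + 8;
      struct dep *tmp = realloc(list->deps, new_max * sizeof(*tmp));

      if (!tmp)
         return -1; /* the old array is still valid and unchanged */
      list->deps = tmp;
      list->max = new_max;
   }
   memcpy(&list->deps[list->count++], d, sizeof(*d));
   return 0;
}

int main(void)
{
   struct dep_list list = {0};
   unsigned i;

   for (i = 0; i < 20; i++) {
      struct dep d = { .ip_type = 0, .ip_instance = 0, .ring = i % 2 };

      if (dep_list_add(&list, &d))
         return 1;
   }
   printf("count=%u capacity=%u\n", list.count, list.max);
   free(list.deps);
   return 0;
}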
static bool amdgpu_fence_wait_rel_timeout(struct radeon_winsys *rws,
                                          struct pipe_fence_handle *fence,
                                          uint64_t timeout)
{
   return amdgpu_fence_wait(fence, timeout, false);
}
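The trailing false selects a relative timeout; amdgpu_bo_wait() further below instead converts to an absolute deadline with os_time_get_absolute_timeout() and passes true. A rough user-space sketch of that conversion, assuming nanosecond timeouts and CLOCK_MONOTONIC; the TIMEOUT_INFINITE sentinel is a hypothetical stand-in, in the spirit of PIPE_TIMEOUT_INFINITE:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical sentinel for "wait forever". */
#define TIMEOUT_INFINITE UINT64_MAX

/* Turn a relative timeout in nanoseconds into an absolute deadline,
 * analogous to what os_time_get_absolute_timeout() does for the
 * absolute-wait path in amdgpu_bo_wait() below. */
static uint64_t relative_to_absolute_ns(uint64_t timeout_ns)
{
   struct timespec ts;

   if (timeout_ns == TIMEOUT_INFINITE)
      return TIMEOUT_INFINITE;

   clock_gettime(CLOCK_MONOTONIC, &ts);
   return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec +
          timeout_ns;
}

int main(void)
{
   /* A 16 ms relative timeout becomes a fixed point in the future. */
   printf("deadline: %llu ns\n",
          (unsigned long long)relative_to_absolute_ns(16000000));
   return 0;
}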
/**
 * cik_sdma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp = 0;
	u64 gpu_addr;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		amdgpu_wb_free(adev, index);
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
	if (r) {
		amdgpu_ib_free(adev, &ib);
		amdgpu_wb_free(adev, index);
		DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
		return r;
	}

	r = amdgpu_fence_wait(ib.fence, false);
	if (r) {
		amdgpu_ib_free(adev, &ib);
		amdgpu_wb_free(adev, index);
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		return r;
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ib.fence->ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}

	amdgpu_ib_free(adev, &ib);
	amdgpu_wb_free(adev, index);
	return r;
}
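The test seeds a writeback slot with the sentinel 0xCAFEDEAD, then asks the SDMA engine to overwrite it with 0xDEADBEEF via a 5-dword linear-write packet; success means the engine really executed the IB. A standalone sketch that builds the same packet on the CPU; the SDMA_PACKET bit layout below is an assumption based on the CIK SDMA headers, so treat the exact encodings as illustrative:

#include <stdint.h>
#include <stdio.h>

/* Packet encoding assumed from the CIK SDMA headers: opcode in bits
 * 7:0, sub-opcode in bits 15:8, extra field in bits 31:16. */
#define SDMA_PACKET(op, sub_op, e) \
	((((e) & 0xFFFF) << 16) | (((sub_op) & 0xFF) << 8) | ((op) & 0xFF))
#define SDMA_OPCODE_WRITE            2
#define SDMA_WRITE_SUB_OPCODE_LINEAR 0

/* Build the same 5-dword "write one dword to gpu_addr" IB that the
 * ring test above submits. */
static unsigned build_sdma_write_ib(uint32_t *ib, uint64_t gpu_addr,
				    uint32_t value)
{
	ib[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib[1] = (uint32_t)gpu_addr;         /* lower_32_bits(gpu_addr) */
	ib[2] = (uint32_t)(gpu_addr >> 32); /* upper_32_bits(gpu_addr) */
	ib[3] = 1;                          /* number of dwords to write */
	ib[4] = value;                      /* 0xDEADBEEF in the test */
	return 5;                           /* becomes ib.length_dw */
}

int main(void)
{
	uint32_t ib[5];
	unsigned i, n = build_sdma_write_ib(ib, 0x100000ull, 0xDEADBEEF);

	for (i = 0; i < n; i++)
		printf("ib[%u] = 0x%08X\n", i, (unsigned)ib[i]);
	return 0;
}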
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->rws;
   int i;

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle. */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      /* Timeout == 0 is quite simple. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         if (bo->fence[i]) {
            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
               /* Release the idle fence to avoid checking it again later. */
               amdgpu_fence_reference(&bo->fence[i], NULL);
            } else {
               pipe_mutex_unlock(ws->bo_fence_lock);
               return false;
            }
         }
      pipe_mutex_unlock(ws->bo_fence_lock);
      return true;

   } else {
      struct pipe_fence_handle *fence[RING_LAST] = {};
      bool fence_idle[RING_LAST] = {};
      bool buffer_idle = true;
      int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Take references to all fences, so that we can wait for them
       * without the lock. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         amdgpu_fence_reference(&fence[i], bo->fence[i]);
      pipe_mutex_unlock(ws->bo_fence_lock);

      /* Now wait for the fences. */
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i]) {
            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
               fence_idle[i] = true;
            else
               buffer_idle = false;
         }
      }

      /* Release idle fences to avoid checking them again later. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i] == bo->fence[i] && fence_idle[i])
            amdgpu_fence_reference(&bo->fence[i], NULL);

         amdgpu_fence_reference(&fence[i], NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}
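The timeout != 0 path is a three-phase pattern: snapshot fence references under the lock, wait with the lock dropped, then release idle fences under the lock, clearing a BO slot only if the fence there is still the one that was waited on. A toy, self-contained sketch of that pattern using pthreads; the fence type and its refcounting are illustrative stand-ins, not the driver's types:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_SLOTS 4 /* stands in for RING_LAST */

/* Toy refcounted fence; only the locking pattern mirrors the driver. */
struct fence {
   int refcount;
   bool signaled;
};

/* Mimics amdgpu_fence_reference(): ref src, unref (and free) *dst. */
static void fence_ref(struct fence **dst, struct fence *src)
{
   if (src)
      src->refcount++;
   if (*dst && --(*dst)->refcount == 0)
      free(*dst);
   *dst = src;
}

/* Stands in for amdgpu_fence_wait() with an absolute timeout. */
static bool fence_wait(struct fence *f, int64_t abs_timeout)
{
   (void)abs_timeout;
   return f->signaled;
}

struct bo {
   struct fence *fence[NUM_SLOTS];
};

static pthread_mutex_t fence_lock = PTHREAD_MUTEX_INITIALIZER;

static bool bo_wait_all(struct bo *bo, int64_t abs_timeout)
{
   struct fence *snapshot[NUM_SLOTS] = {0};
   bool idle[NUM_SLOTS] = {0};
   bool buffer_idle = true;
   int i;

   /* Phase 1: take references under the lock. */
   pthread_mutex_lock(&fence_lock);
   for (i = 0; i < NUM_SLOTS; i++)
      fence_ref(&snapshot[i], bo->fence[i]);
   pthread_mutex_unlock(&fence_lock);

   /* Phase 2: the potentially long waits run without the lock held. */
   for (i = 0; i < NUM_SLOTS; i++) {
      if (snapshot[i]) {
         if (fence_wait(snapshot[i], abs_timeout))
            idle[i] = true;
         else
            buffer_idle = false;
      }
   }

   /* Phase 3: clear a BO slot only if nobody replaced its fence while
    * we were waiting -- the same check as the original code. */
   pthread_mutex_lock(&fence_lock);
   for (i = 0; i < NUM_SLOTS; i++) {
      if (snapshot[i] == bo->fence[i] && idle[i])
         fence_ref(&bo->fence[i], NULL);
      fence_ref(&snapshot[i], NULL);
   }
   pthread_mutex_unlock(&fence_lock);

   return buffer_idle;
}

int main(void)
{
   struct bo bo = {0};
   struct fence *f = calloc(1, sizeof(*f));

   f->refcount = 1; /* reference held by the local pointer */
   f->signaled = true;
   fence_ref(&bo.fence[0], f); /* the BO now holds a second reference */
   fence_ref(&f, NULL);        /* drop the local reference */

   printf("buffer idle: %d\n", bo_wait_all(&bo, 0));
   return 0;
}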
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
static void amdgpu_do_test_moves(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_bo *vram_obj = NULL;
	struct amdgpu_bo **gtt_obj = NULL;
	uint64_t gtt_addr, vram_addr;
	unsigned n, size;
	int i, r;

	size = 1024 * 1024;

	/* Number of tests =
	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
	 */
	n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		if (adev->rings[i])
			n -= adev->rings[i]->ring_size;
	if (adev->wb.wb_obj)
		n -= AMDGPU_GPU_PAGE_SIZE;
	if (adev->irq.ih.ring_obj)
		n -= adev->irq.ih.ring_size;
	n /= size;

	gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
	if (!gtt_obj) {
		DRM_ERROR("Failed to allocate %d pointers\n", n);
		r = 1;
		goto out_cleanup;
	}

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM, 0,
			     NULL, &vram_obj);
	if (r) {
		DRM_ERROR("Failed to create VRAM object\n");
		goto out_cleanup;
	}
	r = amdgpu_bo_reserve(vram_obj, false);
	if (unlikely(r != 0))
		goto out_unref;
	r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
	if (r) {
		DRM_ERROR("Failed to pin VRAM object\n");
		goto out_unres;
	}
	for (i = 0; i < n; i++) {
		void *gtt_map, *vram_map;
		void **gtt_start, **gtt_end;
		void **vram_start, **vram_end;
		struct amdgpu_fence *fence = NULL;

		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
		if (r) {
			DRM_ERROR("Failed to create GTT object %d\n", i);
			goto out_lclean;
		}

		r = amdgpu_bo_reserve(gtt_obj[i], false);
		if (unlikely(r != 0))
			goto out_lclean_unref;
		r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gtt_addr);
		if (r) {
			DRM_ERROR("Failed to pin GTT object %d\n", i);
			goto out_lclean_unres;
		}

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object %d\n", i);
			goto out_lclean_unpin;
		}

		for (gtt_start = gtt_map, gtt_end = gtt_map + size;
		     gtt_start < gtt_end;
		     gtt_start++)
			*gtt_start = gtt_start;

		amdgpu_bo_kunmap(gtt_obj[i]);

		r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr,
				       size, NULL, &fence);
		if (r) {
			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = amdgpu_fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_lclean_unpin;
		}

		amdgpu_fence_unref(&fence);

		r = amdgpu_bo_kmap(vram_obj, &vram_map);
		if (r) {
			DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
			goto out_lclean_unpin;
		}

		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     vram_start < vram_end;
		     gtt_start++, vram_start++) {
			if (*vram_start != gtt_start) {
				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
					  "expected 0x%p (GTT/VRAM offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *vram_start, gtt_start,
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)gtt_start - gtt_map),
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)gtt_start - gtt_map));
				amdgpu_bo_kunmap(vram_obj);
				goto out_lclean_unpin;
			}
			*vram_start = vram_start;
		}

		amdgpu_bo_kunmap(vram_obj);

		r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr,
				       size, NULL, &fence);
		if (r) {
			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = amdgpu_fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_lclean_unpin;
		}

		amdgpu_fence_unref(&fence);

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object after copy %d\n", i);
			goto out_lclean_unpin;
		}
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     gtt_start < gtt_end;
		     gtt_start++, vram_start++) {
			if (*gtt_start != vram_start) {
				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
					  "expected 0x%p (VRAM/GTT offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *gtt_start, vram_start,
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)vram_start - vram_map),
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)vram_start - vram_map));
				amdgpu_bo_kunmap(gtt_obj[i]);
				goto out_lclean_unpin;
			}
		}

		amdgpu_bo_kunmap(gtt_obj[i]);

		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
			 gtt_addr - adev->mc.gtt_start);
		continue;

out_lclean_unpin:
		amdgpu_bo_unpin(gtt_obj[i]);
out_lclean_unres:
		amdgpu_bo_unreserve(gtt_obj[i]);
out_lclean_unref:
		amdgpu_bo_unref(&gtt_obj[i]);
out_lclean:
		for (--i; i >= 0; --i) {
			amdgpu_bo_unpin(gtt_obj[i]);
			amdgpu_bo_unreserve(gtt_obj[i]);
			amdgpu_bo_unref(&gtt_obj[i]);
		}
		if (fence)
			amdgpu_fence_unref(&fence);
		break;
	}

	amdgpu_bo_unpin(vram_obj);
out_unres:
	amdgpu_bo_unreserve(vram_obj);
out_unref:
	amdgpu_bo_unref(&vram_obj);
out_cleanup:
	kfree(gtt_obj);
	if (r) {
		printk(KERN_WARNING "Error while testing BO move.\n");
	}
}
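The verification trick in the move test is self-addressing data: every pointer-sized slot in the source buffer stores its own address, so after a copy each destination slot must hold the address of the matching source slot, and any corruption or misalignment shows up as a pointer/offset mismatch. A CPU-only analog of that scheme, with memcpy standing in for the GPU copy:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	size_t size = 1024 * 1024; /* same per-object test size as above */
	size_t count = size / sizeof(void *);
	size_t i;
	void **src = malloc(size);
	void **dst = malloc(size);

	if (!src || !dst)
		return 1;

	/* Each slot stores its own address, like *gtt_start = gtt_start. */
	for (i = 0; i < count; i++)
		src[i] = &src[i];

	memcpy(dst, src, size); /* stands in for amdgpu_copy_buffer() */

	for (i = 0; i < count; i++) {
		if (dst[i] != (void *)&src[i]) {
			fprintf(stderr,
				"incorrect copy at offset 0x%zx: got %p, expected %p\n",
				i * sizeof(void *), dst[i], (void *)&src[i]);
			return 1;
		}
	}

	printf("copy verified for %zu slots\n", count);
	free(src);
	free(dst);
	return 0;
}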