/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
        uint64_t seq[RADEON_NUM_RINGS] = {};
        long r;

        /*
         * This function should not be called on !radeon fences.
         * If this is the case, it would mean this function can
         * also be called on radeon fences belonging to another card.
         * exclusive_lock is not held in that case.
         */
        if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
                return fence_wait(&fence->base, intr);

        seq[fence->ring] = fence->seq;
        r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
        if (r < 0) {
                return r;
        }

        r = fence_signal(&fence->base);
        if (!r)
                FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
        return 0;
}
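/*
 * Minimal caller sketch, not taken from the driver: emit a fence on a
 * ring, wait for it with interruptible sleep, and drop the reference.
 * The helper name and its error handling are assumptions made for the
 * example; only radeon_fence_emit(), radeon_fence_wait() and
 * radeon_fence_unref() are real radeon calls.
 */
static int example_emit_and_wait(struct radeon_device *rdev, int ring)
{
        struct radeon_fence *fence;
        int r;

        r = radeon_fence_emit(rdev, &fence, ring);
        if (r)
                return r;

        /* intr == true: a pending signal aborts the wait with -ERESTARTSYS */
        r = radeon_fence_wait(fence, true);
        radeon_fence_unref(&fence);
        return r;
}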
/**
 * uvd_v5_0_ring_test_ib - test ib execution
 *
 * @ring: amdgpu_ring pointer
 *
 * Test if we can successfully execute an IB
 */
static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct fence *fence = NULL;
        int r;

        r = amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
        if (r) {
                DRM_ERROR("amdgpu: failed to raise UVD clocks (%d).\n", r);
                return r;
        }

        r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
        if (r) {
                DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
                goto error;
        }

        r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
        if (r) {
                DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
                goto error;
        }

        r = fence_wait(fence, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto error;
        }
        DRM_INFO("ib test on ring %d succeeded\n", ring->idx);

error:
        fence_put(fence);
        amdgpu_asic_set_uvd_clocks(adev, 0, 0);
        return r;
}
/**
 * uvd_v6_0_ring_test_ib - test ib execution
 *
 * @ring: amdgpu_ring pointer
 *
 * Test if we can successfully execute an IB
 */
static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring)
{
        struct fence *fence = NULL;
        int r;

        r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
        if (r) {
                DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
                goto error;
        }

        r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
        if (r) {
                DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
                goto error;
        }

        r = fence_wait(fence, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto error;
        }
        DRM_INFO("ib test on ring %d succeeded\n", ring->idx);

error:
        fence_put(fence);
        return r;
}
static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
                                    uint64_t saddr, uint64_t daddr, int n)
{
        unsigned long start_jiffies;
        unsigned long end_jiffies;
        struct fence *fence = NULL;
        int i, r;

        start_jiffies = jiffies;
        for (i = 0; i < n; i++) {
                struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
                r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
                                       false);
                if (r)
                        goto exit_do_move;
                r = fence_wait(fence, false);
                if (r)
                        goto exit_do_move;
                fence_put(fence);
        }
        end_jiffies = jiffies;
        r = jiffies_to_msecs(end_jiffies - start_jiffies);

exit_do_move:
        if (fence)
                fence_put(fence);
        return r;
}
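/*
 * Illustrative sketch, not taken from the driver: turn the millisecond
 * count returned by amdgpu_benchmark_do_move() into a MB/s figure for n
 * copies of size bytes. The helper name and the rounding are assumptions
 * made for the example.
 */
static void example_log_throughput(unsigned size, int n, int time_ms)
{
        unsigned long long bytes = (unsigned long long)size * n;
        unsigned long long throughput;

        if (time_ms <= 0)
                return;

        /* bytes per ms -> MB/s: (bytes * 1000) / (ms * 1024 * 1024) */
        throughput = div64_u64(bytes * 1000,
                               (unsigned long long)time_ms * 1024 * 1024);
        DRM_INFO("copied %u bytes x %d in %d ms (%llu MB/s)\n",
                 size, n, time_ms, throughput);
}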
/* must be called before _move_to_active().. */
int msm_gem_sync_object(struct drm_gem_object *obj,
                struct msm_fence_context *fctx, bool exclusive)
{
        struct msm_gem_object *msm_obj = to_msm_bo(obj);
        struct reservation_object_list *fobj;
        struct fence *fence;
        int i, ret;

        if (!exclusive) {
                /* NOTE: _reserve_shared() must happen before _add_shared_fence(),
                 * which makes this a slightly strange place to call it.  OTOH this
                 * is a convenient can-fail point to hook it in.  (And similar to
                 * how etnaviv and nouveau handle this.)
                 */
                ret = reservation_object_reserve_shared(msm_obj->resv);
                if (ret)
                        return ret;
        }

        fobj = reservation_object_get_list(msm_obj->resv);
        if (!fobj || (fobj->shared_count == 0)) {
                fence = reservation_object_get_excl(msm_obj->resv);
                /* don't need to wait on our own fences, since ring is fifo */
                if (fence && (fence->context != fctx->context)) {
                        ret = fence_wait(fence, true);
                        if (ret)
                                return ret;
                }
        }

        if (!exclusive || !fobj)
                return 0;

        for (i = 0; i < fobj->shared_count; i++) {
                fence = rcu_dereference_protected(fobj->shared[i],
                                reservation_object_held(msm_obj->resv));
                if (fence->context != fctx->context) {
                        ret = fence_wait(fence, true);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}
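/*
 * Illustrative sketch of the rule the function above encodes: a reader
 * only has to wait for the exclusive (write) fence, while a writer has
 * to wait for the exclusive fence and every shared (read) fence. The
 * helper below is an assumption made for illustration, not msm code; it
 * expects the reservation object to be locked by the caller.
 */
static int example_wait_resv(struct reservation_object *resv, bool write)
{
        struct reservation_object_list *fobj;
        struct fence *fence;
        int i, ret;

        fence = reservation_object_get_excl(resv);
        if (fence) {
                ret = fence_wait(fence, true);
                if (ret)
                        return ret;
        }

        if (!write)
                return 0;

        fobj = reservation_object_get_list(resv);
        for (i = 0; fobj && i < fobj->shared_count; i++) {
                fence = rcu_dereference_protected(fobj->shared[i],
                                                  reservation_object_held(resv));
                ret = fence_wait(fence, true);
                if (ret)
                        return ret;
        }
        return 0;
}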
static void ipu_flip_fence_work_func(struct work_struct *__work)
{
        struct ipu_flip_work *work =
                        container_of(__work, struct ipu_flip_work, fence_work);
        int i;

        /* wait for all fences attached to the FB obj to signal */
        if (work->excl) {
                fence_wait(work->excl, false);
                fence_put(work->excl);
        }
        for (i = 0; i < work->shared_count; i++) {
                fence_wait(work->shared[i], false);
                fence_put(work->shared[i]);
        }

        work->crtc->flip_state = IPU_FLIP_SUBMITTED;
}
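/*
 * Illustrative sketch of an assumed caller, not taken from the driver:
 * before the work item above is queued, the exclusive and shared fences
 * are typically snapshotted from the framebuffer BO's reservation object
 * with reservation_object_get_fences_rcu(). The helper name below is an
 * assumption made for the example.
 */
static int example_collect_fb_fences(struct reservation_object *resv,
                                     struct ipu_flip_work *work)
{
        /* takes a reference on each returned fence; dropped in the work func */
        return reservation_object_get_fences_rcu(resv, &work->excl,
                                                 &work->shared_count,
                                                 &work->shared);
}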
/**
 * amdgpu_fence_wait_empty - wait for all fences to signal
 *
 * @ring: ring the fences are associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 */
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
        uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
        struct fence *fence, **ptr;
        int r;

        if (!seq)
                return 0;

        ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
        rcu_read_lock();
        fence = rcu_dereference(*ptr);
        if (!fence || !fence_get_rcu(fence)) {
                rcu_read_unlock();
                return 0;
        }
        rcu_read_unlock();

        r = fence_wait(fence, false);
        fence_put(fence);
        return r;
}
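/*
 * Illustrative sketch of an assumed caller, not taken from this file:
 * drain every initialized ring before suspend or fence-driver teardown,
 * which is the typical use of amdgpu_fence_wait_empty(). The helper name
 * is an assumption made for the example.
 */
static int example_wait_all_rings_idle(struct amdgpu_device *adev)
{
        int i, r;

        for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
                struct amdgpu_ring *ring = adev->rings[i];

                if (!ring || !ring->fence_drv.initialized)
                        continue;

                r = amdgpu_fence_wait_empty(ring);
                if (r)
                        return r; /* e.g. the GPU hung and needs a reset */
        }
        return 0;
}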
/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @f: resulting fence object
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_fence *fence;
        struct fence *old, **ptr;
        uint32_t seq;

        fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
        if (fence == NULL)
                return -ENOMEM;

        seq = ++ring->fence_drv.sync_seq;
        fence->ring = ring;
        fence_init(&fence->base, &amdgpu_fence_ops,
                   &ring->fence_drv.lock,
                   adev->fence_context + ring->idx,
                   seq);
        amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
                               seq, AMDGPU_FENCE_FLAG_INT);

        ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
        /* This function can't be called concurrently anyway, otherwise
         * emitting the fence would mess up the hardware ring buffer.
         */
        old = rcu_dereference_protected(*ptr, 1);
        if (old && !fence_is_signaled(old)) {
                DRM_INFO("rcu slot is busy\n");
                fence_wait(old, false);
        }

        rcu_assign_pointer(*ptr, fence_get(&fence->base));

        *f = &fence->base;

        return 0;
}
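/*
 * Illustrative sketch, not driver code: fences[] is a power-of-two ring
 * indexed by the low bits of the sequence number, so num_fences_mask is
 * (number of slots - 1). With 8 slots, seq 1 and seq 9 land in the same
 * slot, which is why the emit path above may have to wait for the old
 * occupant before reusing it. The helper only restates that indexing.
 */
static unsigned example_fence_slot(uint32_t seq, unsigned num_fences)
{
        /* num_fences is assumed to be a power of two */
        return seq & (num_fences - 1);
}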
/**
 * cik_sdma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct fence *f = NULL;
        unsigned i;
        unsigned index;
        int r;
        u32 tmp = 0;
        u64 gpu_addr;

        r = amdgpu_wb_get(adev, &index);
        if (r) {
                dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
                return r;
        }

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        tmp = 0xCAFEDEAD;
        adev->wb.wb[index] = cpu_to_le32(tmp);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(ring, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                goto err0;
        }

        ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
        ib.ptr[1] = lower_32_bits(gpu_addr);
        ib.ptr[2] = upper_32_bits(gpu_addr);
        ib.ptr[3] = 1;
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;
        r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
                                                 AMDGPU_FENCE_OWNER_UNDEFINED,
                                                 &f);
        if (r)
                goto err1;

        r = fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto err1;
        }
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = le32_to_cpu(adev->wb.wb[index]);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
                         ring->idx, i);
                goto err1;
        } else {
                DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
                r = -EINVAL;
        }

err1:
        fence_put(f);
        amdgpu_ib_free(adev, &ib);
err0:
        amdgpu_wb_free(adev, index);
        return r;
}
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
                                   struct amdgpu_ring *ringA,
                                   struct amdgpu_ring *ringB,
                                   struct amdgpu_ring *ringC)
{
        struct fence *fenceA = NULL, *fenceB = NULL;
        struct amdgpu_semaphore *semaphore = NULL;
        bool sigA, sigB;
        int i, r;

        r = amdgpu_semaphore_create(adev, &semaphore);
        if (r) {
                DRM_ERROR("Failed to create semaphore\n");
                goto out_cleanup;
        }

        r = amdgpu_ring_lock(ringA, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_wait(ringA, semaphore);
        amdgpu_ring_unlock_commit(ringA);

        r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
        if (r)
                goto out_cleanup;

        r = amdgpu_ring_lock(ringB, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_wait(ringB, semaphore);
        amdgpu_ring_unlock_commit(ringB);
        r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
        if (r)
                goto out_cleanup;

        mdelay(1000);

        if (fence_is_signaled(fenceA)) {
                DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
                goto out_cleanup;
        }
        if (fence_is_signaled(fenceB)) {
                DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
                goto out_cleanup;
        }

        r = amdgpu_ring_lock(ringC, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring C %p\n", ringC);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_signal(ringC, semaphore);
        amdgpu_ring_unlock_commit(ringC);

        for (i = 0; i < 30; ++i) {
                mdelay(100);
                sigA = fence_is_signaled(fenceA);
                sigB = fence_is_signaled(fenceB);
                if (sigA || sigB)
                        break;
        }

        if (!sigA && !sigB) {
                DRM_ERROR("Neither fence A nor B has been signaled\n");
                goto out_cleanup;
        } else if (sigA && sigB) {
                DRM_ERROR("Both fence A and B have been signaled\n");
                goto out_cleanup;
        }

        DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');

        r = amdgpu_ring_lock(ringC, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring C %p\n", ringC);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_signal(ringC, semaphore);
        amdgpu_ring_unlock_commit(ringC);

        mdelay(1000);

        r = fence_wait(fenceA, false);
        if (r) {
                DRM_ERROR("Failed to wait for sync fence A\n");
                goto out_cleanup;
        }
        r = fence_wait(fenceB, false);
        if (r) {
                DRM_ERROR("Failed to wait for sync fence B\n");
                goto out_cleanup;
        }

out_cleanup:
        amdgpu_semaphore_free(adev, &semaphore, NULL);

        if (fenceA)
                fence_put(fenceA);

        if (fenceB)
                fence_put(fenceB);

        if (r)
                printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
static void amdgpu_do_test_moves(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
        struct amdgpu_bo *vram_obj = NULL;
        struct amdgpu_bo **gtt_obj = NULL;
        uint64_t gtt_addr, vram_addr;
        unsigned n, size;
        int i, r;

        size = 1024 * 1024;

        /* Number of tests =
         * (Total GTT - IB pool - writeback page - ring buffers) / test size
         */
        n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024;
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
                if (adev->rings[i])
                        n -= adev->rings[i]->ring_size;
        if (adev->wb.wb_obj)
                n -= AMDGPU_GPU_PAGE_SIZE;
        if (adev->irq.ih.ring_obj)
                n -= adev->irq.ih.ring_size;
        n /= size;

        gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
        if (!gtt_obj) {
                DRM_ERROR("Failed to allocate %d pointers\n", n);
                r = 1;
                goto out_cleanup;
        }

        r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0,
                             NULL, &vram_obj);
        if (r) {
                DRM_ERROR("Failed to create VRAM object\n");
                goto out_cleanup;
        }
        r = amdgpu_bo_reserve(vram_obj, false);
        if (unlikely(r != 0))
                goto out_unref;
        r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
        if (r) {
                DRM_ERROR("Failed to pin VRAM object\n");
                goto out_unres;
        }
        for (i = 0; i < n; i++) {
                void *gtt_map, *vram_map;
                void **gtt_start, **gtt_end;
                void **vram_start, **vram_end;
                struct fence *fence = NULL;

                r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
                                     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
                if (r) {
                        DRM_ERROR("Failed to create GTT object %d\n", i);
                        goto out_lclean;
                }

                r = amdgpu_bo_reserve(gtt_obj[i], false);
                if (unlikely(r != 0))
                        goto out_lclean_unref;
                r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gtt_addr);
                if (r) {
                        DRM_ERROR("Failed to pin GTT object %d\n", i);
                        goto out_lclean_unres;
                }

                r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
                if (r) {
                        DRM_ERROR("Failed to map GTT object %d\n", i);
                        goto out_lclean_unpin;
                }

                for (gtt_start = gtt_map, gtt_end = gtt_map + size;
                     gtt_start < gtt_end;
                     gtt_start++)
                        *gtt_start = gtt_start;

                amdgpu_bo_kunmap(gtt_obj[i]);

                r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr,
                                       size, NULL, &fence);
                if (r) {
                        DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
                        goto out_lclean_unpin;
                }

                r = fence_wait(fence, false);
                if (r) {
                        DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
                        goto out_lclean_unpin;
                }

                fence_put(fence);

                r = amdgpu_bo_kmap(vram_obj, &vram_map);
                if (r) {
                        DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
                        goto out_lclean_unpin;
                }

                for (gtt_start = gtt_map, gtt_end = gtt_map + size,
                     vram_start = vram_map, vram_end = vram_map + size;
                     vram_start < vram_end;
                     gtt_start++, vram_start++) {
                        if (*vram_start != gtt_start) {
                                DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
                                          "expected 0x%p (GTT/VRAM offset "
                                          "0x%16llx/0x%16llx)\n",
                                          i, *vram_start, gtt_start,
                                          (unsigned long long)
                                          (gtt_addr - adev->mc.gtt_start +
                                           (void*)gtt_start - gtt_map),
                                          (unsigned long long)
                                          (vram_addr - adev->mc.vram_start +
                                           (void*)gtt_start - gtt_map));
                                amdgpu_bo_kunmap(vram_obj);
                                goto out_lclean_unpin;
                        }
                        *vram_start = vram_start;
                }

                amdgpu_bo_kunmap(vram_obj);

                r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr,
                                       size, NULL, &fence);
                if (r) {
                        DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
                        goto out_lclean_unpin;
                }

                r = fence_wait(fence, false);
                if (r) {
                        DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
                        goto out_lclean_unpin;
                }

                fence_put(fence);

                r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
                if (r) {
                        DRM_ERROR("Failed to map GTT object after copy %d\n", i);
                        goto out_lclean_unpin;
                }

                for (gtt_start = gtt_map, gtt_end = gtt_map + size,
                     vram_start = vram_map, vram_end = vram_map + size;
                     gtt_start < gtt_end;
                     gtt_start++, vram_start++) {
                        if (*gtt_start != vram_start) {
                                DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
                                          "expected 0x%p (VRAM/GTT offset "
                                          "0x%16llx/0x%16llx)\n",
                                          i, *gtt_start, vram_start,
                                          (unsigned long long)
                                          (vram_addr - adev->mc.vram_start +
                                           (void*)vram_start - vram_map),
                                          (unsigned long long)
                                          (gtt_addr - adev->mc.gtt_start +
                                           (void*)vram_start - vram_map));
                                amdgpu_bo_kunmap(gtt_obj[i]);
                                goto out_lclean_unpin;
                        }
                }

                amdgpu_bo_kunmap(gtt_obj[i]);

                DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
                         gtt_addr - adev->mc.gtt_start);
                continue;

out_lclean_unpin:
                amdgpu_bo_unpin(gtt_obj[i]);
out_lclean_unres:
                amdgpu_bo_unreserve(gtt_obj[i]);
out_lclean_unref:
                amdgpu_bo_unref(&gtt_obj[i]);
out_lclean:
                for (--i; i >= 0; --i) {
                        amdgpu_bo_unpin(gtt_obj[i]);
                        amdgpu_bo_unreserve(gtt_obj[i]);
                        amdgpu_bo_unref(&gtt_obj[i]);
                }
                if (fence)
                        fence_put(fence);
                break;
        }

        amdgpu_bo_unpin(vram_obj);
out_unres:
        amdgpu_bo_unreserve(vram_obj);
out_unref:
        amdgpu_bo_unref(&vram_obj);
out_cleanup:
        kfree(gtt_obj);
        if (r) {
                printk(KERN_WARNING "Error while testing BO move.\n");
        }
}
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
                           struct amdgpu_ring *ringA,
                           struct amdgpu_ring *ringB)
{
        struct fence *fence1 = NULL, *fence2 = NULL;
        struct amdgpu_semaphore *semaphore = NULL;
        int r;

        r = amdgpu_semaphore_create(adev, &semaphore);
        if (r) {
                DRM_ERROR("Failed to create semaphore\n");
                goto out_cleanup;
        }

        r = amdgpu_ring_lock(ringA, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_wait(ringA, semaphore);
        amdgpu_ring_unlock_commit(ringA);

        r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
        if (r)
                goto out_cleanup;

        r = amdgpu_ring_lock(ringA, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_wait(ringA, semaphore);
        amdgpu_ring_unlock_commit(ringA);

        r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
        if (r)
                goto out_cleanup;

        mdelay(1000);

        if (fence_is_signaled(fence1)) {
                DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
                goto out_cleanup;
        }

        r = amdgpu_ring_lock(ringB, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring B %p\n", ringB);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_signal(ringB, semaphore);
        amdgpu_ring_unlock_commit(ringB);

        r = fence_wait(fence1, false);
        if (r) {
                DRM_ERROR("Failed to wait for sync fence 1\n");
                goto out_cleanup;
        }

        mdelay(1000);

        if (fence_is_signaled(fence2)) {
                DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
                goto out_cleanup;
        }

        r = amdgpu_ring_lock(ringB, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring B %p\n", ringB);
                goto out_cleanup;
        }
        amdgpu_semaphore_emit_signal(ringB, semaphore);
        amdgpu_ring_unlock_commit(ringB);

        r = fence_wait(fence2, false);
        if (r) {
                DRM_ERROR("Failed to wait for sync fence 2\n");
                goto out_cleanup;
        }

out_cleanup:
        amdgpu_semaphore_free(adev, &semaphore, NULL);

        if (fence1)
                fence_put(fence1);

        if (fence2)
                fence_put(fence2);

        if (r)
                printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}