/** * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @rdev: radeon_device pointer * @ib: IB object to schedule * @const_ib: Const IB to schedule (SI only) * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. * * On SI, there are two parallel engines fed from the primary ring, * the CE (Constant Engine) and the DE (Drawing Engine). Since * resource descriptors have moved to memory, the CE allows you to * prime the caches while the DE is updating register state so that * the resource descriptors will be already in cache when the draw is * processed. To accomplish this, the userspace driver submits two * IBs, one for the CE and one for the DE. If there is a CE IB (called * a CONST_IB), it will be put on the ring prior to the DE IB. Prior * to SI there was just a DE IB. */ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ dev_err(rdev->dev, "couldn't schedule ib\n"); return -EINVAL; } /* 64 dwords should be enough for fence too */ r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_SYNCS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } /* grab a vm id if necessary */ if (ib->vm) { struct radeon_fence *vm_id_fence; vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); } /* sync with other rings */ r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); if (r) { dev_err(rdev->dev, "failed to sync rings (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); return r; } if (ib->vm) radeon_vm_flush(rdev, ib->vm, ib->ring); if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); if (r) { dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); return r; } if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } if (ib->vm) radeon_vm_fence(rdev, ib->vm, ib->fence); radeon_ring_unlock_commit(rdev, ring); return 0; }
/** * cik_copy_dma - copy pages using the DMA engine * * @rdev: radeon_device pointer * @src_offset: src GPU address * @dst_offset: dst GPU address * @num_gpu_pages: number of GPU pages to xfer * @resv: reservation object to sync to * * Copy GPU paging using the DMA engine (CIK). * Used by the radeon ttm implementation to move pages if * registered as the asic copy callback. */ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct reservation_object *resv) { struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0; r = radeon_semaphore_create(rdev, &sem); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); return ERR_PTR(r); } size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_semaphore_sync_resv(rdev, sem, resv, false); radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; if (cur_size_in_bytes > 0x1fffff) cur_size_in_bytes = 0x1fffff; size_in_bytes -= cur_size_in_bytes; radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); radeon_ring_write(ring, cur_size_in_bytes); radeon_ring_write(ring, 0); /* src/dst endian swap */ radeon_ring_write(ring, lower_32_bits(src_offset)); radeon_ring_write(ring, upper_32_bits(src_offset)); radeon_ring_write(ring, lower_32_bits(dst_offset)); radeon_ring_write(ring, upper_32_bits(dst_offset)); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; } r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, fence); return fence; }
/** * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @rdev: radeon_device pointer * @ib: IB object to schedule * @const_ib: Const IB to schedule (SI only) * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. * * On SI, there are two parallel engines fed from the primary ring, * the CE (Constant Engine) and the DE (Drawing Engine). Since * resource descriptors have moved to memory, the CE allows you to * prime the caches while the DE is updating register state so that * the resource descriptors will be already in cache when the draw is * processed. To accomplish this, the userspace driver submits two * IBs, one for the CE and one for the DE. If there is a CE IB (called * a CONST_IB), it will be put on the ring prior to the DE IB. Prior * to SI there was just a DE IB. */ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; bool need_sync = false; int i, r = 0; if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ dev_err(rdev->dev, "couldn't schedule ib\n"); return -EINVAL; } /* 64 dwords should be enough for fence too */ r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } for (i = 0; i < RADEON_NUM_RINGS; ++i) { struct radeon_fence *fence = ib->sync_to[i]; if (radeon_fence_need_sync(fence, ib->ring)) { need_sync = true; radeon_semaphore_sync_rings(rdev, ib->semaphore, fence->ring, ib->ring); radeon_fence_note_sync(fence, ib->ring); } } /* immediately free semaphore when we don't need to sync */ if (!need_sync) { radeon_semaphore_free(rdev, &ib->semaphore, NULL); } /* if we can't remember our last VM flush then flush now! */ /* XXX figure out why we have to flush for every IB */ if (ib->vm /*&& !ib->vm->last_flush*/) { radeon_ring_vm_flush(rdev, ib->ring, ib->vm); } if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); if (r) { dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); return r; } if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } /* we just flushed the VM, remember that */ if (ib->vm && !ib->vm->last_flush) { ib->vm->last_flush = radeon_fence_ref(ib->fence); } radeon_ring_unlock_commit(rdev, ring); return 0; }
/** * r600_copy_dma - copy pages using the DMA engine * * @rdev: radeon_device pointer * @src_offset: src GPU address * @dst_offset: dst GPU address * @num_gpu_pages: number of GPU pages to xfer * @resv: reservation object to sync to * * Copy GPU paging using the DMA engine (r6xx). * Used by the radeon ttm implementation to move pages if * registered as the asic copy callback. */ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct reservation_object *resv) { struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0; r = radeon_semaphore_create(rdev, &sem); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); return ERR_PTR(r); } size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_semaphore_sync_resv(rdev, sem, resv, false); radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; if (cur_size_in_dw > 0xFFFE) cur_size_in_dw = 0xFFFE; size_in_dw -= cur_size_in_dw; radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); radeon_ring_write(ring, dst_offset & 0xfffffffc); radeon_ring_write(ring, src_offset & 0xfffffffc); radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | (upper_32_bits(src_offset) & 0xff))); src_offset += cur_size_in_dw * 4; dst_offset += cur_size_in_dw * 4; } r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, fence); return fence; }
static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_fence **fence) { uint32_t handle = ring->idx ^ 0xdeafbeef; int r; if (ring->idx == R600_RING_TYPE_UVD_INDEX) { r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL); if (r) { DRM_ERROR("Failed to get dummy create msg\n"); return r; } r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence); if (r) { DRM_ERROR("Failed to get dummy destroy msg\n"); return r; } } else if (ring->idx == TN_RING_TYPE_VCE1_INDEX || ring->idx == TN_RING_TYPE_VCE2_INDEX) { r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL); if (r) { DRM_ERROR("Failed to get dummy create msg\n"); return r; } r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence); if (r) { DRM_ERROR("Failed to get dummy destroy msg\n"); return r; } } else { r = radeon_ring_lock(rdev, ring, 64); if (r) { DRM_ERROR("Failed to lock ring A %d\n", ring->idx); return r; } r = radeon_fence_emit(rdev, fence, ring->idx); if (r) { DRM_ERROR("Failed to emit fence\n"); radeon_ring_unlock_undo(rdev, ring); return r; } radeon_ring_unlock_commit(rdev, ring, false); } return 0; }
static void radeon_test_ring_sync2(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB, struct radeon_ring *ringC) { struct radeon_fence *fenceA = NULL, *fenceB = NULL; struct radeon_semaphore *semaphore = NULL; bool sigA, sigB; int i, r; r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringA, 64); if (r) { DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); r = radeon_fence_emit(rdev, &fenceA, ringA->idx); if (r) { DRM_ERROR("Failed to emit sync fence 1\n"); radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } radeon_ring_unlock_commit(rdev, ringA); r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); r = radeon_fence_emit(rdev, &fenceB, ringB->idx); if (r) { DRM_ERROR("Failed to create sync fence 2\n"); radeon_ring_unlock_undo(rdev, ringB); goto out_cleanup; } radeon_ring_unlock_commit(rdev, ringB); mdelay(1000); if (radeon_fence_signaled(fenceA)) { DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); goto out_cleanup; } if (radeon_fence_signaled(fenceB)) { DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringC, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC); for (i = 0; i < 30; ++i) { mdelay(100); sigA = radeon_fence_signaled(fenceA); sigB = radeon_fence_signaled(fenceB); if (sigA || sigB) break; } if (!sigA && !sigB) { DRM_ERROR("Neither fence A nor B has been signaled\n"); goto out_cleanup; } else if (sigA && sigB) { DRM_ERROR("Both fence A and B has been signaled\n"); goto out_cleanup; } DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B'); r = radeon_ring_lock(rdev, ringC, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC); mdelay(1000); r = radeon_fence_wait(fenceA, false); if (r) { DRM_ERROR("Failed to wait for sync fence A\n"); goto out_cleanup; } r = radeon_fence_wait(fenceB, false); if (r) { DRM_ERROR("Failed to wait for sync fence B\n"); goto out_cleanup; } out_cleanup: radeon_semaphore_free(rdev, &semaphore, NULL); if (fenceA) radeon_fence_unref(&fenceA); if (fenceB) radeon_fence_unref(&fenceB); if (r) DRM_ERROR("Error while testing ring sync (%d).\n", r); }
void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB) { struct radeon_fence *fence1 = NULL, *fence2 = NULL; struct radeon_semaphore *semaphore = NULL; int r; r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringA, 64); if (r) { DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); r = radeon_fence_emit(rdev, &fence1, ringA->idx); if (r) { DRM_ERROR("Failed to emit fence 1\n"); radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); r = radeon_fence_emit(rdev, &fence2, ringA->idx); if (r) { DRM_ERROR("Failed to emit fence 2\n"); radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } radeon_ring_unlock_commit(rdev, ringA); mdelay(1000); if (radeon_fence_signaled(fence1)) { DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence1, false); if (r) { DRM_ERROR("Failed to wait for sync fence 1\n"); goto out_cleanup; } mdelay(1000); if (radeon_fence_signaled(fence2)) { DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence2, false); if (r) { DRM_ERROR("Failed to wait for sync fence 1\n"); goto out_cleanup; } out_cleanup: radeon_semaphore_free(rdev, &semaphore, NULL); if (fence1) radeon_fence_unref(&fence1); if (fence2) radeon_fence_unref(&fence2); if (r) DRM_ERROR("Error while testing ring sync (%d).\n", r); }