/** * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine * * @rdev: radeon_device pointer * @ib: IB object to schedule * * Schedule an IB in the DMA ring (r6xx-r7xx). */ void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 4; while ((next_rptr & 7) != 5) next_rptr++; next_rptr += 3; radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); radeon_ring_write(ring, next_rptr); } /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ while ((ring->wptr & 7) != 5) radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); }
/** * r600_dma_fence_ring_emit - emit a fence on the DMA ring * * @rdev: radeon_device pointer * @fence: radeon fence object * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate * an interrupt if needed (r6xx-r7xx). */ void r600_dma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr; /* write the fence */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); radeon_ring_write(ring, lower_32_bits(fence->seq)); /* generate an interrupt */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); }
/** * si_dma_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. */ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct dma_fence *f = NULL; unsigned index; u32 tmp = 0; u64 gpu_addr; long r; r = amdgpu_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err0; } ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); ib.ptr[1] = lower_32_bits(gpu_addr); ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff; ib.ptr[3] = 0xDEADBEEF; ib.length_dw = 4; r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { DRM_INFO("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: amdgpu_wb_free(adev, index); return r; }
/** * r600_copy_dma - copy pages using the DMA engine * * @rdev: radeon_device pointer * @src_offset: src GPU address * @dst_offset: dst GPU address * @num_gpu_pages: number of GPU pages to xfer * @resv: reservation object to sync to * * Copy GPU paging using the DMA engine (r6xx). * Used by the radeon ttm implementation to move pages if * registered as the asic copy callback. */ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct reservation_object *resv) { struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0; r = radeon_semaphore_create(rdev, &sem); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); return ERR_PTR(r); } size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_semaphore_sync_resv(rdev, sem, resv, false); radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; if (cur_size_in_dw > 0xFFFE) cur_size_in_dw = 0xFFFE; size_in_dw -= cur_size_in_dw; radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); radeon_ring_write(ring, dst_offset & 0xfffffffc); radeon_ring_write(ring, src_offset & 0xfffffffc); radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | (upper_32_bits(src_offset) & 0xff))); src_offset += cur_size_in_dw * 4; dst_offset += cur_size_in_dw * 4; } r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, fence); return fence; }
/** * si_dma_emit_fill_buffer - fill buffer using the sDMA engine * * @ring: amdgpu_ring structure holding ring information * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer * * Fill GPU buffers using the DMA engine (VI). */ static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL, 0, 0, 0, byte_count / 4); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); ib->ptr[ib->length_dw++] = src_data; ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16; }
/** * r600_dma_ib_test - test an IB on the DMA engine * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information * * Test a simple IB in the DMA ring (r6xx-SI). * Returns 0 on success, error on failure. */ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_ib ib; unsigned i; int r; void __iomem *ptr = (void *)rdev->vram_scratch.ptr; u32 tmp = 0; if (!ptr) { DRM_ERROR("invalid vram scratch pointer\n"); return -EINVAL; } tmp = 0xCAFEDEAD; writel(tmp, ptr); r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); return r; } ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; ib.ptr[3] = 0xDEADBEEF; ib.length_dw = 4; r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); return r; } r = radeon_fence_wait(ib.fence, false); if (r) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); return r; } for (i = 0; i < rdev->usec_timeout; i++) { tmp = readl(ptr); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < rdev->usec_timeout) { DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); } else { DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } radeon_ib_free(rdev, &ib); return r; }
/** * si_dma_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer * @fence: amdgpu fence object * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate * an interrupt if needed (VI). */ static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; /* write the fence */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); amdgpu_ring_write(ring, seq); /* optionally write high bits as well */ if (write64bit) { addr += 4; amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); amdgpu_ring_write(ring, upper_32_bits(seq)); } /* generate an interrupt */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0)); }
/** * si_dma_emit_copy_buffer - copy buffer using the sDMA engine * * @ring: amdgpu_ring structure holding ring information * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. */ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, byte_count); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff; ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff; }
/** * cik_dma_vm_copy_pte - update PTEs by copying them from the GART * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @src: src addr to copy from * @count: number of page entries to update * * Update PTEs by copying them from the GART using DMA (SI). */ static void si_dma_vm_copy_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t src, unsigned count) { unsigned bytes = count * 8; ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, bytes); ib->ptr[ib->length_dw++] = lower_32_bits(pe); ib->ptr[ib->length_dw++] = lower_32_bits(src); ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; }
/** * si_dma_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer * @vm: amdgpu_vm pointer * * Update the page table base and flush the VM TLB * using sDMA (VI). */ static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); if (vm_id < 8) amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); else amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); amdgpu_ring_write(ring, pd_addr >> 12); /* bits 0-7 are the VM contexts0-7 */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST)); amdgpu_ring_write(ring, 1 << vm_id); /* wait for invalidate to complete */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0xff << 16); /* retry */ amdgpu_ring_write(ring, 1 << vm_id); /* mask */ amdgpu_ring_write(ring, 0); /* value */ amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ }
/** * cik_sdma_ring_emit_pipeline_sync - sync the pipeline * * @ring: amdgpu_ring pointer * * Make sure all previous operations are completed (CIK). */ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) { uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; /* wait for idle */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) | (1 << 27)); /* Poll memory */ amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */ amdgpu_ring_write(ring, 0xffffffff); /* mask */ amdgpu_ring_write(ring, seq); /* value */ amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */ }
/** * si_dma_ring_test_ring - simple async dma engine test * * @ring: amdgpu_ring structure holding ring information * * Test the DMA engine by writing using it to write an * value to memory. (VI). * Returns 0 for success, error for failure. */ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; unsigned i; unsigned index; int r; u32 tmp; u64 gpu_addr; r = amdgpu_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 4); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_wb_free(adev, index); return r; } amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < adev->usec_timeout) { DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } amdgpu_wb_free(adev, index); return r; }
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vm_id, bool ctx_switch) { /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ while ((ring->wptr & 7) != 5) amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); }
/** * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information * @semaphore: radeon semaphore object * @emit_wait: wait or signal semaphore * * Add a DMA semaphore packet to the ring wait on or signal * other rings (r6xx-SI). */ bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) { u64 addr = semaphore->gpu_addr; u32 s = emit_wait ? 0 : 1; radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(addr) & 0xff); return true; }
/** * si_dma_vm_write_pte - update PTEs by writing them manually * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @value: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * * Update PTEs by writing them manually using DMA (SI). */ static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t value, unsigned count, uint32_t incr) { unsigned ndw = count * 2; ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); ib->ptr[ib->length_dw++] = lower_32_bits(pe); ib->ptr[ib->length_dw++] = upper_32_bits(pe); for (; ndw > 0; ndw -= 2) { ib->ptr[ib->length_dw++] = lower_32_bits(value); ib->ptr[ib->length_dw++] = upper_32_bits(value); value += incr; } }
/** * r600_dma_ring_test - simple async dma engine test * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information * * Test the DMA engine by writing using it to write an * value to memory. (r6xx-SI). * Returns 0 for success, error for failure. */ int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { unsigned i; int r; unsigned index; u32 tmp; u64 gpu_addr; if (ring->idx == R600_RING_TYPE_DMA_INDEX) index = R600_WB_DMA_RING_TEST_OFFSET; else index = CAYMAN_WB_DMA1_RING_TEST_OFFSET; gpu_addr = rdev->wb.gpu_addr + index; tmp = 0xCAFEDEAD; rdev->wb.wb[index/4] = cpu_to_le32(tmp); r = radeon_ring_lock(rdev, ring, 4); if (r) { DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); return r; } radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); radeon_ring_write(ring, lower_32_bits(gpu_addr)); radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); radeon_ring_write(ring, 0xDEADBEEF); radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < rdev->usec_timeout) { DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } return r; }
/** * r600_dma_ring_test - simple async dma engine test * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information * * Test the DMA engine by writing using it to write an * value to memory. (r6xx-SI). * Returns 0 for success, error for failure. */ int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { unsigned i; int r; void __iomem *ptr = (void *)rdev->vram_scratch.ptr; u32 tmp; if (!ptr) { DRM_ERROR("invalid vram scratch pointer\n"); return -EINVAL; } tmp = 0xCAFEDEAD; writel(tmp, ptr); r = radeon_ring_lock(rdev, ring, 4); if (r) { DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); return r; } radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); radeon_ring_write(ring, 0xDEADBEEF); radeon_ring_unlock_commit(rdev, ring); for (i = 0; i < rdev->usec_timeout; i++) { tmp = readl(ptr); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < rdev->usec_timeout) { DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } return r; }
void r600_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, uint64_t size) { struct radeon_winsys_cs *cs = rctx->b.dma.cs; unsigned i, ncopy, csize; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&rdst->valid_buffer_range, dst_offset, dst_offset + size); size >>= 2; /* convert to dwords */ ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW); r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc); for (i = 0; i < ncopy; i++) { csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_BUFFER); radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize)); radeon_emit(cs, dst_offset & 0xfffffffc); radeon_emit(cs, src_offset & 0xfffffffc); radeon_emit(cs, (dst_offset >> 32UL) & 0xff); radeon_emit(cs, (src_offset >> 32UL) & 0xff); dst_offset += csize << 2; src_offset += csize << 2; size -= csize; } r600_dma_emit_wait_idle(&rctx->b); }
.sw_fini = si_dma_sw_fini, .hw_init = si_dma_hw_init, .hw_fini = si_dma_hw_fini, .suspend = si_dma_suspend, .resume = si_dma_resume, .is_idle = si_dma_is_idle, .wait_for_idle = si_dma_wait_for_idle, .soft_reset = si_dma_soft_reset, .set_clockgating_state = si_dma_set_clockgating_state, .set_powergating_state = si_dma_set_powergating_state, }; static const struct amdgpu_ring_funcs si_dma_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0), .get_rptr = si_dma_ring_get_rptr, .get_wptr = si_dma_ring_get_wptr, .set_wptr = si_dma_ring_set_wptr, .emit_frame_size = 3 + /* si_dma_ring_emit_hdp_flush */ 3 + /* si_dma_ring_emit_hdp_invalidate */ 6 + /* si_dma_ring_emit_pipeline_sync */ 12 + /* si_dma_ring_emit_vm_flush */ 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */ .emit_ib = si_dma_ring_emit_ib, .emit_fence = si_dma_ring_emit_fence, .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync, .emit_vm_flush = si_dma_ring_emit_vm_flush, .emit_hdp_flush = si_dma_ring_emit_hdp_flush,
/** * si_dma_pad_ib - pad the IB to the required number of dw * * @ib: indirect buffer to fill with padding * */ static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { while (ib->length_dw & 0x7) ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); }
static void si_dma_ring_emit_hdp_flush(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); amdgpu_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL)); amdgpu_ring_write(ring, 1); }
static void si_dma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); amdgpu_ring_write(ring, (0xf << 16) | (HDP_DEBUG0)); amdgpu_ring_write(ring, 1); }