/** * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer * @vm: amdgpu_vm pointer * * Update the page table base and flush the VM TLB * using sDMA (CIK). */ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); if (vm_id < 8) { amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); } else { amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); } amdgpu_ring_write(ring, pd_addr >> 12); /* flush TLB */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 1 << vm_id); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0); /* reference */ amdgpu_ring_write(ring, 0); /* mask */ amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ }
/** * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer * @ib: IB object to schedule * * Schedule an IB in the DMA ring (CIK). */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 4) next_rptr++; next_rptr += 4; amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); amdgpu_ring_write(ring, 1); /* number of DWs to follow */ amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); amdgpu_ring_write(ring, ib->length_dw); }
/** * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine * * @rdev: radeon_device pointer * @ib: IB object to schedule * * Schedule an IB in the DMA ring (CIK). */ void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 4) next_rptr++; next_rptr += 4; radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr)); radeon_ring_write(ring, 1); /* number of DWs to follow */ radeon_ring_write(ring, next_rptr); } /* IB packet must end on a 8 DW boundary */ while ((ring->wptr & 7) != 4) radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); radeon_ring_write(ring, ib->length_dw); }
/** * cik_sdma_vm_set_page - update the page tables using sDMA * * @rdev: radeon_device pointer * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * * Update the page tables using sDMA (CIK). */ void cik_sdma_vm_set_page(struct radeon_device *rdev, struct radeon_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { uint64_t value; unsigned ndw; trace_radeon_vm_set_page(pe, addr, count, incr, flags); if (flags & R600_PTE_SYSTEM) { while (count) { ndw = count * 2; if (ndw > 0xFFFFE) ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = pe; ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = ndw; for (; ndw > 0; ndw -= 2, --count, pe += 8) { value = radeon_vm_map_gart(rdev, addr); value &= 0xFFFFFFFFFFFFF000ULL; addr += incr; value |= flags; ib->ptr[ib->length_dw++] = value; ib->ptr[ib->length_dw++] = upper_32_bits(value); } } } else { while (count) { ndw = count; if (ndw > 0x7FFFF) ndw = 0x7FFFF; if (flags & R600_PTE_VALID) value = addr; else value = 0; /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); ib->ptr[ib->length_dw++] = pe; /* dst addr */ ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = flags; /* mask */ ib->ptr[ib->length_dw++] = 0; ib->ptr[ib->length_dw++] = value; /* value */ ib->ptr[ib->length_dw++] = upper_32_bits(value); ib->ptr[ib->length_dw++] = incr; /* increment size */ ib->ptr[ib->length_dw++] = 0; ib->ptr[ib->length_dw++] = ndw; /* number of entries */ pe += ndw * 8; addr += ndw * incr; count -= ndw; } } while (ib->length_dw & 0x7) ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); }
static int cik_sdma_sw_init(void *handle) { struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; r = cik_sdma_init_microcode(adev); if (r) { DRM_ERROR("Failed to load sdma firmware!\n"); return r; } /* SDMA trap event */ r = amdgpu_irq_add_id(adev, 224, &adev->sdma_trap_irq); if (r) return r; /* SDMA Privileged inst */ r = amdgpu_irq_add_id(adev, 241, &adev->sdma_illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ r = amdgpu_irq_add_id(adev, 247, &adev->sdma_illegal_inst_irq); if (r) return r; ring = &adev->sdma[0].ring; ring->ring_obj = NULL; ring = &adev->sdma[1].ring; ring->ring_obj = NULL; ring = &adev->sdma[0].ring; sprintf(ring->name, "sdma0"); r = amdgpu_ring_init(adev, ring, 256 * 1024, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), 0xf, &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP0, AMDGPU_RING_TYPE_SDMA); if (r) return r; ring = &adev->sdma[1].ring; sprintf(ring->name, "sdma1"); r = amdgpu_ring_init(adev, ring, 256 * 1024, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), 0xf, &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP1, AMDGPU_RING_TYPE_SDMA); if (r) return r; return r; }
/** * cik_sdma_fence_ring_emit - emit a fence on the DMA ring * * @rdev: radeon_device pointer * @fence: radeon fence object * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate * an interrupt if needed (CIK). */ void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr; /* write the fence */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); radeon_ring_write(ring, lower_32_bits(addr)); radeon_ring_write(ring, upper_32_bits(addr)); radeon_ring_write(ring, fence->seq); /* generate an interrupt */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); /* flush HDP */ cik_sdma_hdp_flush_ring_emit(rdev, fence->ring); }
/** * cik_sdma_vm_write_pages - update PTEs by writing them manually * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * * Update PTEs by writing them manually using sDMA (CIK). */ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { uint64_t value; unsigned ndw; while (count) { ndw = count * 2; if (ndw > 0xFFFFE) ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = pe; ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = ndw; for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & AMDGPU_PTE_SYSTEM) { value = amdgpu_vm_map_gart(ib->ring->adev, addr); value &= 0xFFFFFFFFFFFFF000ULL; } else if (flags & AMDGPU_PTE_VALID) { value = addr; } else { value = 0; } addr += incr; value |= flags; ib->ptr[ib->length_dw++] = value; ib->ptr[ib->length_dw++] = upper_32_bits(value); } } }
/** * cik_sdma_vm_set_pages - update the page tables using sDMA * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * * Update the page tables using sDMA (CIK). */ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { uint64_t value; unsigned ndw; while (count) { ndw = count; if (ndw > 0x7FFFF) ndw = 0x7FFFF; if (flags & AMDGPU_PTE_VALID) value = addr; else value = 0; /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); ib->ptr[ib->length_dw++] = pe; /* dst addr */ ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = flags; /* mask */ ib->ptr[ib->length_dw++] = 0; ib->ptr[ib->length_dw++] = value; /* value */ ib->ptr[ib->length_dw++] = upper_32_bits(value); ib->ptr[ib->length_dw++] = incr; /* increment size */ ib->ptr[ib->length_dw++] = 0; ib->ptr[ib->length_dw++] = ndw; /* number of entries */ pe += ndw * 8; addr += ndw * incr; count -= ndw; } }
/** * cik_sdma_vm_pad_ib - pad the IB to the required number of dw * * @ib: indirect buffer to fill with padding * */ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); u32 pad_count; int i; pad_count = (8 - (ib->length_dw & 0x7)) % 8; for (i = 0; i < pad_count; i++) if (sdma && sdma->burst_nop && (i == 0)) ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) | SDMA_NOP_COUNT(pad_count - 1); else ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); }
/** * cik_sdma_vm_set_page - update the page tables using sDMA * * @rdev: radeon_device pointer * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * * Update the page tables using sDMA (CIK). */ void cik_sdma_vm_set_page(struct radeon_device *rdev, struct radeon_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { uint64_t value; unsigned ndw; trace_radeon_vm_set_page(pe, addr, count, incr, flags); if (flags == R600_PTE_GART) { uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; while (count) { unsigned bytes = count * 8; if (bytes > 0x1FFFF8) bytes = 0x1FFFF8; ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = bytes; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src); ib->ptr[ib->length_dw++] = upper_32_bits(src); ib->ptr[ib->length_dw++] = lower_32_bits(pe); ib->ptr[ib->length_dw++] = upper_32_bits(pe); pe += bytes; src += bytes; count -= bytes / 8; } } else if (flags & R600_PTE_SYSTEM) {
/** * cik_sdma_vm_write_pages - update PTEs by writing them manually * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * * Update PTEs by writing them manually using sDMA (CIK). */ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, const dma_addr_t *pages_addr, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { uint64_t value; unsigned ndw; while (count) { ndw = count * 2; if (ndw > 0xFFFFE) ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = pe; ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = ndw; for (; ndw > 0; ndw -= 2, --count, pe += 8) { value = amdgpu_vm_map_gart(pages_addr, addr); addr += incr; value |= flags; ib->ptr[ib->length_dw++] = value; ib->ptr[ib->length_dw++] = upper_32_bits(value); } } }
/** * cik_copy_dma - copy pages using the DMA engine * * @rdev: radeon_device pointer * @src_offset: src GPU address * @dst_offset: dst GPU address * @num_gpu_pages: number of GPU pages to xfer * @resv: reservation object to sync to * * Copy GPU paging using the DMA engine (CIK). * Used by the radeon ttm implementation to move pages if * registered as the asic copy callback. */ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct reservation_object *resv) { struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0; r = radeon_semaphore_create(rdev, &sem); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); return ERR_PTR(r); } size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_semaphore_sync_resv(rdev, sem, resv, false); radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; if (cur_size_in_bytes > 0x1fffff) cur_size_in_bytes = 0x1fffff; size_in_bytes -= cur_size_in_bytes; radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); radeon_ring_write(ring, cur_size_in_bytes); radeon_ring_write(ring, 0); /* src/dst endian swap */ radeon_ring_write(ring, lower_32_bits(src_offset)); radeon_ring_write(ring, upper_32_bits(src_offset)); radeon_ring_write(ring, lower_32_bits(dst_offset)); radeon_ring_write(ring, upper_32_bits(dst_offset)); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; } r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_semaphore_free(rdev, &sem, NULL); return ERR_PTR(r); } radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, fence); return fence; }
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) { const int sdma_write_length = 128; const int pm4_dw = 256; amdgpu_context_handle context_handle; amdgpu_bo_handle bo; amdgpu_bo_handle *resources; uint32_t *pm4; struct amdgpu_cs_ib_info *ib_info; struct amdgpu_cs_request *ibs_request; uint64_t bo_mc; volatile uint32_t *bo_cpu; int i, j, r, loop; uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; amdgpu_va_handle va_handle; pm4 = calloc(pm4_dw, sizeof(*pm4)); CU_ASSERT_NOT_EQUAL(pm4, NULL); ib_info = calloc(1, sizeof(*ib_info)); CU_ASSERT_NOT_EQUAL(ib_info, NULL); ibs_request = calloc(1, sizeof(*ibs_request)); CU_ASSERT_NOT_EQUAL(ibs_request, NULL); r = amdgpu_cs_ctx_create(device_handle, &context_handle); CU_ASSERT_EQUAL(r, 0); /* prepare resource */ resources = calloc(1, sizeof(amdgpu_bo_handle)); CU_ASSERT_NOT_EQUAL(resources, NULL); loop = 0; while(loop < 2) { /* allocate UC bo for sDMA use */ r = amdgpu_bo_alloc_and_map(device_handle, sdma_write_length * sizeof(uint32_t), 4096, AMDGPU_GEM_DOMAIN_GTT, gtt_flags[loop], &bo, (void**)&bo_cpu, &bo_mc, &va_handle); CU_ASSERT_EQUAL(r, 0); /* clear bo */ memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); resources[0] = bo; /* fullfill PM4: test DMA write-linear */ i = j = 0; if (ip_type == AMDGPU_HW_IP_DMA) { pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); pm4[i++] = 0xffffffff & bo_mc; pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; pm4[i++] = sdma_write_length; while(j++ < sdma_write_length) pm4[i++] = 0xdeadbeaf; } else if (ip_type == AMDGPU_HW_IP_GFX) {
/** * cik_sdma_ib_test - test an IB on the DMA engine * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information * * Test a simple IB in the DMA ring (CIK). * Returns 0 on success, error on failure. */ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_ib ib; unsigned i; unsigned index; int r; u32 tmp = 0; u64 gpu_addr; if (ring->idx == R600_RING_TYPE_DMA_INDEX) index = R600_WB_DMA_RING_TEST_OFFSET; else index = CAYMAN_WB_DMA1_RING_TEST_OFFSET; gpu_addr = rdev->wb.gpu_addr + index; tmp = 0xCAFEDEAD; rdev->wb.wb[index/4] = cpu_to_le32(tmp); r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); return r; } ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); ib.ptr[1] = lower_32_bits(gpu_addr); ib.ptr[2] = upper_32_bits(gpu_addr); ib.ptr[3] = 1; ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); return r; } r = radeon_fence_wait(ib.fence, false); if (r) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); return r; } for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < rdev->usec_timeout) { DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); } else { DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } radeon_ib_free(rdev, &ib); return r; }
/** * cik_dma_vm_flush - cik vm flush using sDMA * * @rdev: radeon_device pointer * * Update the page table base and flush the VM TLB * using sDMA (CIK). */ void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, unsigned vm_id, uint64_t pd_addr) { u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); if (vm_id < 8) { radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else {
/** * cik_sdma_ib_test - test an IB on the DMA engine * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information * * Test a simple IB in the DMA ring (CIK). * Returns 0 on success, error on failure. */ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_ib ib; unsigned i; int r; void __iomem *ptr = (void *)rdev->vram_scratch.ptr; u32 tmp = 0; if (!ptr) { DRM_ERROR("invalid vram scratch pointer\n"); return -EINVAL; } tmp = 0xCAFEDEAD; writel(tmp, ptr); r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); return r; } ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr); ib.ptr[3] = 1; ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); return r; } r = radeon_fence_wait(ib.fence, false); if (r) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); return r; } for (i = 0; i < rdev->usec_timeout; i++) { tmp = readl(ptr); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < rdev->usec_timeout) { DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); } else { DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } radeon_ib_free(rdev, &ib); return r; }
/** * cik_sdma_emit_fill_buffer - fill buffer using the sDMA engine * * @ring: amdgpu_ring structure holding ring information * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer * * Fill GPU buffers using the DMA engine (CIK). */ static void cik_sdma_emit_fill_buffer(struct amdgpu_ring *ring, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0)); amdgpu_ring_write(ring, lower_32_bits(dst_offset)); amdgpu_ring_write(ring, upper_32_bits(dst_offset)); amdgpu_ring_write(ring, src_data); amdgpu_ring_write(ring, byte_count); }
/** * cik_sdma_emit_fill_buffer - fill buffer using the sDMA engine * * @ring: amdgpu_ring structure holding ring information * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer * * Fill GPU buffers using the DMA engine (CIK). */ static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); ib->ptr[ib->length_dw++] = src_data; ib->ptr[ib->length_dw++] = byte_count; }
/** * cik_dma_vm_flush - cik vm flush using sDMA * * @rdev: radeon_device pointer * * Update the page table base and flush the VM TLB * using sDMA (CIK). */ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) { struct radeon_ring *ring = &rdev->ring[ridx]; if (vm == NULL) return; radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); if (vm->id < 8) { radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); } else {
/** * cik_dma_vm_flush - cik vm flush using sDMA * * @rdev: radeon_device pointer * * Update the page table base and flush the VM TLB * using sDMA (CIK). */ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) { u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ struct radeon_ring *ring = &rdev->ring[ridx]; if (vm == NULL) return; radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); if (vm->id < 8) { radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); } else {
/** * cik_sdma_ring_emit_semaphore - emit a semaphore on the dma ring * * @ring: amdgpu_ring structure holding ring information * @semaphore: amdgpu semaphore object * @emit_wait: wait or signal semaphore * * Add a DMA semaphore packet to the ring wait on or signal * other rings (CIK). */ static bool cik_sdma_ring_emit_semaphore(struct amdgpu_ring *ring, struct amdgpu_semaphore *semaphore, bool emit_wait) { u64 addr = semaphore->gpu_addr; u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S; amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); amdgpu_ring_write(ring, addr & 0xfffffff8); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); return true; }
/** * cik_sdma_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer * @fence: amdgpu fence object * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate * an interrupt if needed (CIK). */ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; /* write the fence */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); /* optionally write high bits as well */ if (write64bit) { addr += 4; amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0)); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); } /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0)); }
/** * cik_sdma_emit_copy_buffer - copy buffer using the sDMA engine * * @ring: amdgpu_ring structure holding ring information * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer * * Copy GPU buffers using the DMA engine (CIK). * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. */ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = byte_count; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); }
/** * cik_sdma_emit_copy_buffer - copy buffer using the sDMA engine * * @ring: amdgpu_ring structure holding ring information * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer * * Copy GPU buffers using the DMA engine (CIK). * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. */ static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); amdgpu_ring_write(ring, byte_count); amdgpu_ring_write(ring, 0); /* src/dst endian swap */ amdgpu_ring_write(ring, lower_32_bits(src_offset)); amdgpu_ring_write(ring, upper_32_bits(src_offset)); amdgpu_ring_write(ring, lower_32_bits(dst_offset)); amdgpu_ring_write(ring, upper_32_bits(dst_offset)); }
/** * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information * @semaphore: radeon semaphore object * @emit_wait: wait or signal semaphore * * Add a DMA semaphore packet to the ring wait on or signal * other rings (CIK). */ bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) { u64 addr = semaphore->gpu_addr; u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S; radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); radeon_ring_write(ring, addr & 0xfffffff8); radeon_ring_write(ring, upper_32_bits(addr)); return true; }
/** * cik_sdma_ring_test_ring - simple async dma engine test * * @ring: amdgpu_ring structure holding ring information * * Test the DMA engine by writing using it to write an * value to memory. (CIK). * Returns 0 for success, error for failure. */ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; unsigned i; unsigned index; int r; u32 tmp; u64 gpu_addr; r = amdgpu_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_wb_free(adev, index); return r; } amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); amdgpu_ring_write(ring, 1); /* number of DWs to follow */ amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < adev->usec_timeout) { DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } amdgpu_wb_free(adev, index); return r; }
/** * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @src: src addr to copy from * @count: number of page entries to update * * Update PTEs by copying them from the GART using sDMA (CIK). */ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib, uint64_t pe, uint64_t src, unsigned count) { unsigned bytes = count * 8; ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = bytes; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src); ib->ptr[ib->length_dw++] = upper_32_bits(src); ib->ptr[ib->length_dw++] = lower_32_bits(pe); ib->ptr[ib->length_dw++] = upper_32_bits(pe); }
/** * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer * @ib: IB object to schedule * * Schedule an IB in the DMA ring (CIK). */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vm_id, bool ctx_switch) { u32 extra_bits = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); amdgpu_ring_write(ring, ib->length_dw); }
/** * cik_sdma_vm_set_pages - update the page tables using sDMA * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * * Update the page tables using sDMA (CIK). */ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ ib->ptr[ib->length_dw++] = upper_32_bits(flags); ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ ib->ptr[ib->length_dw++] = upper_32_bits(addr); ib->ptr[ib->length_dw++] = incr; /* increment size */ ib->ptr[ib->length_dw++] = 0; ib->ptr[ib->length_dw++] = count; /* number of entries */ }
/** * cik_sdma_ring_emit_pipeline_sync - sync the pipeline * * @ring: amdgpu_ring pointer * * Make sure all previous operations are completed (CIK). */ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) { uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; /* wait for idle */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(3) | /* equal */ SDMA_POLL_REG_MEM_EXTRA_M)); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); amdgpu_ring_write(ring, seq); /* reference */ amdgpu_ring_write(ring, 0xfffffff); /* mask */ amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */ }