/** * cik_copy_dma - copy pages using the DMA engine * * @rdev: radeon_device pointer * @src_offset: src GPU address * @dst_offset: dst GPU address * @num_gpu_pages: number of GPU pages to xfer * @fence: radeon fence object * * Copy GPU paging using the DMA engine (CIK). * Used by the radeon ttm implementation to move pages if * registered as the asic copy callback. */ int cik_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence **fence) { struct radeon_semaphore *sem = NULL; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0; r = radeon_semaphore_create(rdev, &sem); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); return r; } size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); return r; } radeon_semaphore_sync_to(sem, *fence); radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; if (cur_size_in_bytes > 0x1fffff) cur_size_in_bytes = 0x1fffff; size_in_bytes -= cur_size_in_bytes; radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); radeon_ring_write(ring, cur_size_in_bytes); radeon_ring_write(ring, 0); /* src/dst endian swap */ radeon_ring_write(ring, lower_32_bits(src_offset)); radeon_ring_write(ring, upper_32_bits(src_offset)); radeon_ring_write(ring, lower_32_bits(dst_offset)); radeon_ring_write(ring, upper_32_bits(dst_offset)); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; } r = radeon_fence_emit(rdev, fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_semaphore_free(rdev, &sem, NULL); return r; } radeon_ring_unlock_commit(rdev, ring); radeon_semaphore_free(rdev, &sem, *fence); return r; }
/** * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @rdev: radeon_device pointer * @ib: IB object to schedule * @const_ib: Const IB to schedule (SI only) * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. * * On SI, there are two parallel engines fed from the primary ring, * the CE (Constant Engine) and the DE (Drawing Engine). Since * resource descriptors have moved to memory, the CE allows you to * prime the caches while the DE is updating register state so that * the resource descriptors will be already in cache when the draw is * processed. To accomplish this, the userspace driver submits two * IBs, one for the CE and one for the DE. If there is a CE IB (called * a CONST_IB), it will be put on the ring prior to the DE IB. Prior * to SI there was just a DE IB. */ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ dev_err(rdev->dev, "couldn't schedule ib\n"); return -EINVAL; } /* 64 dwords should be enough for fence too */ r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_SYNCS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } /* grab a vm id if necessary */ if (ib->vm) { struct radeon_fence *vm_id_fence; vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); } /* sync with other rings */ r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); if (r) { dev_err(rdev->dev, "failed to sync rings (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); return r; } if (ib->vm) radeon_vm_flush(rdev, ib->vm, ib->ring); if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); if (r) { dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); return r; } if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } if (ib->vm) radeon_vm_fence(rdev, ib->vm, ib->fence); radeon_ring_unlock_commit(rdev, ring); return 0; }
/** * rv770_copy_dma - copy pages using the DMA engine * * @rdev: radeon_device pointer * @src_offset: src GPU address * @dst_offset: dst GPU address * @num_gpu_pages: number of GPU pages to xfer * @fence: radeon fence object * * Copy GPU paging using the DMA engine (r7xx). * Used by the radeon ttm implementation to move pages if * registered as the asic copy callback. */ int rv770_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence **fence) { struct radeon_semaphore *sem = NULL; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0; r = radeon_semaphore_create(rdev, &sem); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); return r; } size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); return r; } radeon_semaphore_sync_to(sem, *fence); radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; if (cur_size_in_dw > 0xFFFF) cur_size_in_dw = 0xFFFF; size_in_dw -= cur_size_in_dw; radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); radeon_ring_write(ring, dst_offset & 0xfffffffc); radeon_ring_write(ring, src_offset & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); src_offset += cur_size_in_dw * 4; dst_offset += cur_size_in_dw * 4; } r = radeon_fence_emit(rdev, fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_semaphore_free(rdev, &sem, NULL); return r; } radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; }
static void radeon_cs_sync_rings(struct radeon_cs_parser *p) { int i; for (i = 0; i < p->nrelocs; i++) { if (!p->relocs[i].robj) continue; radeon_semaphore_sync_to(p->ib.semaphore, p->relocs[i].robj->tbo.sync_obj); } }
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, struct radeon_cs_parser *parser) { struct radeon_fpriv *fpriv = parser->filp->driver_priv; struct radeon_vm *vm = &fpriv->vm; int r; if (parser->chunk_ib_idx == -1) return 0; if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) return 0; if (parser->const_ib.length_dw) { r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib); if (r) { return r; } } r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib); if (r) { return r; } if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev); mutex_lock(&vm->mutex); r = radeon_bo_vm_update_pte(parser, vm); if (r) { goto out; } radeon_cs_sync_rings(parser); radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib); } else { r = radeon_ib_schedule(rdev, &parser->ib, NULL); } out: mutex_unlock(&vm->mutex); return r; }