/** * amdgpu_gart_bind - bind pages into the gart page table * * @adev: amdgpu_device pointer * @offset: offset into the GPU's gart aperture * @pages: number of pages to bind * @pagelist: pages to bind * @dma_addr: DMA addresses of pages * * Binds the requested pages to the gart page table * (all asics). * Returns 0 for success, -EINVAL for failure. */ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint64_t flags) { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS unsigned i,t,p; #endif int r; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); return -EINVAL; } #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; for (i = 0; i < pages; i++, p++) adev->gart.pages[p] = pagelist ? pagelist[i] : NULL; #endif if (!adev->gart.ptr) return 0; r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr); if (r) return r; mb(); amdgpu_asic_flush_hdp(adev, NULL); amdgpu_gmc_flush_gpu_tlb(adev, 0); return 0; }
/** * amdgpu_vm_cpu_commit - commit page table update to the HW * * @p: see amdgpu_vm_update_params definition * @fence: unused * * Make sure that the hardware sees the page table updates. */ static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { /* Flush HDP */ mb(); amdgpu_asic_flush_hdp(p->adev, NULL); return 0; }
/** * amdgpu_gart_unbind - unbind pages from the gart page table * * @adev: amdgpu_device pointer * @offset: offset into the GPU's gart aperture * @pages: number of pages to unbind * * Unbinds the requested pages from the gart page table and * replaces them with the dummy page (all asics). * Returns 0 for success, -EINVAL for failure. */ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; unsigned p; int i, j; u64 page_base; /* Starting from VEGA10, system bit must be 0 to mean invalid. */ uint64_t flags = 0; if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); return -EINVAL; } t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; for (i = 0; i < pages; i++, p++) { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS adev->gart.pages[p] = NULL; #endif page_base = adev->dummy_page_addr; if (!adev->gart.ptr) continue; for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) { amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; } } mb(); amdgpu_asic_flush_hdp(adev, NULL); amdgpu_gmc_flush_gpu_tlb(adev, 0); return 0; }
/** * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @adev: amdgpu_device pointer * @num_ibs: number of IBs to schedule * @ibs: IB objects to schedule * @f: fence created during this submission * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. * * On SI, there are two parallel engines fed from the primary ring, * the CE (Constant Engine) and the DE (Drawing Engine). Since * resource descriptors have moved to memory, the CE allows you to * prime the caches while the DE is updating register state so that * the resource descriptors will be already in cache when the draw is * processed. To accomplish this, the userspace driver submits two * IBs, one for the CE and one for the DE. If there is a CE IB (called * a CONST_IB), it will be put on the ring prior to the DE IB. Prior * to SI there was just a DE IB. */ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct amdgpu_job *job, struct dma_fence **f) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; unsigned fence_flags = 0; unsigned i; int r = 0; bool need_pipe_sync = false; if (num_ibs == 0) return -EINVAL; /* ring tests don't use a job */ if (job) { vm = job->vm; fence_ctx = job->base.s_fence->scheduled.context; } else { vm = NULL; fence_ctx = 0; } if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } alloc_size = ring->funcs->emit_frame_size + num_ibs * ring->funcs->emit_ib_size; r = amdgpu_ring_alloc(ring, alloc_size); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; if (tmp) trace_amdgpu_ib_pipe_sync(job, tmp); dma_fence_put(tmp); } if (ring->funcs->insert_start) ring->funcs->insert_start(ring); if (job) { r = amdgpu_vm_flush(ring, job, need_pipe_sync); if (r) { amdgpu_ring_undo(ring); return r; } } if (job && ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) #endif { if (ring->funcs->emit_hdp_flush) amdgpu_ring_emit_hdp_flush(ring); else amdgpu_asic_flush_hdp(adev, ring); } if (need_ctx_switch) status |= AMDGPU_HAVE_CTX_SWITCH; skip_preamble = ring->current_ctx == fence_ctx; if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; amdgpu_ring_emit_cntxcntl(ring, status); } for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble && !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; amdgpu_ring_emit_ib(ring, job, ib, status); status &= ~AMDGPU_HAVE_CTX_SWITCH; } if (ring->funcs->emit_tmz) amdgpu_ring_emit_tmz(ring, false); #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) #endif amdgpu_asic_invalidate_hdp(adev, ring); if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; /* wrap the last IB with fence */ if (job && job->uf_addr) { amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } r = amdgpu_fence_emit(ring, f, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid); amdgpu_ring_undo(ring); return r; } if (ring->funcs->insert_end) ring->funcs->insert_end(ring); if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); ring->current_ctx = fence_ctx; if (vm && ring->funcs->emit_switch_buffer) amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_commit(ring); return 0; }