uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r; uint32_t val; struct dma_fence *f; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); mutex_lock(&adev->virt.lock); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_hdp_flush(ring); amdgpu_ring_emit_rreg(ring, reg); amdgpu_ring_emit_hdp_invalidate(ring); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); mutex_unlock(&adev->virt.lock); r = dma_fence_wait(f, false); if (r) DRM_ERROR("wait for kiq fence error: %ld.\n", r); dma_fence_put(f); val = adev->wb.wb[adev->virt.reg_val_offs]; return val; }
/** * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @adev: amdgpu_device pointer * @num_ibs: number of IBs to schedule * @ibs: IB objects to schedule * @f: fence created during this submission * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. * * On SI, there are two parallel engines fed from the primary ring, * the CE (Constant Engine) and the DE (Drawing Engine). Since * resource descriptors have moved to memory, the CE allows you to * prime the caches while the DE is updating register state so that * the resource descriptors will be already in cache when the draw is * processed. To accomplish this, the userspace driver submits two * IBs, one for the CE and one for the DE. If there is a CE IB (called * a CONST_IB), it will be put on the ring prior to the DE IB. Prior * to SI there was just a DE IB. */ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct fence *last_vm_update, struct amdgpu_job *job, struct fence **f) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; struct fence *hwf; uint64_t ctx; unsigned i; int r = 0; if (num_ibs == 0) return -EINVAL; /* ring tests don't use a job */ if (job) { vm = job->vm; ctx = job->ctx; } else { vm = NULL; ctx = 0; } if (!ring->ready) { dev_err(adev->dev, "couldn't schedule ib\n"); return -EINVAL; } if (vm && !job->vm_id) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } r = amdgpu_ring_alloc(ring, 256 * num_ibs); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); if (vm) { r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, job->gds_base, job->gds_size, job->gws_base, job->gws_size, job->oa_base, job->oa_size); if (r) { amdgpu_ring_undo(ring); return r; } } if (ring->funcs->emit_hdp_flush) amdgpu_ring_emit_hdp_flush(ring); /* always set cond_exec_polling to CONTINUE */ *ring->cond_exe_cpu_addr = 1; skip_preamble = ring->current_ctx == ctx; need_ctx_switch = ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, need_ctx_switch); need_ctx_switch = false; } if (ring->funcs->emit_hdp_invalidate) amdgpu_ring_emit_hdp_invalidate(ring); r = amdgpu_fence_emit(ring, &hwf); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vm_id) amdgpu_vm_reset_id(adev, job->vm_id); amdgpu_ring_undo(ring); return r; } /* wrap the last IB with fence */ if (job && job->uf_bo) { uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo); addr += job->uf_offset; amdgpu_ring_emit_fence(ring, addr, job->uf_sequence, AMDGPU_FENCE_FLAG_64BIT); } if (f) *f = fence_get(hwf); if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); ring->current_ctx = ctx; amdgpu_ring_commit(ring); return 0; }