/**
 * uvd_v6_0_ring_test_ring - register write test
 *
 * @ring: amdgpu_ring pointer
 *
 * Test if we can successfully write to the context register
 */
static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmUVD_CONTEXT_ID);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}
/**
 * uvd_v6_0_ring_test_ring - register write test
 *
 * @ring: amdgpu_ring pointer
 *
 * Test if we can successfully write to the context register
 */
static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmUVD_CONTEXT_ID);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	signed long r;
	uint32_t val;
	struct dma_fence *f;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	mutex_lock(&adev->virt.lock_kiq);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg);
	amdgpu_fence_emit(ring, &f);
	amdgpu_ring_commit(ring);
	mutex_unlock(&adev->virt.lock_kiq);

	r = dma_fence_wait(f, false);
	if (r)
		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
	dma_fence_put(f);

	val = adev->wb.wb[adev->virt.reg_val_offs];

	return val;
}
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	signed long r;
	unsigned long flags;
	uint32_t val, seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	if (r < 1) {
		DRM_ERROR("wait for kiq fence error: %ld\n", r);
		return ~0;
	}

	val = adev->wb.wb[adev->virt.reg_val_offs];

	return val;
}
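/*
 * The helper above only matters when a register cannot be read through
 * plain MMIO, e.g. on an SR-IOV virtual function that must tunnel the
 * access through the KIQ ring. A minimal sketch of such a dispatch is
 * given below; my_rreg() and my_sriov_runtime() are hypothetical names
 * used for illustration only and are not the driver's real MMIO path.
 */
static uint32_t my_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	/* assumed predicate: true while the VF may not touch MMIO directly */
	if (my_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	/* otherwise read the register through the regular MMIO aperture */
	return readl(((void __iomem *)adev->rmmio) + (reg * 4));
}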
/**
 * uvd_v5_0_hw_init - start and test UVD block
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int uvd_v5_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring = &adev->uvd.ring;
	uint32_t tmp;
	int r;

	/* raise clocks while booting up the VCPU */
	amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);

	r = uvd_v5_0_start(adev);
	if (r)
		goto done;

	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	r = amdgpu_ring_alloc(ring, 10);
	if (r) {
		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
		goto done;
	}

	tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	/* Clear timeout status bits */
	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
	amdgpu_ring_write(ring, 0x8);

	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
	amdgpu_ring_write(ring, 3);

	amdgpu_ring_commit(ring);

done:
	/* lower clocks again */
	amdgpu_asic_set_uvd_clocks(adev, 0, 0);

	if (!r)
		DRM_INFO("UVD initialized successfully.\n");

	return r;
}
/**
 * uvd_v5_0_hw_init - start and test UVD block
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int uvd_v5_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring = &adev->uvd.ring;
	uint32_t tmp;
	int r;

	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
	uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
	uvd_v5_0_enable_mgcg(adev, true);

	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	r = amdgpu_ring_alloc(ring, 10);
	if (r) {
		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
		goto done;
	}

	tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	/* Clear timeout status bits */
	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
	amdgpu_ring_write(ring, 0x8);

	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
	amdgpu_ring_write(ring, 3);

	amdgpu_ring_commit(ring);

done:
	if (!r)
		DRM_INFO("UVD initialized successfully.\n");

	return r;
}
/**
 * cik_sdma_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, 1); /* number of DWs to follow */
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_wb_free(adev, index);

	return r;
}
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for the gpu reset case because this way may
	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * also don't wait anymore for IRQ context
	 */
	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();

	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_kiq_write:
	pr_err("failed to write reg:%x\n", reg);
}
/**
 * amdgpu_ring_restore - append saved commands to the ring again
 *
 * @ring: ring to append commands to
 * @size: number of dwords we want to write
 * @data: saved commands
 *
 * Allocates space on the ring and restores the previously saved commands.
 */
int amdgpu_ring_restore(struct amdgpu_ring *ring,
			unsigned size, uint32_t *data)
{
	int i, r;

	if (!size || !data)
		return 0;

	/* restore the saved ring content */
	r = amdgpu_ring_alloc(ring, size);
	if (r)
		return r;

	for (i = 0; i < size; ++i) {
		amdgpu_ring_write(ring, data[i]);
	}

	amdgpu_ring_commit(ring);
	kfree(data);
	return 0;
}
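/*
 * amdgpu_ring_restore() above consumes its buffer: on success it kfree()s
 * data, while on failure ownership stays with the caller. An illustrative
 * recovery flow is sketched below; save_ring_contents() is a hypothetical
 * stand-in for whatever produced the (size, data) pair during the save step.
 */
static int my_ring_recover(struct amdgpu_ring *ring)
{
	uint32_t *data = NULL;
	unsigned size;
	int r;

	/* hypothetical save step: copy the not-yet-executed ring contents */
	size = save_ring_contents(ring, &data);

	/* ... reset and re-initialize the ring hardware here ... */

	/* replay the saved commands; restore frees data only when it succeeds */
	r = amdgpu_ring_restore(ring, size, data);
	if (r)
		kfree(data);
	return r;
}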
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	if (r < 1)
		DRM_ERROR("wait for kiq fence error: %ld\n", r);
}
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
					uint32_t reg0, uint32_t reg1,
					uint32_t ref, uint32_t mask)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
					    ref, mask);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for IRQ context */
	if (r < 1 && in_interrupt())
		goto failed_kiq;

	might_sleep();

	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq;

	return;

failed_kiq:
	pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
}
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r;
	struct dma_fence *f;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	mutex_lock(&adev->virt.lock);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_hdp_flush(ring);
	amdgpu_ring_emit_wreg(ring, reg, v);
	amdgpu_ring_emit_hdp_invalidate(ring);
	amdgpu_fence_emit(ring, &f);
	amdgpu_ring_commit(ring);
	mutex_unlock(&adev->virt.lock);

	r = dma_fence_wait(f, false);
	if (r)
		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
	dma_fence_put(f);
}
/**
 * uvd_v6_0_enc_ring_test_ring - test if UVD ENC ring is working
 *
 * @ring: the engine to test on
 *
 */
static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}
/**
 * uvd_v6_0_hw_init - start and test UVD block
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int uvd_v6_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
	uint32_t tmp;
	int i, r;

	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
	uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
	uvd_v6_0_enable_mgcg(adev, true);

	r = amdgpu_ring_test_helper(ring);
	if (r)
		goto done;

	r = amdgpu_ring_alloc(ring, 10);
	if (r) {
		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
		goto done;
	}

	tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0xFFFFF);

	/* Clear timeout status bits */
	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
	amdgpu_ring_write(ring, 0x8);

	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
	amdgpu_ring_write(ring, 3);

	amdgpu_ring_commit(ring);

	if (uvd_v6_0_enc_support(adev)) {
		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
			ring = &adev->uvd.inst->ring_enc[i];
			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;
		}
	}

done:
	if (!r) {
		if (uvd_v6_0_enc_support(adev))
			DRM_INFO("UVD and UVD ENC initialized successfully.\n");
		else
			DRM_INFO("UVD initialized successfully.\n");
	}

	return r;
}
/**
 * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
 *
 * @adev: amdgpu_device pointer
 * @num_ibs: number of IBs to schedule
 * @ibs: IB objects to schedule
 * @f: fence created during this submission
 *
 * Schedule an IB on the associated ring (all asics).
 * Returns 0 on success, error on failure.
 *
 * On SI, there are two parallel engines fed from the primary ring,
 * the CE (Constant Engine) and the DE (Drawing Engine). Since
 * resource descriptors have moved to memory, the CE allows you to
 * prime the caches while the DE is updating register state so that
 * the resource descriptors will be already in cache when the draw is
 * processed. To accomplish this, the userspace driver submits two
 * IBs, one for the CE and one for the DE. If there is a CE IB (called
 * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
 * to SI there was just a DE IB.
 */
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		       struct amdgpu_ib *ibs, struct amdgpu_job *job,
		       struct dma_fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib *ib = &ibs[0];
	struct dma_fence *tmp = NULL;
	bool skip_preamble, need_ctx_switch;
	unsigned patch_offset = ~0;
	struct amdgpu_vm *vm;
	uint64_t fence_ctx;
	uint32_t status = 0, alloc_size;
	unsigned fence_flags = 0;
	unsigned i;
	int r = 0;
	bool need_pipe_sync = false;

	if (num_ibs == 0)
		return -EINVAL;

	/* ring tests don't use a job */
	if (job) {
		vm = job->vm;
		fence_ctx = job->base.s_fence->scheduled.context;
	} else {
		vm = NULL;
		fence_ctx = 0;
	}

	if (!ring->sched.ready) {
		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
		return -EINVAL;
	}

	if (vm && !job->vmid) {
		dev_err(adev->dev, "VM IB without ID\n");
		return -EINVAL;
	}

	alloc_size = ring->funcs->emit_frame_size + num_ibs *
		ring->funcs->emit_ib_size;

	r = amdgpu_ring_alloc(ring, alloc_size);
	if (r) {
		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
		return r;
	}

	need_ctx_switch = ring->current_ctx != fence_ctx;
	if (ring->funcs->emit_pipeline_sync && job &&
	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
	     amdgpu_vm_need_pipeline_sync(ring, job))) {
		need_pipe_sync = true;

		if (tmp)
			trace_amdgpu_ib_pipe_sync(job, tmp);

		dma_fence_put(tmp);
	}

	if (ring->funcs->insert_start)
		ring->funcs->insert_start(ring);

	if (job) {
		r = amdgpu_vm_flush(ring, job, need_pipe_sync);
		if (r) {
			amdgpu_ring_undo(ring);
			return r;
		}
	}

	if (job && ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

#ifdef CONFIG_X86_64
	if (!(adev->flags & AMD_IS_APU))
#endif
	{
		if (ring->funcs->emit_hdp_flush)
			amdgpu_ring_emit_hdp_flush(ring);
		else
			amdgpu_asic_flush_hdp(adev, ring);
	}

	if (need_ctx_switch)
		status |= AMDGPU_HAVE_CTX_SWITCH;

	skip_preamble = ring->current_ctx == fence_ctx;
	if (job && ring->funcs->emit_cntxcntl) {
		status |= job->preamble_status;
		amdgpu_ring_emit_cntxcntl(ring, status);
	}

	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		/* drop preamble IBs if we don't have a context switch */
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
		    skip_preamble &&
		    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
			continue;

		amdgpu_ring_emit_ib(ring, job, ib, status);
		status &= ~AMDGPU_HAVE_CTX_SWITCH;
	}

	if (ring->funcs->emit_tmz)
		amdgpu_ring_emit_tmz(ring, false);

#ifdef CONFIG_X86_64
	if (!(adev->flags & AMD_IS_APU))
#endif
		amdgpu_asic_invalidate_hdp(adev, ring);

	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;

	/* wrap the last IB with fence */
	if (job && job->uf_addr) {
		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
	}

	r = amdgpu_fence_emit(ring, f, fence_flags);
	if (r) {
		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
		if (job && job->vmid)
			amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid);
		amdgpu_ring_undo(ring);
		return r;
	}

	if (ring->funcs->insert_end)
		ring->funcs->insert_end(ring);

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	ring->current_ctx = fence_ctx;
	if (vm && ring->funcs->emit_switch_buffer)
		amdgpu_ring_emit_switch_buffer(ring);
	amdgpu_ring_commit(ring);
	return 0;
}
/**
 * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
 *
 * @adev: amdgpu_device pointer
 * @num_ibs: number of IBs to schedule
 * @ibs: IB objects to schedule
 * @f: fence created during this submission
 *
 * Schedule an IB on the associated ring (all asics).
 * Returns 0 on success, error on failure.
 *
 * On SI, there are two parallel engines fed from the primary ring,
 * the CE (Constant Engine) and the DE (Drawing Engine). Since
 * resource descriptors have moved to memory, the CE allows you to
 * prime the caches while the DE is updating register state so that
 * the resource descriptors will be already in cache when the draw is
 * processed. To accomplish this, the userspace driver submits two
 * IBs, one for the CE and one for the DE. If there is a CE IB (called
 * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
 * to SI there was just a DE IB.
 */
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
		       struct amdgpu_job *job, struct fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib *ib = &ibs[0];
	bool skip_preamble, need_ctx_switch;
	unsigned patch_offset = ~0;
	struct amdgpu_vm *vm;
	struct fence *hwf;
	uint64_t ctx;
	unsigned i;
	int r = 0;

	if (num_ibs == 0)
		return -EINVAL;

	/* ring tests don't use a job */
	if (job) {
		vm = job->vm;
		ctx = job->ctx;
	} else {
		vm = NULL;
		ctx = 0;
	}

	if (!ring->ready) {
		dev_err(adev->dev, "couldn't schedule ib\n");
		return -EINVAL;
	}

	if (vm && !job->vm_id) {
		dev_err(adev->dev, "VM IB without ID\n");
		return -EINVAL;
	}

	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
	if (r) {
		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
		return r;
	}

	if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (vm) {
		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
				    job->gds_base, job->gds_size,
				    job->gws_base, job->gws_size,
				    job->oa_base, job->oa_size);
		if (r) {
			amdgpu_ring_undo(ring);
			return r;
		}
	}

	if (ring->funcs->emit_hdp_flush)
		amdgpu_ring_emit_hdp_flush(ring);

	/* always set cond_exec_polling to CONTINUE */
	*ring->cond_exe_cpu_addr = 1;

	skip_preamble = ring->current_ctx == ctx;
	need_ctx_switch = ring->current_ctx != ctx;
	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		/* drop preamble IBs if we don't have a context switch */
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
			continue;

		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
				    need_ctx_switch);
		need_ctx_switch = false;
	}

	if (ring->funcs->emit_hdp_invalidate)
		amdgpu_ring_emit_hdp_invalidate(ring);

	r = amdgpu_fence_emit(ring, &hwf);
	if (r) {
		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
		if (job && job->vm_id)
			amdgpu_vm_reset_id(adev, job->vm_id);
		amdgpu_ring_undo(ring);
		return r;
	}

	/* wrap the last IB with fence */
	if (job && job->uf_bo) {
		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);

		addr += job->uf_offset;
		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
				       AMDGPU_FENCE_FLAG_64BIT);
	}

	if (f)
		*f = fence_get(hwf);

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	ring->current_ctx = ctx;
	amdgpu_ring_commit(ring);
	return 0;
}