static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_mqd *m; uint32_t *mqd_hqd; uint32_t reg, wptr_val, data; bool valid_wptr = false; m = get_mqd(mqd); acquire_queue(kgd, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ mqd_hqd = &m->cp_mqd_base_addr_lo; for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); /* Copy userspace write pointer value to register. * Activate doorbell logic to monitor subsequent changes. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); /* read_user_ptr may take the mm->mmap_sem. * release srbm_mutex to avoid circular dependency between * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex. */ release_queue(kgd); valid_wptr = read_user_wptr(mm, wptr, wptr_val); acquire_queue(kgd, pipe_id, queue_id); if (valid_wptr) WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32(mmCP_HQD_ACTIVE, data); release_queue(kgd); return 0; }
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; unsigned long end_jiffies; uint32_t sdma_base_addr; uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; if (time_after(jiffies, end_jiffies)) return -ETIME; usleep_range(500, 1000); } if (m->sdma_engine_id) { data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, RESUME_CTX, 0); WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); } else { data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, RESUME_CTX, 0); WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); } data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); if (read_user_wptr(mm, wptr, data)) WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); else WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdmax_rlcx_virtual_addr); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); return 0; }
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_mqd *m; uint32_t *mqd_hqd; uint32_t reg, wptr_val, data; bool valid_wptr = false; m = get_mqd(mqd); acquire_queue(kgd, pipe_id, queue_id); /* HIQ is set during driver init period with vmid set to 0*/ if (m->cp_hqd_vmid == 0) { uint32_t value, mec, pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); value = RREG32(mmRLC_CP_SCHEDULERS); value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, ((mec << 5) | (pipe << 3) | queue_id | 0x80)); WREG32(mmRLC_CP_SCHEDULERS, value); } /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++) WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); /* Tonga errata: EOP RPTR/WPTR should be left unmodified. * This is safe since EOP RPTR==WPTR for any inactive HQD * on ASICs that do not support context-save. * EOP writes/reads can start anywhere in the ring. */ if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); } for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); /* Copy userspace write pointer value to register. * Activate doorbell logic to monitor subsequent changes. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); /* read_user_ptr may take the mm->mmap_sem. * release srbm_mutex to avoid circular dependency between * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex. */ release_queue(kgd); valid_wptr = read_user_wptr(mm, wptr, wptr_val); acquire_queue(kgd, pipe_id, queue_id); if (valid_wptr) WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32(mmCP_HQD_ACTIVE, data); release_queue(kgd); return 0; }