예제 #1
0
/*
 * Request GPU recovery for the device behind @kgd.
 *
 * @kgd: handle that is cast directly to a struct amdgpu_device pointer.
 *
 * Recovery is only started when amdgpu_device_should_recover_gpu()
 * returns non-zero; otherwise the call does nothing. NULL is passed as
 * the second argument of amdgpu_device_gpu_recover(), and its return
 * value is not examined.
 */
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
	struct amdgpu_device *dev = (struct amdgpu_device *)kgd;

	/* Nothing to do when recovery is not wanted for this device. */
	if (!amdgpu_device_should_recover_gpu(dev))
		return;

	amdgpu_device_gpu_recover(dev, NULL);
}
예제 #2
0
파일: mxgpu_ai.c 프로젝트: avagin/linux
/*
 * Work handler that waits for the FLR-complete mailbox message.
 *
 * @work: the flr_work member embedded in a struct amdgpu_virt, which is
 *        itself the virt member of a struct amdgpu_device.
 *
 * Sequence:
 *   1. Try (without blocking) to take adev->lock_reset; on success mark
 *      the device as being in GPU reset.
 *   2. Poll the mailbox for IDH_FLR_NOTIFICATION_CMPL, sleeping 10 ms
 *      between polls, until the message is seen or the budget that
 *      starts at AI_MAILBOX_POLL_FLR_TIMEDOUT has dropped to 1 or less.
 *   3. Undo step 1 if the lock was obtained.
 *   4. Start GPU recovery when amdgpu_device_should_recover_gpu() agrees
 *      and amdgpu_lockup_timeout equals MAX_SCHEDULE_TIMEOUT.
 */
static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
{
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
	int remaining = AI_MAILBOX_POLL_FLR_TIMEDOUT;
	int have_reset_lock;

	/*
	 * Hold off amdgpu_gpu_recover until the FLR COMPLETE message has
	 * arrived; otherwise the mailbox message would be ruined/reset by
	 * the VF FLR.
	 *
	 * Once FLR_NOTIFICATION_CMPL is received the host side has finished
	 * this VF's FLR, so lock_reset can be released to let
	 * "amdgpu_job_timedout" run gpu_recover().
	 */
	have_reset_lock = mutex_trylock(&adev->lock_reset);
	if (have_reset_lock)
		adev->in_gpu_reset = 1;

	/* Poll first, then sleep; give up once the budget is used up. */
	while (xgpu_ai_mailbox_peek_msg(adev) != IDH_FLR_NOTIFICATION_CMPL) {
		msleep(10);
		remaining -= 10;
		if (remaining <= 1)
			break;
	}

	/* Only undo the reset state if this handler was the one to set it. */
	if (have_reset_lock) {
		adev->in_gpu_reset = 0;
		mutex_unlock(&adev->lock_reset);
	}

	/* Trigger recovery for world switch failure if no TDR */
	if (!amdgpu_device_should_recover_gpu(adev))
		return;
	if (amdgpu_lockup_timeout != MAX_SCHEDULE_TIMEOUT)
		return;

	amdgpu_device_gpu_recover(adev, NULL);
}