static int semaphore_passed(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_engine_cs *signaller; u32 seqno; engine->hangcheck.deadlock++; signaller = semaphore_waits_for(engine, &seqno); if (signaller == NULL) return -1; if (IS_ERR(signaller)) return 0; /* Prevent pathological recursion due to driver bugs */ if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) return -1; if (i915_seqno_passed(intel_engine_get_seqno(signaller), seqno)) return 1; /* cursory check for an unkickable deadlock */ if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE && semaphore_passed(signaller) < 0) return -1; return 0; }
static int igt_wakeup_thread(void *arg) { struct igt_wakeup *w = arg; struct intel_wait wait; while (wait_for_ready(w)) { GEM_BUG_ON(kthread_should_stop()); intel_wait_init_for_seqno(&wait, w->seqno); intel_engine_add_wait(w->engine, &wait); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (i915_seqno_passed(intel_engine_get_seqno(w->engine), w->seqno)) break; if (test_bit(STOP, &w->flags)) /* emergency escape */ break; schedule(); } intel_engine_remove_wait(w->engine, &wait); __set_current_state(TASK_RUNNING); } return 0; }
/*
 * Selftest helper: call i915_reset_engine() in a timed loop on every engine
 * and verify the reset bookkeeping afterwards.
 *
 * @i915:   device under test.
 * @active: when false the engine is reset while idle; when true a request
 *          built by hang_create_request() is submitted and confirmed running
 *          (wait_until_running()) before each reset.
 *
 * Returns 0 immediately if intel_has_reset_engine() reports no support, or
 * the hang_init() error if that fails.
 *
 * NOTE(review): the tail of this function (everything after the per-engine
 * loop) is not visible in this excerpt, so the final return path is not
 * documented here.
 */
static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err = 0;

	/* Check that we can issue an engine reset on an idle engine (no-op) */

	if (!intel_has_reset_engine(i915))
		return 0;

	/* The hang fixture is only needed when resetting a busy engine. */
	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		err = hang_init(&h, i915);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			return err;
	}

	for_each_engine(engine, i915, id) {
		unsigned int reset_count, reset_engine_count;
		IGT_TIMEOUT(end_time);

		/* Engines that fail this check are skipped in the active case. */
		if (active && !intel_engine_can_store_dword(engine))
			continue;

		if (!wait_for_idle(engine)) {
			pr_err("%s failed to idle before reset\n",
			       engine->name);
			err = -EIO;
			break;
		}

		/* Snapshot both counters so changes can be detected below. */
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		/*
		 * The per-engine reset flag is held for the whole timed loop
		 * and cleared once the loop exits.
		 */
		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			/*
			 * NOTE(review): seqno is assigned here and again below
			 * but never read in the visible code.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			if (active) {
				struct i915_request *rq;

				mutex_lock(&i915->drm.struct_mutex);
				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					mutex_unlock(&i915->drm.struct_mutex);
					break;
				}

				/*
				 * Take our own reference before submitting so
				 * rq can still be inspected after the mutex is
				 * dropped; released via i915_request_put().
				 */
				i915_request_get(rq);
				i915_request_add(rq);
				mutex_unlock(&i915->drm.struct_mutex);

				if (!wait_until_running(&h, rq)) {
					struct drm_printer p = drm_info_printer(i915->drm.dev);

					pr_err("%s: Failed to start request %x, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}

				GEM_BUG_ON(!rq->global_seqno);
				seqno = rq->global_seqno - 1;
				i915_request_put(rq);
			}

			err = i915_reset_engine(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			/* The global reset counter must not have moved. */
			if (i915_reset_count(&i915->gpu_error) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			/*
			 * The per-engine counter is expected to advance by one
			 * per iteration only when a request was running
			 * (active == true); on an idle engine it must stay put.
			 */
			reset_engine_count += active;
			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
			    reset_engine_count) {
				pr_err("%s engine reset %srecorded!\n",
				       engine->name, active ? "not " : "");
				err = -EINVAL;
				break;
			}

			if (!wait_for_idle(engine)) {
				struct drm_printer p =
					drm_info_printer(i915->drm.dev);

				pr_err("%s failed to idle after reset\n",
				       engine->name);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				err = -EIO;
				break;
			}
		} while (time_before(jiffies, end_time)); /* repeat until the timeout expires */
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

		/* Any failure inside the timed loop aborts the remaining engines. */
		if (err)
			break;

		err = igt_flush_test(i915, 0);
		if (err)
			break;
	}
static int igt_wakeup(void *arg) { I915_RND_STATE(prng); struct intel_engine_cs *engine = arg; struct igt_wakeup *waiters; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); const int count = 4096; const u32 max_seqno = count / 4; atomic_t ready, set, done; int err = -ENOMEM; int n, step; mock_engine_reset(engine); waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL); if (!waiters) goto out_engines; /* Create a large number of threads, each waiting on a random seqno. * Multiple waiters will be waiting for the same seqno. */ atomic_set(&ready, count); for (n = 0; n < count; n++) { waiters[n].wq = &wq; waiters[n].ready = &ready; waiters[n].set = &set; waiters[n].done = &done; waiters[n].engine = engine; waiters[n].flags = BIT(IDLE); waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], "i915/igt:%d", n); if (IS_ERR(waiters[n].tsk)) goto out_waiters; get_task_struct(waiters[n].tsk); } for (step = 1; step <= max_seqno; step <<= 1) { u32 seqno; /* The waiter threads start paused as we assign them a random * seqno and reset the engine. Once the engine is reset, * we signal that the threads may begin their wait upon their * seqno. */ for (n = 0; n < count; n++) { GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); waiters[n].seqno = 1 + prandom_u32_state(&prng) % max_seqno; } mock_seqno_advance(engine, 0); igt_wake_all_sync(&ready, &set, &done, &wq, count); /* Simulate the GPU doing chunks of work, with one or more * seqno appearing to finish at the same time. A random number * of threads will be waiting upon the update and hopefully be * woken. */ for (seqno = 1; seqno <= max_seqno + step; seqno += step) { usleep_range(50, 500); mock_seqno_advance(engine, seqno); } GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); /* With the seqno now beyond any of the waiting threads, they * should all be woken, see that they are complete and signal * that they are ready for the next test. We wait until all * threads are complete and waiting for us (i.e. not a seqno). 
*/ if (!wait_var_event_timeout(&done, !atomic_read(&done), 10 * HZ)) { pr_err("Timed out waiting for %d remaining waiters\n", atomic_read(&done)); err = -ETIMEDOUT; break; } err = check_rbtree_empty(engine); if (err) break; } out_waiters: for (n = 0; n < count; n++) { if (IS_ERR(waiters[n].tsk)) break; set_bit(STOP, &waiters[n].flags); } mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ igt_wake_all_sync(&ready, &set, &done, &wq, n); for (n = 0; n < count; n++) { if (IS_ERR(waiters[n].tsk)) break; kthread_stop(waiters[n].tsk); put_task_struct(waiters[n].tsk); } kvfree(waiters); out_engines: mock_engine_flush(engine); return err; }