/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value). Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic_load_acq_64(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic_load_acq_64(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}
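/*
 * Illustrative sketch (not taken from this file) of how a caller such as
 * radeon_fence_signaled() might use the helper above to check a fence
 * object.  The radeon_fence fields (rdev, seq, ring) and the
 * RADEON_FENCE_SIGNALED_SEQ marker are assumptions made for the example.
 */
bool radeon_fence_signaled_example(struct radeon_fence *fence)
{
	if (fence == NULL) {
		return true;
	}
	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
		return true;
	}
	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		/* cache the result so later checks can skip the hardware read */
		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
		return true;
	}
	return false;
}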
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic_load_acq_64(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);
		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}
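/*
 * Hedged sketch of how a debugfs entry like the one above is typically
 * registered with the DRM debugfs helpers; the list name and the init
 * function are illustrative, not taken from this file.
 */
static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
};

int radeon_debugfs_fence_init_example(struct radeon_device *rdev)
{
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
}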
/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event ||
	    !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		index = R600_WB_EVENT_OFFSET + ring * 4;
	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
	}
	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	radeon_fence_write(rdev, atomic_load_acq_64(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016jx and cpu addr 0x%p\n",
		 ring, (uintmax_t)rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}
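/*
 * Sketch of how the addresses set up above are typically consumed: a
 * radeon_fence_write()-style helper stores the sequence number either
 * through the writeback page (cpu_addr) or into the scratch register.
 * Details such as the wb.enabled test are assumptions for illustration.
 */
static void radeon_fence_write_example(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];

	if (rdev->wb.enabled || !drv->scratch_reg) {
		/* writeback: the GPU writes the value into system memory */
		*drv->cpu_addr = cpu_to_le32(seq);
	} else {
		/* fallback: keep the fence value in a scratch register */
		WREG32(drv->scratch_reg, seq);
	}
}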
/**
 * radeon_fence_wait_next_locked - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
{
	uint64_t seq;

	seq = atomic_load_acq_64(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is already
		 * the last emitted fence */
		return -ENOENT;
	}
	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
}
/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by ring lock when reading the last sequence
	 * but it's ok to report slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring] -
		  atomic_load_acq_64(&rdev->fence_drv[ring].last_seq);
	/* to avoid 32-bit wraparound */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}
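/*
 * Minimal usage sketch, in the spirit of the dynpm power management code
 * mentioned above: a ring with no emitted-but-unsignaled fences can be
 * treated as idle.  The helper name is hypothetical.
 */
static bool radeon_ring_idle_example(struct radeon_device *rdev, int ring)
{
	return radeon_fence_count_emitted(rdev, ring) == 0;
}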
static int
cpu_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
	struct cpu_device *cpdev;

	switch (index) {
	case CPU_IVAR_PCPU:
		cpdev = device_get_ivars(child);
		*result = (uintptr_t)cpdev->cd_pcpu;
		break;
	case CPU_IVAR_NOMINAL_MHZ:
		if (tsc_is_invariant) {
			*result = (uintptr_t)(atomic_load_acq_64(&tsc_freq) /
			    1000000);
			break;
		}
		/* FALLTHROUGH */
	default:
		return (ENOENT);
	}
	return (0);
}
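/*
 * Hedged usage sketch: a child device querying the nominal CPU frequency
 * through the ivar interface implemented above.  BUS_READ_IVAR() returns
 * ENOENT when the TSC is not invariant, so callers must handle failure.
 * The wrapper name is hypothetical.
 */
static int
cpu_query_nominal_mhz(device_t child, int *mhz)
{
	uintptr_t v;
	int error;

	error = BUS_READ_IVAR(device_get_parent(child), child,
	    CPU_IVAR_NOMINAL_MHZ, &v);
	if (error != 0)
		return (error);
	*mhz = (int)v;
	return (0);
}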
static int
delay_tc(int n)
{
	struct timecounter *tc;
	timecounter_get_t *func;
	uint64_t end, freq, now;
	u_int last, mask, u;

	tc = timecounter;
	freq = atomic_load_acq_64(&tsc_freq);
	if (tsc_is_invariant && freq != 0) {
		func = get_tsc;
		mask = ~0u;
	} else {
		if (tc->tc_quality <= 0)
			return (0);
		func = tc->tc_get_timecount;
		mask = tc->tc_counter_mask;
		freq = tc->tc_frequency;
	}
	now = 0;
	end = freq * n / 1000000;
	if (func == get_tsc)
		sched_pin();
	last = func(tc) & mask;
	do {
		cpu_spinwait();
		u = func(tc) & mask;
		if (u < last)
			now += mask - last + u + 1;
		else
			now += u - last;
		last = u;
	} while (now < end);
	if (func == get_tsc)
		sched_unpin();
	return (1);
}
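/*
 * Sketch of how a busy-wait DELAY() implementation might dispatch to
 * delay_tc(): use the timecounter/TSC path when it is usable and fall
 * back to a fixed-frequency hardware timer loop (e.g. the i8254)
 * otherwise.  The fallback helper name is illustrative.
 */
void
DELAY(int n)
{
	if (delay_tc(n))
		return;
	delay_timer_fallback(n);	/* hypothetical legacy timer loop */
}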
/**
 * radeon_fence_wait_seq - wait for a specific sequence number
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number we want to wait for
 * @ring: ring index the fence is associated with
 * @intr: use interruptible sleep
 * @lock_ring: whether the ring should be locked or not
 *
 * Wait for the requested sequence number to be written (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait(), et al.
 * Returns 0 if the sequence number has passed, error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected and the ring is
 * marked as not ready so no further jobs get scheduled until a successful
 * reset.
 */
static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
				 unsigned ring, bool intr, bool lock_ring)
{
	unsigned long timeout, last_activity;
	uint64_t seq;
	unsigned i;
	bool signaled, fence_queue_locked;
	int r;

	while (target_seq > atomic_load_acq_64(&rdev->fence_drv[ring].last_seq)) {
		if (!rdev->ring[ring].ready) {
			return -EBUSY;
		}

		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = rdev->fence_drv[ring].last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
			 * anyway we will just wait for the minimum amount and then check for a lockup
			 */
			timeout = 1;
		}
		seq = atomic_load_acq_64(&rdev->fence_drv[ring].last_seq);
		/* Save current last activity value, used to check for GPU lockups */
		last_activity = rdev->fence_drv[ring].last_activity;

		CTR2(KTR_DRM, "radeon fence: wait begin (ring=%d, seq=%d)",
		    ring, seq);
		radeon_irq_kms_sw_irq_get(rdev, ring);
		fence_queue_locked = false;
		r = 0;
		while (!(signaled = radeon_fence_seq_signaled(rdev,
		    target_seq, ring))) {
			if (!fence_queue_locked) {
				mtx_lock(&rdev->fence_queue_mtx);
				fence_queue_locked = true;
			}
			if (intr) {
				r = cv_timedwait_sig(&rdev->fence_queue,
				    &rdev->fence_queue_mtx, timeout);
			} else {
				r = cv_timedwait(&rdev->fence_queue,
				    &rdev->fence_queue_mtx, timeout);
			}
			if (r == EINTR)
				r = ERESTARTSYS;
			if (r != 0) {
				if (r == EWOULDBLOCK) {
					signaled = radeon_fence_seq_signaled(
					    rdev, target_seq, ring);
				}
				break;
			}
		}
		if (fence_queue_locked) {
			mtx_unlock(&rdev->fence_queue_mtx);
		}
		radeon_irq_kms_sw_irq_put(rdev, ring);
		if (unlikely(r == ERESTARTSYS)) {
			return -r;
		}
		CTR2(KTR_DRM, "radeon fence: wait end (ring=%d, seq=%d)",
		    ring, seq);

		if (unlikely(!signaled)) {
#ifndef __FreeBSD__
			/* we were interrupted for some reason and fence
			 * isn't signaled yet, resume waiting */
			if (r) {
				continue;
			}
#endif

			/* check if sequence value has changed since last_activity */
			if (seq != atomic_load_acq_64(&rdev->fence_drv[ring].last_seq)) {
				continue;
			}

			if (lock_ring) {
				sx_xlock(&rdev->ring_lock);
			}

			/* test if somebody else has already decided that this is a lockup */
			if (last_activity != rdev->fence_drv[ring].last_activity) {
				if (lock_ring) {
					sx_xunlock(&rdev->ring_lock);
				}
				continue;
			}

			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
				/* good news we believe it's a lockup */
				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016jx last fence id 0x%016jx)\n",
					 (uintmax_t)target_seq, (uintmax_t)seq);
				/* change last activity so nobody else thinks there is a lockup */
				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
					rdev->fence_drv[i].last_activity = jiffies;
				}

				/* mark the ring as not ready any more */
				rdev->ring[ring].ready = false;
				if (lock_ring) {
					sx_xunlock(&rdev->ring_lock);
				}
				return -EDEADLK;
			}

			if (lock_ring) {
				sx_xunlock(&rdev->ring_lock);
			}
		}
	}
	return 0;
}
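/*
 * Sketch of a typical caller in the spirit of radeon_fence_wait(): wait on
 * a specific fence object and mark it signaled once the sequence number
 * has passed.  The radeon_fence fields and RADEON_FENCE_SIGNALED_SEQ are
 * assumptions for the example.
 */
int radeon_fence_wait_example(struct radeon_fence *fence, bool intr)
{
	int r;

	if (fence == NULL) {
		return -EINVAL;
	}
	r = radeon_fence_wait_seq(fence->rdev, fence->seq,
				  fence->ring, intr, true);
	if (r) {
		return r;
	}
	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
	return 0;
}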
/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process, and that other
	 * process needs to update last_seq between the atomic read and
	 * xchg of the current process.
	 *
	 * Moreover, for this to become an infinite loop there need to be
	 * continuously new fences signaled, i.e. radeon_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that xchgs last_seq
	 * between the atomic read and xchg of the current process. And the
	 * value the other process set as last seq must be higher than
	 * the seq value we just read. Which means that the current process
	 * needs to be interrupted after radeon_fence_read and before the
	 * atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * we bail after 10 loops, just accepting the fact that we might
	 * have temporarily set last_seq not to the true real last
	 * seq but to an older one.
	 */
	last_seq = atomic_load_acq_64(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times, leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq as signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (wake) {
		rdev->fence_drv[ring].last_activity = jiffies;
		cv_broadcast(&rdev->fence_queue);
	}
}
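/*
 * Standalone illustration (not driver code) of the sequence extension done
 * in the loop above: the hardware reports only the low 32 bits of the
 * fence value, so the upper 32 bits are borrowed from the last known
 * 64-bit sequence, switching to the emitted value's epoch when the low
 * part has wrapped around.
 */
static uint64_t extend_fence_seq(uint32_t hw_seq, uint64_t last_seq,
				 uint64_t last_emitted)
{
	uint64_t seq = (uint64_t)hw_seq | (last_seq & 0xffffffff00000000ULL);

	if (seq < last_seq) {
		/* the low 32 bits wrapped; take the upper bits from the
		 * last emitted sequence instead */
		seq = (uint64_t)hw_seq | (last_emitted & 0xffffffff00000000ULL);
	}
	return seq;
}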