/**
 * Acquire the spin lock `sl`.
 *
 * Fast path: a single xcng() that stores LOCK_VAL into the lock word. If the
 * value it returns is UNLOCK_VAL the lock is now ours and we are done.
 * Otherwise the acquisition is handed over to lock_slow().
 */
static inline void lock(struct thr_spin_lock* sl)
{
  volatile unsigned* word = &sl->m_lock;
  unsigned int locked = LOCK_VAL;

  if (likely(xcng(word, locked) == UNLOCK_VAL))
  {
    return;
  }

  /* Exchange did not return UNLOCK_VAL: take the contended path. */
  lock_slow(sl);
}
/**
 * Contended path of lock().
 *
 * Repeats two steps until the lock is obtained:
 *  1. call cpu_pause() (at least once) and keep doing so for as long as a
 *     plain read of the lock word returns LOCK_VAL;
 *  2. try to take the lock with xcng(); success is signalled by the exchange
 *     returning UNLOCK_VAL.
 */
static void lock_slow(struct thr_spin_lock* sl)
{
  volatile unsigned* word = &sl->m_lock;
  unsigned int locked = LOCK_VAL;

  for (;;)
  {
    /* Wait on ordinary reads before retrying the exchange. */
    do
    {
      cpu_pause();
    } while (*word == locked);

    /* The word no longer read as LOCK_VAL: attempt to grab it. */
    if (likely(xcng(word, locked) == UNLOCK_VAL))
    {
      return;
    }
  }
}
void WatchDog::run() { unsigned int sleep_time; NDB_TICKS last_ticks, now; Uint32 numThreads; Uint32 counterValue[MAX_WATCHED_THREADS]; Uint32 oldCounterValue[MAX_WATCHED_THREADS]; Uint32 threadId[MAX_WATCHED_THREADS]; NDB_TICKS start_ticks[MAX_WATCHED_THREADS]; Uint32 theIntervalCheck[MAX_WATCHED_THREADS]; Uint32 elapsed[MAX_WATCHED_THREADS]; if (!NdbTick_IsMonotonic()) { g_eventLogger->warning("A monotonic timer was not available on this platform."); g_eventLogger->warning("Adjusting system time manually, or otherwise (e.g. NTP), " "may cause false watchdog alarms, temporary freeze, or node shutdown."); } last_ticks = NdbTick_getCurrentTicks(); while (!theStop) { sleep_time= 100; NdbSleep_MilliSleep(sleep_time); if(theStop) break; now = NdbTick_getCurrentTicks(); if (NdbTick_Compare(now, last_ticks) < 0) { g_eventLogger->warning("Watchdog: Time ticked backwards %llu ms.", NdbTick_Elapsed(now, last_ticks).milliSec()); /** * A backtick after sleeping 100ms, is considdered a * fatal error if monotonic timers are used. */ assert(!NdbTick_IsMonotonic()); } // Print warnings if sleeping much longer than expected else if (NdbTick_Elapsed(last_ticks, now).milliSec() > sleep_time*2) { struct tms my_tms; if (times(&my_tms) != (clock_t)-1) { g_eventLogger->info("Watchdog: User time: %llu System time: %llu", (Uint64)my_tms.tms_utime, (Uint64)my_tms.tms_stime); } else { g_eventLogger->info("Watchdog: User time: %llu System time: %llu (errno=%d)", (Uint64)my_tms.tms_utime, (Uint64)my_tms.tms_stime, errno); } g_eventLogger->warning("Watchdog: Warning overslept %llu ms, expected %u ms.", NdbTick_Elapsed(last_ticks, now).milliSec(), sleep_time); } last_ticks = now; /* Copy out all active counters under locked mutex, then check them afterwards without holding the mutex. 
*/ NdbMutex_Lock(m_mutex); numThreads = m_watchedCount; for (Uint32 i = 0; i < numThreads; i++) { #ifdef NDB_HAVE_XCNG /* atomically read and clear watchdog counter */ counterValue[i] = xcng(m_watchedList[i].m_watchCounter, 0); #else counterValue[i] = *(m_watchedList[i].m_watchCounter); #endif if (likely(counterValue[i] != 0)) { /* The thread responded since last check, so just update state until next check. */ #ifndef NDB_HAVE_XCNG /* There is a small race here. If the thread changes the counter in-between the read and setting to zero here in the watchdog thread, then gets stuck immediately after, we may report the wrong action that it got stuck on. But there will be no reporting of non-stuck thread because of this race, nor will there be missed reporting. */ *(m_watchedList[i].m_watchCounter) = 0; #endif m_watchedList[i].m_startTicks = now; m_watchedList[i].m_slowWarnDelay = theInterval; m_watchedList[i].m_lastCounterValue = counterValue[i]; } else { start_ticks[i] = m_watchedList[i].m_startTicks; threadId[i] = m_watchedList[i].m_threadId; oldCounterValue[i] = m_watchedList[i].m_lastCounterValue; theIntervalCheck[i] = m_watchedList[i].m_slowWarnDelay; elapsed[i] = (Uint32)NdbTick_Elapsed(start_ticks[i], now).milliSec(); if (oldCounterValue[i] == 9 && elapsed[i] >= theIntervalCheck[i]) m_watchedList[i].m_slowWarnDelay += theInterval; } } NdbMutex_Unlock(m_mutex); /* Now check each watched thread if it has reported progress since previous check. Warn about any stuck threads, and eventually force shutdown the server. 
*/ for (Uint32 i = 0; i < numThreads; i++) { if (counterValue[i] != 0) continue; /* Counter value == 9 indicates malloc going on, this can take some time so only warn if we pass the watchdog interval */ if (oldCounterValue[i] != 9 || elapsed[i] >= theIntervalCheck[i]) { const char *last_stuck_action = get_action(oldCounterValue[i]); if (last_stuck_action != NULL) { g_eventLogger->warning("Ndb kernel thread %u is stuck in: %s " "elapsed=%u", threadId[i], last_stuck_action, elapsed[i]); } else { g_eventLogger->warning("Ndb kernel thread %u is stuck in: Unknown place %u " "elapsed=%u", threadId[i], oldCounterValue[i], elapsed[i]); } { struct tms my_tms; if (times(&my_tms) != (clock_t)-1) { g_eventLogger->info("Watchdog: User time: %llu System time: %llu", (Uint64)my_tms.tms_utime, (Uint64)my_tms.tms_stime); } else { g_eventLogger->info("Watchdog: User time: %llu System time: %llu (errno=%d)", (Uint64)my_tms.tms_utime, (Uint64)my_tms.tms_stime, errno); } } if (elapsed[i] > 3 * theInterval) { shutdownSystem(last_stuck_action); } } } } return; }