/* * Marks the current process as idle; i.e., it is no longer able to respond * to a runaway cleanup. However, before it returns from this method, it * would trigger one last runaway cleanup for a pre-dactivation era runaway * event, if necessary. */ void IdleTracker_DeactivateProcess() { if (NULL != MySessionState) { /* * Verify that deactivation during proc_exit_inprogress is protected in * critical section or the interrupt is disabled so that we don't attempt * any runaway cleanup */ AssertImply(proc_exit_inprogress, CritSectionCount > 0 || InterruptHoldoffCount > 0); /* * When an idle process receives a SIGTERM process, the signal handler * die() calls the cleanup directly, so we get here for an idle process. * Instead of re-activating it forcefully, just special case it * and don't do anything during process exit for already inactive processes. */ if (proc_exit_inprogress && ! isProcessActive) { Assert(deactivationVersion >= activationVersion); return; } Assert(isProcessActive); Assert(deactivationVersion <= activationVersion); /* No new runaway event can come in */ SpinLockAcquire(&MySessionState->spinLock); Assert(MySessionState->activeProcessCount <= MySessionState->pinCount); /* No atomic update necessary as the update is protected by spin lock */ MySessionState->activeProcessCount -= 1; Assert(0 <= MySessionState->activeProcessCount); MySessionState->idle_start = GetCurrentTimestamp(); isProcessActive = false; /* Save the point where we reduced the activeProcessCount */ deactivationVersion = *CurrentVersion; /* * Release spinLock as we no longer contend for isRunaway. */ SpinLockRelease(&MySessionState->spinLock); /* * We are still deactivated (i.e., activeProcessCount is decremented). If an ERROR is indeed thrown * from the VmemTracker_StartCleanupIfRunaway, the VmemTracker_RunawayCleanupDoneForProcess() * method would reactivate this process. 
*/ RunawayCleaner_StartCleanup(); /* At this point the process must be clean, unless we don't have a runaway event before deactivation */ Assert(*latestRunawayVersion > deactivationVersion || !RunawayCleaner_IsCleanupInProgress()); } /* At this point the process is ready to be blocked in ReadCommand() */ }
/* * Checks if RunawayCleaner_StartCleanup() does not start cleanup if * the current session is not a runaway */ void test__RunawayCleaner_StartCleanup__IgnoresNonRunaway(void **state) { InitFakeSessionState(2 /* activeProcessCount */, CLEANUP_COUNTDOWN_BEFORE_RUNAWAY /* cleanupCountdown */, RunawayStatus_NotRunaway /* runawayStatus */, 2 /* pinCount */, 0 /* vmem */); static fakeLatestRunawayVersion = 10; latestRunawayVersion = &fakeLatestRunawayVersion; beginCleanupRunawayVersion = 0; RunawayCleaner_StartCleanup(); /* Cleanup shouldn't have begun */ assert_true(beginCleanupRunawayVersion != *latestRunawayVersion); }
/* * Checks if RunawayCleaner_StartCleanup() does not execute a duplicate * cleanup for the same runaway event that it already started cleaning up */ void test__RunawayCleaner_StartCleanup__IgnoresDuplicateCleanup(void **state) { InitFakeSessionState(2 /* activeProcessCount */, 2 /* cleanupCountdown */, RunawayStatus_PrimaryRunawaySession /* runawayStatus */, 2 /* pinCount */, 0 /* vmem */); static fakeLatestRunawayVersion = 10; latestRunawayVersion = &fakeLatestRunawayVersion; beginCleanupRunawayVersion = *latestRunawayVersion; /* * As we are not providing IsCommitInProgress, the call itself verifies * that we are not attempting any cleanup */ RunawayCleaner_StartCleanup(); }
/*
 * In a red-zone, identifies the top vmem consuming session and requests it
 * to clean up. If the red-zone handler determines that its own session is the
 * runaway, it also starts the cleanup.
 *
 * The method returns without doing anything when we are not in the vmem
 * red-zone, when interrupts are held off, or when we are inside a critical
 * section.
 */
void
RedZoneHandler_DetectRunawaySession()
{
	/* Nothing to detect outside of the red-zone */
	if (!RedZoneHandler_IsVmemRedZone())
	{
		return;
	}

	/*
	 * InterruptHoldoffCount > 0 means we are in a sensitive code path that
	 * cannot tolerate the control flow disruption a pending die/cancel
	 * interrupt would cause. Because RunawayCleaner_StartCleanup() may
	 * eventually ERROR out, we must not proceed unless HOLD_INTERRUPTS() is
	 * not in effect.
	 *
	 * LWLockAcquire() is one example. It calls HOLD_INTERRUPTS() so that no
	 * FATAL/ERROR from the die/cancel handlers interrupts it. If we ignored
	 * InterruptHoldoffCount, PGSemaphoreLock() (called from LWLockAcquire)
	 * would call CHECK_FOR_INTERRUPTS() and we could throw an ERROR for a
	 * runaway session. LWLockAcquire shares its semaphore with the regular
	 * lock manager and ProcWaitForSignal, so it may wake up several times
	 * for reasons unrelated to an LWLock release. It counts those false
	 * wake-ups (semaphore decrements it should not have consumed) and
	 * re-increments the semaphore once the lock is actually acquired.
	 * Leaving LWLockAcquire early skips that rollback. The LWLockRelease
	 * performed during error handling does not re-increment the semaphore,
	 * so other waiting processes might never be woken and could hang
	 * perpetually.
	 */
	if (InterruptHoldoffCount > 0 || CritSectionCount > 0)
	{
		return;
	}

	/* Runaway detection/termination from a non-owner thread is not supported */
	Assert(MemoryProtection_IsOwnerThread());
	Assert(gp_mp_inited);

	RedZoneHandler_FlagTopConsumer();
	RunawayCleaner_StartCleanup();
}
/* * Checks if RunawayCleaner_StartCleanup() ignores cleanup if interrupts are held off */ void test__RunawayCleaner_StartCleanup__IgnoresCleanupInHoldoffInterrupt(void **state) { InitFakeSessionState(2 /* activeProcessCount */, 2 /* cleanupCountdown */, RunawayStatus_PrimaryRunawaySession /* runawayStatus */, 2 /* pinCount */, 12345 /* vmem */); static EventVersion fakeLatestRunawayVersion = 10; latestRunawayVersion = &fakeLatestRunawayVersion; /* * Set beginCleanupRunawayVersion to less than *latestRunawayVersino * to trigger a cleanup */ beginCleanupRunawayVersion = 1; endCleanupRunawayVersion = 1; /* Make sure the cleanup goes through */ vmemTrackerInited = true; isProcessActive = true; CritSectionCount = 0; InterruptHoldoffCount = 1; RunawayCleaner_StartCleanup(); assert_true(beginCleanupRunawayVersion == *latestRunawayVersion); /* Cleanup is done, without ever throwing an ERROR */ assert_true(endCleanupRunawayVersion == beginCleanupRunawayVersion); /* * cleanupCountdown is decremented by 1 as there was no error, and therefore * the cleanup is done within the same call of RunawayCleaner_StartCleanup */ assert_true(MySessionState->cleanupCountdown == 1); InterruptHoldoffCount = 0; }
/* * Checks if RunawayCleaner_RunawayCleanupDoneForProcess() ignores cleanupCountdown * if optional cleanup */ void test__RunawayCleaner_RunawayCleanupDoneForProcess__IgnoresCleanupIfNotRequired(void **state) { #define CLEANUP_COUNTDOWN 2 InitFakeSessionState(2 /* activeProcessCount */, CLEANUP_COUNTDOWN /* cleanupCountdown */, RunawayStatus_PrimaryRunawaySession /* runawayStatus */, 2 /* pinCount */, 12345 /* vmem */); static EventVersion fakeLatestRunawayVersion = 10; latestRunawayVersion = &fakeLatestRunawayVersion; *latestRunawayVersion = 10; /* * Set beginCleanupRunawayVersion to less than *latestRunawayVersino * to trigger a cleanup */ beginCleanupRunawayVersion = 1; endCleanupRunawayVersion = 1; /* Make sure the cleanup is not ignored for vmem initialization */ vmemTrackerInited = true; /* Simulate a deactivation before the runaway */ deactivationVersion = *latestRunawayVersion - 1; activationVersion = *latestRunawayVersion - 1; isProcessActive = false; CritSectionCount = 0; InterruptHoldoffCount = 0; RunawayCleaner_StartCleanup(); /* The cleanup shouldn't even start as the QE was deactivated at the time of the runaway*/ assert_true(beginCleanupRunawayVersion == 1); assert_true(endCleanupRunawayVersion == 1); /* * cleanupCountdown should not be decremented as this was an optional cleanup */ assert_true(MySessionState->cleanupCountdown == CLEANUP_COUNTDOWN); assert_true(MySessionState->runawayStatus == RunawayStatus_PrimaryRunawaySession); /* * Now simulate a scenario where the we activated but the runaway happened * before the activation */ beginCleanupRunawayVersion = 2; endCleanupRunawayVersion = 2; /* Another runaway happened after the last cleanup */ *latestRunawayVersion = beginCleanupRunawayVersion + 2; activationVersion = 5; deactivationVersion = 3; isProcessActive = true; RunawayCleaner_StartCleanup(); /* The cleanup shouldn't even start as the runaway event happened before the QE became active */ assert_true(beginCleanupRunawayVersion == 2); 
assert_true(endCleanupRunawayVersion == 2); /* * cleanupCountdown should not be decremented as this was an optional cleanup */ assert_true(MySessionState->cleanupCountdown == CLEANUP_COUNTDOWN); assert_true(MySessionState->runawayStatus == RunawayStatus_PrimaryRunawaySession); }
/* * Checks if RunawayCleaner_StartCleanup() starts the cleanup process if * all conditions are met (i.e., no commit is in progress and vmem tracker * is initialized) */ void test__RunawayCleaner_StartCleanup__StartsCleanupIfPossible(void **state) { InitFakeSessionState(2 /* activeProcessCount */, 2 /* cleanupCountdown */, RunawayStatus_PrimaryRunawaySession /* runawayStatus */, 2 /* pinCount */, 12345 /* vmem */); static fakeLatestRunawayVersion = 10; latestRunawayVersion = &fakeLatestRunawayVersion; *latestRunawayVersion = 10; /* * Set beginCleanupRunawayVersion to less than *latestRunawayVersion * to trigger a cleanup */ beginCleanupRunawayVersion = 1; endCleanupRunawayVersion = 1; isProcessActive = true; /* Make sure the cleanup goes through */ vmemTrackerInited = true; CritSectionCount = 0; InterruptHoldoffCount = 0; /* We need a valid gp_command_count to execute cleanup */ gp_command_count = 1; will_return(superuser, false); #ifdef FAULT_INJECTOR expect_value(FaultInjector_InjectFaultIfSet, identifier, RunawayCleanup); expect_value(FaultInjector_InjectFaultIfSet, ddlStatement, DDLNotSpecified); expect_value(FaultInjector_InjectFaultIfSet, databaseName, ""); expect_value(FaultInjector_InjectFaultIfSet, tableName, ""); will_be_called(FaultInjector_InjectFaultIfSet); #endif EXPECT_EREPORT(ERROR); PG_TRY(); { RunawayCleaner_StartCleanup(); assert_false("Cleanup didn't throw error"); } PG_CATCH(); { } PG_END_TRY(); assert_true(beginCleanupRunawayVersion == *latestRunawayVersion); /* We should not finish the cleanup as we errored out */ assert_true(endCleanupRunawayVersion == 1); /* cleanupCountdown shouldn't change as we haven't finished cleanup */ assert_true(MySessionState->cleanupCountdown == 2); /* * If we call RunawayCleaner_StartCleanup again for the same runaway event, * it should be a noop, therefore requiring no "will_be_called" setup */ RunawayCleaner_StartCleanup(); }