/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
 */
static inline uint32
ClockSweepTick(void)
{
    uint32      victim;

    /*
     * Atomically move hand ahead one buffer - if there are several processes
     * doing this, this can lead to buffers being returned slightly out of
     * apparent order.
     */
    victim =
        pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

    if (victim >= NBuffers)
    {
        uint32      originalVictim = victim;

        /* always wrap what we look up in BufferDescriptors */
        victim = victim % NBuffers;

        /*
         * If we're the one that just caused a wraparound, force
         * completePasses to be incremented while holding the spinlock. We
         * need the spinlock so StrategySyncStart() can return a consistent
         * value consisting of nextVictimBuffer and completePasses.
         */
        if (victim == 0)
        {
            uint32      expected;
            uint32      wrapped;
            bool        success = false;

            expected = originalVictim + 1;

            while (!success)
            {
                /*
                 * Acquire the spinlock while increasing completePasses. That
                 * allows other readers to read nextVictimBuffer and
                 * completePasses in a consistent manner which is required
                 * for StrategySyncStart().  In theory delaying the increment
                 * could lead to an overflow of nextVictimBuffer, but that's
                 * highly unlikely and wouldn't be particularly harmful.
                 */
                SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

                wrapped = expected % NBuffers;

                success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
                                                         &expected, wrapped);
                if (success)
                    StrategyControl->completePasses++;

                SpinLockRelease(&StrategyControl->buffer_strategy_lock);
            }
        }
    }
    return victim;
}
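/*
 * A minimal standalone sketch of the same wraparound technique, using C11
 * <stdatomic.h> instead of the pg_atomic_* API.  All names here (N_SLOTS,
 * next_victim, complete_passes, tick) are illustrative and not part of the
 * PostgreSQL code above; the spinlock that keeps completePasses consistent
 * with nextVictimBuffer for StrategySyncStart() is deliberately omitted.
 * The hand is advanced with an unconditional fetch_add, and only the caller
 * that observes the wrap folds the counter back into range with a CAS, so
 * concurrent callers are never blocked.
 */
#include <stdatomic.h>
#include <stdint.h>

#define N_SLOTS 128u

static _Atomic uint32_t next_victim = 0;
static _Atomic uint32_t complete_passes = 0;

static uint32_t
tick(void)
{
    uint32_t    victim = atomic_fetch_add(&next_victim, 1);

    if (victim >= N_SLOTS)
    {
        uint32_t    expected = victim + 1;  /* the value we just stored */

        victim %= N_SLOTS;
        if (victim == 0)
        {
            /*
             * We caused the wrap: fold the shared counter back into range.
             * On failure 'expected' is refreshed with the current value
             * (other callers advanced the hand meanwhile), so we retry the
             * fold with the new value until it sticks.
             */
            while (!atomic_compare_exchange_weak(&next_victim, &expected,
                                                 expected % N_SLOTS))
                ;
            atomic_fetch_add(&complete_passes, 1);
        }
    }
    return victim;
}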
/*
 * Retrieve a new cache entry from the pre-allocated freelist.
 * The client has to either insert the entry in the cache or surrender it.
 *
 * This function calls the populateEntry callback function to populate the
 * entry before returning it to the client.
 *
 * populate_param is the opaque parameter to be passed to the populateEntry
 * function.
 *
 * Returns NULL if the freelist is empty.
 */
CacheEntry *
Cache_AcquireEntry(Cache *cache, void *populate_param)
{
    Assert(NULL != cache);

    CacheEntry *newEntry = Cache_GetFreeElement(cache);

    if (NULL == newEntry)
    {
        return NULL;
    }

    CACHE_ASSERT_WIPED(newEntry);

    uint32      expected = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
    bool        casResult =
#endif
    pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &newEntry->state,
                                   &expected, CACHE_ENTRY_RESERVED);
    Assert(casResult);

    /*
     * In RESERVED state nobody else will try to read this entry, not even
     * the views. No need to lock the entry while populating.
     */
    if (cache->populateEntry)
    {
        cache->populateEntry(CACHE_ENTRY_PAYLOAD(newEntry), populate_param);
    }

    expected = CACHE_ENTRY_RESERVED;
#ifdef USE_ASSERT_CHECKING
    casResult =
#endif
    pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &newEntry->state,
                                   &expected, CACHE_ENTRY_ACQUIRED);
    Assert(casResult);

    Cache_RegisterCleanup(cache, newEntry, false /* isCachedEntry */ );

    return newEntry;
}
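/*
 * Illustrative sketch (C11 atomics; not part of the original source) of the
 * entry state machine that Cache_AcquireEntry() and its callers rely on.
 * Every transition is a single CAS, so a concurrent reader never sees a
 * half-initialized entry: it either still observes FREE/RESERVED and skips
 * the entry, or it observes the final state.  The enum mirrors the
 * CACHE_ENTRY_* constants; everything else is made up for the example.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

enum entry_state
{
    ENTRY_FREE,
    ENTRY_RESERVED,
    ENTRY_ACQUIRED,
    ENTRY_CACHED,
    ENTRY_DELETED
};

typedef struct
{
    _Atomic uint32_t state;
} entry_t;

/* One state transition; returns false if another process moved the entry out of 'from' first. */
static bool
entry_transition(entry_t *e, uint32_t from, uint32_t to)
{
    uint32_t    expected = from;

    return atomic_compare_exchange_strong(&e->state, &expected, to);
}

int
main(void)
{
    entry_t     e = {ENTRY_FREE};

    assert(entry_transition(&e, ENTRY_FREE, ENTRY_RESERVED));      /* taken off the freelist */
    /* ... populate the payload; nobody else reads a RESERVED entry ... */
    assert(entry_transition(&e, ENTRY_RESERVED, ENTRY_ACQUIRED));  /* handed to the client */
    assert(entry_transition(&e, ENTRY_ACQUIRED, ENTRY_CACHED));    /* Cache_Insert() */
    assert(!entry_transition(&e, ENTRY_ACQUIRED, ENTRY_DELETED));  /* stale 'from' is rejected */
    return 0;
}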
/*
 * Mark an entry for removal from the cache.
 * The entry is not immediately deleted, as there is at least one client
 * using it.
 * The entry will not be found using look-up operations after this step.
 * The entry will physically be removed once all using clients release it.
 *
 * This function is not synchronized. Multiple clients can mark an entry
 * deleted.
 */
void
Cache_Remove(Cache *cache, CacheEntry *entry)
{
    Assert(NULL != entry);

    uint32      expected = CACHE_ENTRY_CACHED;

#ifdef USE_ASSERT_CHECKING
    bool        casResult =
#endif
    pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &entry->state,
                                   &expected, CACHE_ENTRY_DELETED);
    Assert(casResult);

    Cache_DecPerfCounter(&cache->cacheHdr->cacheStats.noCachedEntries, 1 /* delta */ );
    Cache_AddPerfCounter(&cache->cacheHdr->cacheStats.noDeletedEntries, 1 /* delta */ );
}
static void
test_atomic_uint32(void)
{
    pg_atomic_uint32 var;
    uint32      expected;
    int         i;

    pg_atomic_init_u32(&var, 0);

    if (pg_atomic_read_u32(&var) != 0)
        elog(ERROR, "atomic_read_u32() #1 wrong");

    pg_atomic_write_u32(&var, 3);

    if (pg_atomic_read_u32(&var) != 3)
        elog(ERROR, "atomic_read_u32() #2 wrong");

    if (pg_atomic_fetch_add_u32(&var, 1) != 3)
        elog(ERROR, "atomic_fetch_add_u32() #1 wrong");

    if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
        elog(ERROR, "atomic_fetch_sub_u32() #1 wrong");

    if (pg_atomic_sub_fetch_u32(&var, 3) != 0)
        elog(ERROR, "atomic_sub_fetch_u32() #1 wrong");

    if (pg_atomic_add_fetch_u32(&var, 10) != 10)
        elog(ERROR, "atomic_add_fetch_u32() #1 wrong");

    if (pg_atomic_exchange_u32(&var, 5) != 10)
        elog(ERROR, "pg_atomic_exchange_u32() #1 wrong");

    if (pg_atomic_exchange_u32(&var, 0) != 5)
        elog(ERROR, "pg_atomic_exchange_u32() #2 wrong");

    /* test around numerical limits */
    if (pg_atomic_fetch_add_u32(&var, INT_MAX) != 0)
        elog(ERROR, "pg_atomic_fetch_add_u32() #2 wrong");

    if (pg_atomic_fetch_add_u32(&var, INT_MAX) != INT_MAX)
        elog(ERROR, "pg_atomic_fetch_add_u32() #3 wrong");

    pg_atomic_fetch_add_u32(&var, 1);   /* top up to UINT_MAX */

    if (pg_atomic_read_u32(&var) != UINT_MAX)
        elog(ERROR, "atomic_read_u32() #3 wrong");

    if (pg_atomic_fetch_sub_u32(&var, INT_MAX) != UINT_MAX)
        elog(ERROR, "pg_atomic_fetch_sub_u32() #2 wrong");

    if (pg_atomic_read_u32(&var) != (uint32) INT_MAX + 1)
        elog(ERROR, "atomic_read_u32() #4 wrong: %u", pg_atomic_read_u32(&var));

    expected = pg_atomic_sub_fetch_u32(&var, INT_MAX);
    if (expected != 1)
        elog(ERROR, "pg_atomic_sub_fetch_u32() #3 wrong: %u", expected);

    pg_atomic_sub_fetch_u32(&var, 1);

    /* fail exchange because of old expected */
    expected = 10;
    if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
        elog(ERROR, "atomic_compare_exchange_u32() changed value spuriously");

    /* CAS is allowed to fail due to interrupts, try a couple of times */
    for (i = 0; i < 1000; i++)
    {
        expected = 0;
        if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
            break;
    }
    if (i == 1000)
        elog(ERROR, "atomic_compare_exchange_u32() never succeeded");
    if (pg_atomic_read_u32(&var) != 1)
        elog(ERROR, "atomic_compare_exchange_u32() didn't set value properly");

    pg_atomic_write_u32(&var, 0);

    /* try setting flagbits */
    if (pg_atomic_fetch_or_u32(&var, 1) & 1)
        elog(ERROR, "pg_atomic_fetch_or_u32() #1 wrong");

    if (!(pg_atomic_fetch_or_u32(&var, 2) & 1))
        elog(ERROR, "pg_atomic_fetch_or_u32() #2 wrong");

    if (pg_atomic_read_u32(&var) != 3)
        elog(ERROR, "invalid result after pg_atomic_fetch_or_u32()");

    /* try clearing flagbits */
    if ((pg_atomic_fetch_and_u32(&var, ~2) & 3) != 3)
        elog(ERROR, "pg_atomic_fetch_and_u32() #1 wrong");

    if (pg_atomic_fetch_and_u32(&var, ~1) != 1)
        elog(ERROR, "pg_atomic_fetch_and_u32() #2 wrong: is %u",
             pg_atomic_read_u32(&var));
    /* no bits set anymore */
    if (pg_atomic_fetch_and_u32(&var, ~0) != 0)
        elog(ERROR, "pg_atomic_fetch_and_u32() #3 wrong");
}
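/*
 * Quick standalone check (illustrative, not part of the regression test
 * above) of the modular uint32 arithmetic the "numerical limits" block
 * depends on: starting from 0, adding INT_MAX twice and then 1 lands
 * exactly on UINT_MAX, and subtracting INT_MAX from that leaves
 * (uint32) INT_MAX + 1.
 */
#include <assert.h>
#include <limits.h>
#include <stdint.h>

int
main(void)
{
    uint32_t    v = 0;

    v += INT_MAX;               /* 2^31 - 1 */
    v += INT_MAX;               /* 2^32 - 2 */
    v += 1;                     /* 2^32 - 1 */
    assert(v == UINT_MAX);

    v -= INT_MAX;               /* 2^31 */
    assert(v == (uint32_t) INT_MAX + 1);
    return 0;
}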
/*
 * Perform garbage collection (if required) of file
 * @param map_path path to file map file (*.map).
 */
static bool
cfs_gc_file(char *map_path)
{
    int         md = open(map_path, O_RDWR | PG_BINARY, 0);
    FileMap    *map;
    uint32      physSize;
    uint32      usedSize;
    uint32      virtSize;
    int         suf = strlen(map_path) - 4;
    int         fd = -1,
                fd2 = -1,
                md2 = -1;
    bool        succeed = true;

    if (md < 0)
    {
        elog(LOG, "Failed to open map file %s: %m", map_path);
        return false;
    }

    map = cfs_mmap(md);
    if (map == MAP_FAILED)
    {
        elog(LOG, "Failed to map file %s: %m", map_path);
        close(md);
        return false;
    }

    usedSize = pg_atomic_read_u32(&map->usedSize);
    physSize = pg_atomic_read_u32(&map->physSize);
    virtSize = pg_atomic_read_u32(&map->virtSize);

    /* do we need to perform defragmentation? */
    if ((physSize - usedSize) * 100 > physSize * cfs_gc_threshold)
    {
        long        delay = CFS_LOCK_MIN_TIMEOUT;
        char       *file_path = (char *) palloc(suf + 1);
        char       *map_bck_path = (char *) palloc(suf + 10);
        char       *file_bck_path = (char *) palloc(suf + 5);
        FileMap    *newMap = (FileMap *) palloc0(sizeof(FileMap));
        uint32      newSize = 0;
        inode_t   **inodes = (inode_t **) palloc(RELSEG_SIZE * sizeof(inode_t *));
        bool        remove_backups = true;
        int         n_pages = virtSize / BLCKSZ;
        TimestampTz startTime, endTime;
        long        secs;
        int         usecs;
        int         i;

        startTime = GetCurrentTimestamp();

        memcpy(file_path, map_path, suf);
        file_path[suf] = '\0';
        strcat(strcpy(map_bck_path, map_path), ".bck");
        strcat(strcpy(file_bck_path, file_path), ".bck");

        while (true)
        {
            uint32      access_count = 0;

            if (pg_atomic_compare_exchange_u32(&map->lock, &access_count, CFS_GC_LOCK))
            {
                break;
            }
            if (access_count >= CFS_GC_LOCK)
            {
                /*
                 * Uhhh... looks like the last GC was interrupted.
                 * Try to recover the file.
                 */
                if (access(file_bck_path, R_OK) != 0)
                {
                    /* There is no backup file: new map should be constructed */
                    md2 = open(map_bck_path, O_RDWR | PG_BINARY, 0);
                    if (md2 >= 0)
                    {
                        /* Recover map */
                        if (!cfs_read_file(md2, newMap, sizeof(FileMap)))
                        {
                            elog(LOG, "Failed to read file %s: %m", map_bck_path);
                            goto Cleanup;
                        }
                        close(md2);
                        md2 = -1;
                        newSize = pg_atomic_read_u32(&newMap->usedSize);
                        remove_backups = false;
                        goto ReplaceMap;
                    }
                }
                else
                {
                    /*
                     * Presence of the backup file means that we still have
                     * unchanged data and map files.  Just remove the backup
                     * files, grab the lock and continue processing.
                     */
                    unlink(file_bck_path);
                    unlink(map_bck_path);
                    break;
                }
            }
            pg_usleep(delay);
            if (delay < CFS_LOCK_MAX_TIMEOUT)
            {
                delay *= 2;
            }
        }

        md2 = open(map_bck_path, O_CREAT | O_RDWR | PG_BINARY | O_TRUNC, 0600);
        if (md2 < 0)
        {
            goto Cleanup;
        }

        for (i = 0; i < n_pages; i++)
        {
            newMap->inodes[i] = map->inodes[i];
            inodes[i] = &newMap->inodes[i];
        }
        /* sort inodes by offset to improve read locality */
        qsort(inodes, n_pages, sizeof(inode_t *), cfs_cmp_page_offs);

        fd = open(file_path, O_RDWR | PG_BINARY, 0);
        if (fd < 0)
        {
            goto Cleanup;
        }

        fd2 = open(file_bck_path, O_CREAT | O_RDWR | PG_BINARY | O_TRUNC, 0600);
        if (fd2 < 0)
        {
            goto Cleanup;
        }

        for (i = 0; i < n_pages; i++)
        {
            int         size = CFS_INODE_SIZE(*inodes[i]);

            if (size != 0)
            {
                char        block[BLCKSZ];
                off_t       rc PG_USED_FOR_ASSERTS_ONLY;
                uint32      offs = CFS_INODE_OFFS(*inodes[i]);

                Assert(size <= BLCKSZ);
                rc = lseek(fd, offs, SEEK_SET);
                Assert(rc == offs);

                if (!cfs_read_file(fd, block, size))
                {
                    elog(LOG, "Failed to read file %s: %m", file_path);
                    goto Cleanup;
                }

                if (!cfs_write_file(fd2, block, size))
                {
                    elog(LOG, "Failed to write file %s: %m", file_bck_path);
                    goto Cleanup;
                }

                offs = newSize;
                newSize += size;
                *inodes[i] = CFS_INODE(size, offs);
            }
        }
        pg_atomic_write_u32(&map->usedSize, newSize);

        if (close(fd) < 0)
        {
            elog(LOG, "Failed to close file %s: %m", file_path);
            goto Cleanup;
        }
        fd = -1;

        /* Persist copy of data file */
        if (pg_fsync(fd2) < 0)
        {
            elog(LOG, "Failed to sync file %s: %m", file_bck_path);
            goto Cleanup;
        }
        if (close(fd2) < 0)
        {
            elog(LOG, "Failed to close file %s: %m", file_bck_path);
            goto Cleanup;
        }
        fd2 = -1;

        /* Persist copy of map file */
        if (!cfs_write_file(md2, newMap, sizeof(FileMap)))
        {
            elog(LOG, "Failed to write file %s: %m", map_bck_path);
            goto Cleanup;
        }
        if (pg_fsync(md2) < 0)
        {
            elog(LOG, "Failed to sync file %s: %m", map_bck_path);
            goto Cleanup;
        }
        if (close(md2) < 0)
        {
            elog(LOG, "Failed to close file %s: %m", map_bck_path);
            goto Cleanup;
        }
        md2 = -1;

        /*
         * Persist the map with CFS_GC_LOCK set: in case of a crash we will
         * know that the map may have been changed by GC.
         */
        if (cfs_msync(map) < 0)
        {
            elog(LOG, "Failed to sync map %s: %m", map_path);
            goto Cleanup;
        }
        if (pg_fsync(md) < 0)
        {
            elog(LOG, "Failed to sync file %s: %m", map_path);
            goto Cleanup;
        }

        /*
         * Now all information necessary for recovery is stored.  We are
         * ready to replace the existing file with the defragmented one.
         * Use rename and rely on the file system to provide atomicity of
         * this operation.
         */
        remove_backups = false;
        if (rename(file_bck_path, file_path) < 0)
        {
            elog(LOG, "Failed to rename file %s: %m", file_path);
            goto Cleanup;
        }

      ReplaceMap:
        /*
         * At this moment the defragmented file version is stored.  We can
         * perform an in-place update of the map.  If a crash happens at this
         * point, the map can be recovered from the backup file.
         */
        memcpy(map->inodes, newMap->inodes, n_pages * sizeof(inode_t));
        pg_atomic_write_u32(&map->usedSize, newSize);
        pg_atomic_write_u32(&map->physSize, newSize);
        map->generation += 1;   /* force all backends to reopen the file */

        /*
         * Before removing backup files and releasing the lock we need to
         * flush the updated map file.
         */
        if (cfs_msync(map) < 0)
        {
            elog(LOG, "Failed to sync map %s: %m", map_path);
            goto Cleanup;
        }
        if (pg_fsync(md) < 0)
        {
            elog(LOG, "Failed to sync file %s: %m", map_path);

          Cleanup:
            if (fd >= 0)
                close(fd);
            if (fd2 >= 0)
                close(fd2);
            if (md2 >= 0)
                close(md2);
            if (remove_backups)
            {
                unlink(file_bck_path);
                unlink(map_bck_path);
                remove_backups = false;
            }
            succeed = false;
        }
        else
        {
            remove_backups = true;  /* backups are no longer needed */
        }

        pg_atomic_fetch_sub_u32(&map->lock, CFS_GC_LOCK);   /* release lock */

        /* remove map backup file */
        if (remove_backups && unlink(map_bck_path))
        {
            elog(LOG, "Failed to unlink file %s: %m", map_bck_path);
            succeed = false;
        }

        endTime = GetCurrentTimestamp();
        TimestampDifference(startTime, endTime, &secs, &usecs);

        elog(LOG, "%d: defragment file %s: old size %d, new size %d, logical size %d, used %d, compression ratio %f, time %ld usec",
             MyProcPid, file_path, physSize, newSize, virtSize, usedSize,
             (double) virtSize / newSize, secs * USECS_PER_SEC + usecs);

        pfree(file_path);
        pfree(file_bck_path);
        pfree(map_bck_path);
        pfree(inodes);
        pfree(newMap);

        if (cfs_gc_delay != 0)
        {
            int         rc = WaitLatch(MyLatch,
                                       WL_TIMEOUT | WL_POSTMASTER_DEATH,
                                       cfs_gc_delay /* ms */ );

            if (rc & WL_POSTMASTER_DEATH)
            {
                exit(1);
            }
        }
    }
    else if (cfs_state->max_iterations == 1)
    {
        elog(LOG, "%d: file %.*s: physical size %d, logical size %d, used %d, compression ratio %f",
             MyProcPid, suf, map_path, physSize, virtSize, usedSize,
             (double) virtSize / physSize);
    }

    if (cfs_munmap(map) < 0)
    {
        elog(LOG, "Failed to unmap file %s: %m", map_path);
        succeed = false;
    }
    if (close(md) < 0)
    {
        elog(LOG, "Failed to close file %s: %m", map_path);
        succeed = false;
    }
    return succeed;
}
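/*
 * Standalone sketch (C11 atomics, illustrative names) of the lock-word
 * protocol cfs_gc_file() applies to map->lock.  The garbage collector takes
 * the file exclusively by CASing the word from 0 to a large sentinel (the
 * role of CFS_GC_LOCK above) and releases it with a matching subtraction, so
 * a value >= the sentinel observed later means a GC is running or was
 * interrupted.  The reader_enter/reader_leave helpers stand in for the
 * reference counting assumed to be done by ordinary accessors; they are not
 * taken from the original file.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define GC_LOCK 0x10000000u     /* sentinel far above any plausible reader count */

static _Atomic uint32_t lock_word = 0;

static void
reader_enter(void)
{
    atomic_fetch_add(&lock_word, 1);    /* assumed reader-side reference count */
}

static void
reader_leave(void)
{
    atomic_fetch_sub(&lock_word, 1);
}

/*
 * One attempt to take the file for GC; the caller sleeps with exponential
 * backoff and retries on failure, as the loop in cfs_gc_file() does.
 */
static bool
gc_try_lock(void)
{
    uint32_t    expected = 0;   /* only lockable when no reader is inside */

    return atomic_compare_exchange_strong(&lock_word, &expected, GC_LOCK);
}

static void
gc_unlock(void)
{
    atomic_fetch_sub(&lock_word, GC_LOCK);  /* mirrors the releasing fetch_sub above */
}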
/*
 * Update logical file size
 */
void
cfs_extend(FileMap *map, uint32 newSize)
{
    uint32      oldSize = pg_atomic_read_u32(&map->virtSize);

    while (newSize > oldSize &&
           !pg_atomic_compare_exchange_u32(&map->virtSize, &oldSize, newSize))
        ;
}
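/*
 * The same "only move forward" CAS loop as cfs_extend(), shown standalone
 * with C11 atomics (names are illustrative).  On a failed CAS 'old' is
 * refreshed with the current value, so the loop exits as soon as the stored
 * size is already >= the requested one, and concurrent extenders can never
 * shrink the value.
 */
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint32_t virt_size = 0;

static void
extend_to(uint32_t new_size)
{
    uint32_t    old = atomic_load(&virt_size);

    while (new_size > old &&
           !atomic_compare_exchange_weak(&virt_size, &old, new_size))
        ;                       /* 'old' now holds the latest value; re-check */
}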
/*
 * Run cache eviction algorithm
 *
 * It will try to evict enough entries to add up to evictRequestSize.
 * Returns the actual accumulated size of the entries evicted.
 */
int64
Cache_Evict(Cache *cache, int64 evictRequestSize)
{
    Assert(NULL != cache);
    Assert(evictRequestSize > 0);

    Cache_TimedOperationStart();

    int64       evictedSize = 0;
    uint32      unsuccessfulLoops = 0;
    bool        foundVictim = false;
    uint32      decAmount = cache->cacheHdr->policyContext.utilityDecrement;
    Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;

    while (true)
    {
        bool        wraparound = false;
        int32       entryIdx = Cache_NextClockHand(cache, &wraparound);

        Assert(entryIdx < cache->cacheHdr->nEntries);

        Cache_AddPerfCounter(&cacheStats->noEntriesScanned, 1 /* delta */ );

        if (wraparound)
        {
            unsuccessfulLoops++;

            Cache_AddPerfCounter(&cacheStats->noWraparound, 1 /* delta */ );

            if (!foundVictim)
            {
                /*
                 * We looped around and did not manage to evict any entries.
                 * Double the amount we decrement the eviction candidates'
                 * utility by. This makes the eviction algorithm look for a
                 * victim more aggressively.
                 */
                if (decAmount <= CACHE_MAX_UTILITY / 2)
                {
                    decAmount = 2 * decAmount;
                }
                else
                {
                    decAmount = CACHE_MAX_UTILITY;
                }
            }
            foundVictim = false;

            if (unsuccessfulLoops > cache->cacheHdr->policyContext.maxClockLoops)
            {
                /*
                 * Can't find any cached and unused entries that are
                 * candidates for eviction, even after looping around
                 * maxClockLoops times. Give up looking for victims.
                 */
                Cache_TimedOperationRecord(&cacheStats->timeEvictions,
                                           &cacheStats->maxTimeEvict);
                break;
            }
        }

        CacheEntry *crtEntry = Cache_GetEntryByIndex(cache->cacheHdr, entryIdx);

        if (crtEntry->state != CACHE_ENTRY_CACHED)
        {
            /* Not interested in free/acquired/deleted entries. Go back and advance clock hand */
            continue;
        }

        CacheAnchor *anchor = (CacheAnchor *) SyncHTLookup(cache->syncHashtable,
                                                           &crtEntry->hashvalue);

        if (NULL == anchor)
        {
            /* There's no anchor for this entry, someone might have snatched it in the meantime */
            continue;
        }

        SpinLockAcquire(&anchor->spinlock);

        if (crtEntry->state != CACHE_ENTRY_CACHED)
        {
            /* Someone freed this entry in the meantime, before we got a chance to acquire the anchor lock */
            SpinLockRelease(&anchor->spinlock);
            SyncHTRelease(cache->syncHashtable, (void *) anchor);
            continue;
        }

        /* Ok, did all the checks, this entry must be valid now */
        CACHE_ASSERT_VALID(crtEntry);

        if (crtEntry->pinCount > 0)
        {
            /* Entry is in use and can't be evicted. Go back and advance clock hand */
            SpinLockRelease(&anchor->spinlock);
            SyncHTRelease(cache->syncHashtable, (void *) anchor);
            continue;
        }

        /* Decrement utility */
        gp_atomic_dec_positive_32(&crtEntry->utility, decAmount);

        /* Just decremented someone's utility. Reset our unsuccessful loops counter */
        unsuccessfulLoops = 0;

        if (crtEntry->utility > 0)
        {
            /* Entry has non-zero utility, we shouldn't evict it. Go back and advance clock hand */
            SpinLockRelease(&anchor->spinlock);
            SyncHTRelease(cache->syncHashtable, (void *) anchor);
            continue;
        }

        /* Found our victim */
        Assert(0 == crtEntry->pinCount);
        CACHE_ASSERT_VALID(crtEntry);
        Assert(crtEntry->utility == 0);

        uint32      expected = CACHE_ENTRY_CACHED;

#ifdef USE_ASSERT_CHECKING
        bool        casResult =
#endif
        pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &crtEntry->state,
                                       &expected, CACHE_ENTRY_DELETED);
        Assert(casResult);

        SpinLockRelease(&anchor->spinlock);

        foundVictim = true;
        evictedSize += crtEntry->size;

        /* Don't update noFreeEntries yet. It will be done in Cache_AddToFreelist */
        Cache_DecPerfCounter(&cacheStats->noCachedEntries, 1 /* delta */ );

        /* Unlink entry from the anchor chain */
        SpinLockAcquire(&anchor->spinlock);
        Cache_UnlinkEntry(cache, anchor, crtEntry);
        SpinLockRelease(&anchor->spinlock);

        SyncHTRelease(cache->syncHashtable, (void *) anchor);

        /* Call client-side cleanup for entry */
        cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(crtEntry));

        Cache_LockEntry(cache, crtEntry);

        Assert(crtEntry->state == CACHE_ENTRY_DELETED);
        crtEntry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
        Cache_MemsetPayload(cache, crtEntry);
#endif

        Cache_UnlockEntry(cache, crtEntry);

        Cache_AddToFreelist(cache, crtEntry);

        Cache_AddPerfCounter(&cacheStats->noEvicts, 1 /* delta */ );
        Cache_TimedOperationRecord(&cacheStats->timeEvictions,
                                   &cacheStats->maxTimeEvict);

        if (evictedSize >= evictRequestSize)
        {
            /* We evicted as much as requested */
            break;
        }

        Cache_TimedOperationStart();
    }

    return evictedSize;
}
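/*
 * Sketch of a "decrement but never below zero" helper, equivalent in spirit
 * to the gp_atomic_dec_positive_32() call used above (illustrative, C11
 * atomics; the exact semantics of the real helper are assumed).  A plain
 * fetch_sub could underflow the unsigned utility counter, so the clamped
 * subtraction is done in a CAS loop.
 */
#include <stdatomic.h>
#include <stdint.h>

static uint32_t
dec_positive_u32(_Atomic uint32_t *v, uint32_t delta)
{
    uint32_t    old = atomic_load(v);
    uint32_t    newval;

    do
    {
        newval = (old > delta) ? old - delta : 0;
    } while (!atomic_compare_exchange_weak(v, &old, newval));

    return newval;
}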
/*
 * Inserts a previously acquired entry in the cache.
 *
 * This function should never fail.
 */
void
Cache_Insert(Cache *cache, CacheEntry *entry)
{
    Assert(NULL != cache);
    Assert(NULL != entry);

    Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;

    Cache_TimedOperationStart();

    Cache_AddPerfCounter(&cacheStats->noInserts, 1 /* delta */ );
    Cache_AddPerfCounter(&cacheStats->noCachedEntries, 1 /* delta */ );
    Cache_DecPerfCounter(&cacheStats->noAcquiredEntries, 1 /* delta */ );
    Cache_AddPerfCounter64(&cacheStats->totalEntrySize, entry->size);

    Cache_ComputeEntryHashcode(cache, entry);

    /* Look up or insert anchor element for this entry */
    bool        existing = false;
    volatile CacheAnchor *anchor = SyncHTInsert(cache->syncHashtable,
                                                &entry->hashvalue, &existing);

    /*
     * This should never happen since the SyncHT has as many entries as the
     * SharedCache, and we'll run out of SharedCache entries before we fill
     * up the SyncHT.
     */
    insist_log(NULL != anchor, "Could not insert in the cache: SyncHT full");

    /* Acquire anchor lock to touch the chain */
    SpinLockAcquire(&anchor->spinlock);

    if (NULL == anchor->firstEntry)
    {
        Assert(NULL == anchor->lastEntry);
        anchor->firstEntry = anchor->lastEntry = entry;
    }
    else
    {
        Assert(NULL != anchor->lastEntry);
        anchor->lastEntry->nextEntry = entry;
        anchor->lastEntry = entry;
    }
    entry->nextEntry = NULL;

    Cache_EntryAddRef(cache, entry);

    uint32      expected = CACHE_ENTRY_ACQUIRED;

#ifdef USE_ASSERT_CHECKING
    bool        casResult =
#endif
    pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &entry->state,
                                   &expected, CACHE_ENTRY_CACHED);
    Assert(casResult);

    Assert(NULL != anchor->firstEntry && NULL != anchor->lastEntry);

    SpinLockRelease(&anchor->spinlock);

#ifdef USE_ASSERT_CHECKING
    bool        deleted =
#endif
    SyncHTRelease(cache->syncHashtable, (void *) anchor);
    Assert(!deleted);

    Cache_TimedOperationRecord(&cacheStats->timeInserts,
                               &cacheStats->maxTimeInsert);
}
/*
 * Marks the current process as clean. If all the processes are marked
 * as clean for this session (i.e., cleanupCountdown == 0 in the
 * MySessionState) then we reset the session's runaway status as well as
 * the runaway detector flag (i.e., a new runaway detector can run).
 *
 * Parameters:
 *      ignoredCleanup: whether the cleanup was ignored, i.e., no
 *      elog(ERROR, ...) was thrown. In that case a deactivated process is
 *      not reactivated, as the deactivation didn't get interrupted.
 */
void
RunawayCleaner_RunawayCleanupDoneForProcess(bool ignoredCleanup)
{
    /*
     * We don't do anything if we don't have an ongoing cleanup, or we
     * already finished cleanup once for the current runaway event.
     */
    if (beginCleanupRunawayVersion != *latestRunawayVersion ||
        endCleanupRunawayVersion == beginCleanupRunawayVersion)
    {
        /* Either we never started cleanup, or we already finished */
        return;
    }

    /* Disable repeating call */
    endCleanupRunawayVersion = beginCleanupRunawayVersion;

    Assert(NULL != MySessionState);

    /*
     * As the current cleanup holds leverage on the cleanupCountdown, the
     * session must stay runaway at least until the current process marks
     * itself clean.
     */
    Assert(MySessionState->runawayStatus != RunawayStatus_NotRunaway);

    /* We only cleanup if we were active when the runaway event happened */
    Assert((!isProcessActive && *latestRunawayVersion < deactivationVersion &&
            *latestRunawayVersion > activationVersion) ||
           (*latestRunawayVersion > activationVersion &&
            (activationVersion >= deactivationVersion && isProcessActive)));

    /*
     * We don't reactivate if the process is already active or a deactivated
     * process never errored out during deactivation (i.e., failed to
     * complete deactivation).
     */
    if (!isProcessActive && !ignoredCleanup)
    {
        Assert(1 == *isRunawayDetector);
        Assert(0 < MySessionState->cleanupCountdown);

        /*
         * As the process threw ERROR instead of going into the ReadCommand()
         * blocking state, we have to reactivate the process from its current
         * Deactivated state.
         */
        IdleTracker_ActivateProcess();
    }

    Assert(0 < MySessionState->cleanupCountdown);

#ifdef USE_ASSERT_CHECKING
    int         cleanProgress =
#endif
    pg_atomic_add_fetch_u32((pg_atomic_uint32 *) &MySessionState->cleanupCountdown, -1);
    Assert(0 <= cleanProgress);

    uint32      expected = 0;
    bool        finalCleaner =
        pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &MySessionState->cleanupCountdown,
                                       &expected, CLEANUP_COUNTDOWN_BEFORE_RUNAWAY);

    if (finalCleaner)
    {
        /*
         * The final cleaner is responsible for resetting the runaway flag
         * and re-enabling the runaway detection process.
         */
        RunawayCleaner_RunawayCleanupDoneForSession();
    }

    /*
     * Finally we are done with all critical cleanup, which includes
     * releasing all our memory and releasing our cleanup counter so that
     * another session can be marked as runaway, if needed. Now we have some
     * head room to actually record our usage.
     */
    write_stderr("Logging memory usage because of runaway cleanup. Note, this is a post-cleanup logging and may be incomplete.");

    MemoryAccounting_SaveToLog();
    MemoryContextStats(TopMemoryContext);
}
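/*
 * Standalone sketch (C11 atomics, illustrative names) of the countdown
 * pattern above: each cleaning process decrements the shared counter, and
 * whoever manages to CAS it from 0 back to the "armed" sentinel is elected
 * as the single final cleaner that resets the session-wide runaway state.
 * COUNTDOWN_ARMED stands in for CLEANUP_COUNTDOWN_BEFORE_RUNAWAY; its value
 * here is made up.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define COUNTDOWN_ARMED UINT32_MAX  /* assumed stand-in for the re-arm sentinel */

static _Atomic uint32_t cleanup_countdown;

/*
 * Returns true for exactly one caller per cleanup round (assuming the
 * counter is not re-armed and decremented again concurrently).
 */
static bool
cleanup_done(void)
{
    uint32_t    expected = 0;

    atomic_fetch_sub(&cleanup_countdown, 1);

    return atomic_compare_exchange_strong(&cleanup_countdown, &expected,
                                          COUNTDOWN_ARMED);
}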