/* * UpdateTimeAtomically * * Updates a OOMTimeType variable atomically, using compare_and_swap_* */ void UpdateTimeAtomically(volatile OOMTimeType* time_var) { bool updateCompleted = false; OOMTimeType newOOMTime; while (!updateCompleted) { #if defined(__x86_64__) newOOMTime = GetCurrentTimestamp(); #else struct timeval curTime; gettimeofday(&curTime, NULL); newOOMTime = (uint32)curTime.tv_sec; #endif OOMTimeType oldOOMTime = *time_var; #if defined(__x86_64__) updateCompleted = compare_and_swap_64((uint64*)time_var, (uint64)oldOOMTime, (uint64)newOOMTime); #else updateCompleted = compare_and_swap_32((uint32*)time_var, (uint32)oldOOMTime, (uint32)newOOMTime); #endif } }
/*
 * Hand out a fresh cache entry taken from the pre-allocated freelist.
 *
 * The caller must later either insert the returned entry in the cache or
 * surrender it.
 *
 * When the cache has a populateEntry callback, it is invoked on the entry's
 * payload before the entry is returned; populate_param is an opaque value
 * that is passed through to that callback unchanged.
 *
 * Returns NULL if the freelist is empty.
 */
CacheEntry *
Cache_AcquireEntry(Cache *cache, void *populate_param)
{
	Assert(NULL != cache);

	CacheEntry *entry = Cache_GetFreeElement(cache);

	if (entry == NULL)
	{
		/* Nothing left on the freelist */
		return NULL;
	}

	CACHE_ASSERT_WIPED(entry);

	/* FREE -> RESERVED */
#ifdef USE_ASSERT_CHECKING
	int32 reserved =
#endif
	compare_and_swap_32(&entry->state, CACHE_ENTRY_FREE, CACHE_ENTRY_RESERVED);
	Assert(1 == reserved);

	/*
	 * While the entry is RESERVED nobody else (views included) will try to
	 * read it, so the payload can be filled in without taking the entry lock.
	 */
	if (cache->populateEntry)
	{
		void *payload = CACHE_ENTRY_PAYLOAD(entry);

		cache->populateEntry(payload, populate_param);
	}

	/* RESERVED -> ACQUIRED */
#ifdef USE_ASSERT_CHECKING
	int32 acquired =
#endif
	compare_and_swap_32(&entry->state, CACHE_ENTRY_RESERVED, CACHE_ENTRY_ACQUIRED);
	Assert(1 == acquired);

	Cache_RegisterCleanup(cache, entry, false /* isCachedEntry */ );

	return entry;
}
/*
 * Mark an entry for removal from the cache.
 *
 * The entry is not immediately deleted, as there is at least one client
 * using it. After this step the entry will not be found by look-up
 * operations; it is physically removed once all using clients release it.
 *
 * This function is not synchronized, and multiple clients can try to mark
 * the same entry deleted. The CACHED -> DELETED transition is done with a
 * compare-and-swap, and the cache statistics are adjusted only by the caller
 * whose swap actually performed the transition, so the noCachedEntries and
 * noDeletedEntries counters are never adjusted twice for one entry.
 * Assert-enabled builds still flag a failed transition.
 *
 * cache: cache the entry belongs to; must not be NULL.
 * entry: entry to mark as deleted; must not be NULL.
 */
void
Cache_Remove(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != cache);
	Assert(NULL != entry);

	int32 casResult = compare_and_swap_32(&entry->state, CACHE_ENTRY_CACHED, CACHE_ENTRY_DELETED);
	Assert(casResult == 1);

	if (!casResult)
	{
		/*
		 * The entry was not in CACHED state, so this call changed nothing.
		 * Leave the counters alone to keep them consistent with the entry
		 * states.
		 */
		return;
	}

	Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noCachedEntries, -1 /* delta */);
	Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noDeletedEntries, 1 /* delta */);
}
/*
 * update_spins_per_delay
 *
 * Publishes a recomputed spins_per_delay value into ProcGlobal.
 *
 * The stored value is read, fed to recompute_spins_per_delay(), and the
 * result is installed with a compare-and-swap. If the stored value changed
 * in between, the swap fails and the whole sequence is retried.
 */
static void
update_spins_per_delay()
{
	volatile PROC_HDR *procglobal = ProcGlobal;

	for (;;)
	{
		int observed = procglobal->spins_per_delay;
		int recomputed = recompute_spins_per_delay(observed);

		if (compare_and_swap_32((uint32*)&procglobal->spins_per_delay,
								observed,
								recomputed))
		{
			/* Our value went in; done */
			break;
		}
	}
}
/*
 * Run cache eviction algorithm
 *
 * Advances the clock hand over the cache entries. Every entry that is in
 * CACHED state and not pinned gets its utility decremented by the current
 * decrement amount; an entry whose utility is zero after the decrement is
 * evicted (marked DELETED, unlinked from its anchor chain, cleaned up,
 * marked FREE and put back on the freelist).
 *
 * The scan stops when the accumulated size of the evicted entries reaches
 * evictRequestSize, or when the clock hand has wrapped around more than
 * policyContext.maxClockLoops times without any utility having been
 * decremented in between.
 *
 * cache: cache to evict from; must not be NULL.
 * evictRequestSize: total entry size the caller would like evicted; must be
 *		positive.
 *
 * Returns the actual accumulated size of the entries evicted. This is less
 * than evictRequestSize (possibly zero) when the algorithm gave up.
 */
int64
Cache_Evict(Cache *cache, int64 evictRequestSize)
{
	Assert(NULL != cache);
	Assert(evictRequestSize > 0);

	/*
	 * Paired with the Cache_TimedOperationRecord() calls below; presumably
	 * starts the timer for the eviction being attempted.
	 */
	Cache_TimedOperationStart();

	int64 evictedSize = 0;
	/* Number of wraparounds seen since a utility was last decremented */
	uint32 unsuccessfulLoops = 0;
	/* Whether an entry was evicted since the last wraparound */
	bool foundVictim = false;
	/* Working copy of the utility decrement; may grow during this call */
	uint32 decAmount = cache->cacheHdr->policyContext.utilityDecrement;
	Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;

	while (true)
	{
		bool wraparound = false;
		int32 entryIdx = Cache_NextClockHand(cache, &wraparound);
		Assert(entryIdx < cache->cacheHdr->nEntries);

		Cache_UpdatePerfCounter(&cacheStats->noEntriesScanned,1 /* delta */);

		if (wraparound)
		{
			unsuccessfulLoops++;

			Cache_UpdatePerfCounter(&cacheStats->noWraparound, 1 /* delta */);

			if (!foundVictim)
			{
				/*
				 * We looped around and did not manage to evict any entries.
				 * Double the amount we decrement eviction candidate's utility by,
				 * capping it at CACHE_MAX_UTILITY.
				 * This makes the eviction algorithm look for a victim more aggressively
				 */
				if (decAmount <= CACHE_MAX_UTILITY / 2)
				{
					decAmount = 2 * decAmount;
				}
				else
				{
					decAmount = CACHE_MAX_UTILITY;
				}
			}
			/* Start tracking victims afresh for the next trip around */
			foundVictim = false;

			if (unsuccessfulLoops > cache->cacheHdr->policyContext.maxClockLoops)
			{
				/*
				 * Can't find any cached and unused entries candidates for evictions, even after looping around
				 * maxClockLoops times. Give up looking for victims.
				 */
				Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);
				break;
			}
		}

		CacheEntry *crtEntry = Cache_GetEntryByIndex(cache->cacheHdr, entryIdx);
		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Not interested in free/acquired/deleted entries. Go back and advance clock hand */
			continue;
		}

		CacheAnchor *anchor = (CacheAnchor *) SyncHTLookup(cache->syncHashtable, &crtEntry->hashvalue);
		if (NULL == anchor)
		{
			/* There's no anchor for this entry, someone might have snatched it in the meantime */
			continue;
		}

		SpinLockAcquire(&anchor->spinlock);

		/* Re-check the state now that the anchor lock is held */
		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Someone freed this entry in the meantime, before we got a chance to acquire the anchor lock */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Ok, did all the checks, this entry must be valid now */
		CACHE_ASSERT_VALID(crtEntry);

		if (crtEntry->pinCount > 0)
		{
			/* Entry is in use and can't be evicted. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Decrement utility */
		gp_atomic_dec_positive_32(&crtEntry->utility, decAmount);
		/* Just decremented someone's utility. Reset our unsuccessful loops counter */
		unsuccessfulLoops = 0;

		if (crtEntry->utility > 0)
		{
			/* Entry has non-zero utility, we shouldn't evict it. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Found our victim */
		Assert(0 == crtEntry->pinCount);
		CACHE_ASSERT_VALID(crtEntry);
		Assert(crtEntry->utility == 0);

		/*
		 * Mark the victim DELETED. The result of the swap is only captured
		 * (and checked) in assert-enabled builds.
		 */
#if USE_ASSERT_CHECKING
		int32 casResult =
#endif
		compare_and_swap_32(&crtEntry->state, CACHE_ENTRY_CACHED, CACHE_ENTRY_DELETED);
		Assert(1 == casResult);

		SpinLockRelease(&anchor->spinlock);
		foundVictim = true;
		evictedSize += crtEntry->size;

		/* Don't update noFreeEntries yet. It will be done in Cache_AddToFreelist */
		Cache_UpdatePerfCounter(&cacheStats->noCachedEntries, -1 /* delta */);

		/*
		 * Unlink entry from the anchor chain.
		 * NOTE(review): the anchor spinlock was released just above and is
		 * re-acquired here; confirm whether the gap is intentional.
		 */
		SpinLockAcquire(&anchor->spinlock);
		Cache_UnlinkEntry(cache, anchor, crtEntry);
		SpinLockRelease(&anchor->spinlock);

		SyncHTRelease(cache->syncHashtable, (void *) anchor);

		if (NULL != cache->cleanupEntry)
		{
			/* Call client-side cleanup for entry */
			cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(crtEntry));
		}

		/* Flip the entry to FREE while holding the entry lock */
		Cache_LockEntry(cache, crtEntry);

		Assert(crtEntry->state == CACHE_ENTRY_DELETED);
		crtEntry->state = CACHE_ENTRY_FREE;

		/* Payload is only overwritten in assert-enabled builds */
#if USE_ASSERT_CHECKING
		Cache_MemsetPayload(cache, crtEntry);
#endif

		Cache_UnlockEntry(cache, crtEntry);

		Cache_AddToFreelist(cache, crtEntry);

		Cache_UpdatePerfCounter(&cacheStats->noEvicts, 1 /* delta */);
		Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);

		if (evictedSize >= evictRequestSize)
		{
			/* We evicted as much as requested */
			break;
		}

		/* More to evict: start a new timed operation for the next victim */
		Cache_TimedOperationStart();
	}

	return evictedSize;
}
/* * Finds and notifies the top vmem consuming session. */ static void RedZoneHandler_FlagTopConsumer() { if (!vmemTrackerInited) { return; } Assert(NULL != MySessionState); bool success = compare_and_swap_32((uint32*) isRunawayDetector, 0, 1); /* If successful then this process must be the runaway detector */ AssertImply(success, 1 == *isRunawayDetector); /* * Someone already determined the runaway query, so nothing to do. This * will also prevent re-entry to this method by a cleaning session. */ if (!success) { return; } /* * Grabbing a shared lock prevents others to modify the SessionState * data structure, therefore ensuring that we don't flag someone * who was already dying. A shared lock is enough as we access the * data structure in a read-only manner. */ LWLockAcquire(SessionStateLock, LW_SHARED); int32 maxVmem = 0; int32 maxActiveVmem = 0; SessionState *maxActiveVmemSessionState = NULL; SessionState *maxVmemSessionState = NULL; SessionState *curSessionState = AllSessionStateEntries->usedList; while (curSessionState != NULL) { Assert(INVALID_SESSION_ID != curSessionState->sessionId); int32 curVmem = curSessionState->sessionVmem; Assert(maxActiveVmem <= maxVmem); if (curVmem > maxActiveVmem) { if (curVmem > maxVmem) { maxVmemSessionState = curSessionState; maxVmem = curVmem; } /* * Only consider sessions with at least 1 active process. As we * are *not* grabbings locks, this does not guarantee that by the * time we finish walking all sessions the chosen session will * still have active process. 
*/ if (curSessionState->activeProcessCount > 0) { maxActiveVmemSessionState = curSessionState; maxActiveVmem = curVmem; } } curSessionState = curSessionState->next; } if (NULL != maxActiveVmemSessionState) { SpinLockAcquire(&maxActiveVmemSessionState->spinLock); /* * Now that we grabbed lock, make sure we have at least 1 active process * before flagging this session for termination */ if (0 < maxActiveVmemSessionState->activeProcessCount) { /* * First update the runaway event detection version so that * an active process of the runaway session is forced to clean up before * it deactivates. As we grabbed the spin lock, no process of the runaway * session can deactivate unless we release the lock. The other sessions * don't care what global runaway version they observe as the runaway * event is not pertinent to them. * * We don't need any lock here as the runaway detector is singleton, * and only the detector can update this variable. */ *latestRunawayVersion = *CurrentVersion + 1; /* * Make sure that the runaway event version is not shared with any other * processes, and not shared with any other deactivation/reactivation version */ *CurrentVersion = *CurrentVersion + 2; Assert(CLEANUP_COUNTDOWN_BEFORE_RUNAWAY == maxActiveVmemSessionState->cleanupCountdown); /* * Determine how many processes need to cleanup to mark the session clean. 
*/ maxActiveVmemSessionState->cleanupCountdown = maxActiveVmemSessionState->activeProcessCount; if (maxActiveVmemSessionState == maxVmemSessionState) { /* Finally signal the runaway process for cleanup */ maxActiveVmemSessionState->runawayStatus = RunawayStatus_PrimaryRunawaySession; } else { maxActiveVmemSessionState->runawayStatus = RunawayStatus_SecondaryRunawaySession; } /* Save the amount of vmem session was holding when it was flagged as runaway */ maxActiveVmemSessionState->sessionVmemRunaway = maxActiveVmemSessionState->sessionVmem; /* Save the command count currently running in the runaway session */ maxActiveVmemSessionState->commandCountRunaway = gp_command_count; } else { /* * Failed to find any viable runaway session. Reset runaway detector flag * for another round of runaway determination at a later time. As we couldn't * find any runaway session, the CurrentVersion is not changed. */ *isRunawayDetector = 0; } SpinLockRelease(&maxActiveVmemSessionState->spinLock); } else { /* * No active session to mark as runaway. So, reenable the runaway detection process */ *isRunawayDetector = 0; } LWLockRelease(SessionStateLock); }
/*
 * Puts a previously acquired entry into the cache.
 *
 * The entry is appended to the chain of the anchor matching its hash value
 * (the anchor is created if needed), gets a reference added, and moves from
 * ACQUIRED to CACHED state.
 *
 * This function should never fail.
 */
void
Cache_Insert(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != cache);
	Assert(NULL != entry);

	Cache_Stats *stats = &cache->cacheHdr->cacheStats;

	Cache_TimedOperationStart();

	Cache_UpdatePerfCounter(&stats->noInserts, 1 /* delta */);
	Cache_UpdatePerfCounter(&stats->noCachedEntries, 1 /* delta */);
	Cache_UpdatePerfCounter(&stats->noAcquiredEntries, -1 /* delta */);
	Cache_UpdatePerfCounter64(&stats->totalEntrySize, entry->size);

	Cache_ComputeEntryHashcode(cache, entry);

	/* Find the anchor element for this entry, inserting one if needed */
	bool anchorExisted = false;
	volatile CacheAnchor *anchor =
		SyncHTInsert(cache->syncHashtable, &entry->hashvalue, &anchorExisted);

	/*
	 * A NULL anchor should never happen: the SyncHT has as many entries as
	 * the SharedCache, so we run out of SharedCache entries before the SyncHT
	 * fills up.
	 */
	insist_log(NULL != anchor, "Could not insert in the cache: SyncHT full");

	/* The chain may only be touched while holding the anchor lock */
	SpinLockAcquire(&anchor->spinlock);

	if (NULL != anchor->firstEntry)
	{
		/* Non-empty chain: append at the tail */
		Assert(NULL != anchor->lastEntry);
		anchor->lastEntry->nextEntry = entry;
		anchor->lastEntry = entry;
	}
	else
	{
		/* Empty chain: the entry becomes both head and tail */
		Assert(NULL == anchor->lastEntry);
		anchor->lastEntry = entry;
		anchor->firstEntry = entry;
	}
	entry->nextEntry = NULL;

	Cache_EntryAddRef(cache, entry);

	/* ACQUIRED -> CACHED */
#ifdef USE_ASSERT_CHECKING
	int32 cached =
#endif
	compare_and_swap_32(&entry->state, CACHE_ENTRY_ACQUIRED, CACHE_ENTRY_CACHED);
	Assert(1 == cached);

	Assert(NULL != anchor->firstEntry && NULL != anchor->lastEntry);

	SpinLockRelease(&anchor->spinlock);

	/* The anchor now has a chain, so releasing it must not delete it */
#ifdef USE_ASSERT_CHECKING
	bool anchorDeleted =
#endif
	SyncHTRelease(cache->syncHashtable, (void *) anchor);
	Assert(!anchorDeleted);

	Cache_TimedOperationRecord(&stats->timeInserts, &stats->maxTimeInsert);
}
/* * Marks the current process as clean. If all the processes are marked * as clean for this session (i.e., cleanupCountdown == 0 in the * MySessionState) then we reset session's runaway status as well as * the runaway detector flag (i.e., a new runaway detector can run). * * Parameters: * ignoredCleanup: whether the cleanup was ignored, i.e., no elog(ERROR, ...) * was thrown. In such case a deactivated process is not reactivated as the * deactivation didn't get interrupted. */ void RunawayCleaner_RunawayCleanupDoneForProcess(bool ignoredCleanup) { /* * We don't do anything if we don't have an ongoing cleanup, or we already finished * cleanup once for the current runaway event */ if (beginCleanupRunawayVersion != *latestRunawayVersion || endCleanupRunawayVersion == beginCleanupRunawayVersion) { /* Either we never started cleanup, or we already finished */ return; } /* Disable repeating call */ endCleanupRunawayVersion = beginCleanupRunawayVersion; Assert(NULL != MySessionState); /* * As the current cleanup holds leverage on the cleanupCountdown, * the session must stay as runaway at least until the current * process marks itself clean */ Assert(MySessionState->runawayStatus != RunawayStatus_NotRunaway); /* We only cleanup if we were active when the runaway event happened */ Assert((!isProcessActive && *latestRunawayVersion < deactivationVersion && *latestRunawayVersion > activationVersion) || (*latestRunawayVersion > activationVersion && (activationVersion >= deactivationVersion && isProcessActive))); /* * We don't reactivate if the process is already active or a deactivated * process never errored out during deactivation (i.e., failed to complete * deactivation) */ if (!isProcessActive && !ignoredCleanup) { Assert(1 == *isRunawayDetector); Assert(0 < MySessionState->cleanupCountdown); /* * As the process threw ERROR instead of going into ReadCommand() blocking * state, we have to reactivate the process from its current Deactivated * state */ 
IdleTracker_ActivateProcess(); } Assert(0 < MySessionState->cleanupCountdown); #if USE_ASSERT_CHECKING int cleanProgress = #endif gp_atomic_add_32(&MySessionState->cleanupCountdown, -1); Assert(0 <= cleanProgress); bool finalCleaner = compare_and_swap_32((uint32*) &MySessionState->cleanupCountdown, 0, CLEANUP_COUNTDOWN_BEFORE_RUNAWAY); if (finalCleaner) { /* * The final cleaner is responsible to reset the runaway flag, * and enable the runaway detection process. */ RunawayCleaner_RunawayCleanupDoneForSession(); } /* * Finally we are done with all critical cleanup, which includes releasing all our memory and * releasing our cleanup counter so that another session can be marked as runaway, if needed. * Now, we have some head room to actually record our usage. */ write_stderr("Logging memory usage because of runaway cleanup. Note, this is a post-cleanup logging and may be incomplete."); MemoryAccounting_SaveToLog(); MemoryContextStats(TopMemoryContext); }
/* * InitAuxiliaryProcess -- create a per-auxiliary-process data structure * * This is called by bgwriter and similar processes so that they will have a * MyProc value that's real enough to let them wait for LWLocks. The PGPROC * and sema that are assigned are one of the extra ones created during * InitProcGlobal. * * Auxiliary processes are presently not expected to wait for real (lockmgr) * locks, so we need not set up the deadlock checker. They are never added * to the ProcArray or the sinval messaging mechanism, either. They also * don't get a VXID assigned, since this is only useful when we actually * hold lockmgr locks. */ void InitAuxiliaryProcess(void) { PGPROC *auxproc; int proctype; int i; /* * ProcGlobal should be set up already (if we are a backend, we inherit * this by fork() or EXEC_BACKEND mechanism from the postmaster). */ if (ProcGlobal == NULL || AuxiliaryProcs == NULL) elog(PANIC, "proc header uninitialized"); if (MyProc != NULL) elog(ERROR, "you already exist"); /* * Find a free auxproc entry. Use compare_and_swap to avoid locking. */ for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++) { auxproc = &AuxiliaryProcs[proctype]; if (compare_and_swap_32((uint32*)(&(auxproc->pid)), 0, MyProcPid)) { /* Find a free entry, break here. */ break; } } if (proctype >= NUM_AUXILIARY_PROCS) { elog(FATAL, "all AuxiliaryProcs are in use"); } set_spins_per_delay(ProcGlobal->spins_per_delay); MyProc = auxproc; lockHolderProcPtr = auxproc; /* * Initialize all fields of MyProc, except for the semaphore which was * prepared for us by InitProcGlobal. 
*/ SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyProc->mppLocalProcessSerial = 0; MyProc->mppSessionId = 0; MyProc->mppIsWriter = false; MyProc->inVacuum = false; MyProc->postmasterResetRequired = true; MyProc->lwWaiting = false; MyProc->lwExclusive = false; MyProc->lwWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; for (i = 0; i < NUM_LOCK_PARTITIONS; i++) SHMQueueInit(&(MyProc->myProcLocks[i])); /* * We might be reusing a semaphore that belonged to a failed process. So * be careful and reinitialize its value here. (This is not strictly * necessary anymore, but seems like a good idea for cleanliness.) */ PGSemaphoreReset(&MyProc->sem); MyProc->queryCommandId = -1; /* * Arrange to clean up at process exit. */ on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype)); }