Example #1
File: memprot.c Project: ricky-wu/gpdb
/*
 * UpdateTimeAtomically
 *
 * Updates an OOMTimeType variable atomically, using compare_and_swap_*
 */
void UpdateTimeAtomically(volatile OOMTimeType* time_var)
{
	bool updateCompleted = false;

	OOMTimeType newOOMTime;

	while (!updateCompleted)
	{
#if defined(__x86_64__)
		newOOMTime = GetCurrentTimestamp();
#else
		struct timeval curTime;
		gettimeofday(&curTime, NULL);

		newOOMTime = (uint32)curTime.tv_sec;
#endif
		OOMTimeType oldOOMTime = *time_var;

#if defined(__x86_64__)
		updateCompleted = compare_and_swap_64((uint64*)time_var,
				(uint64)oldOOMTime,
				(uint64)newOOMTime);
#else
		updateCompleted = compare_and_swap_32((uint32*)time_var,
				(uint32)oldOOMTime,
				(uint32)newOOMTime);
#endif
	}
}
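/*
 * The compare_and_swap_32/compare_and_swap_64 primitives that these examples
 * rely on are defined elsewhere in the tree. As a reading aid, below is a
 * minimal sketch of how they could be backed by GCC's __sync builtins; this
 * is an assumption for illustration, not gpdb's actual definition (which may
 * use per-platform assembly).
 */
static inline int32
compare_and_swap_32_sketch(volatile uint32 *dest, uint32 oldval, uint32 newval)
{
	/* Returns 1 iff *dest was oldval and has been replaced by newval. */
	return __sync_bool_compare_and_swap(dest, oldval, newval) ? 1 : 0;
}

static inline int32
compare_and_swap_64_sketch(volatile uint64 *dest, uint64 oldval, uint64 newval)
{
	return __sync_bool_compare_and_swap(dest, oldval, newval) ? 1 : 0;
}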
/*
 * Retrieve a new cache entry from the pre-allocated freelist.
 * The client has to either insert the entry in the cache or surrender it.
 *
 * This function calls the populateEntry callback function to populate the
 * entry before returning it to the client.
 *
 * populate_param is the opaque parameter to be passed to the populateEntry function.
 *
 * Returns NULL if the freelist is empty.
 */
CacheEntry *
Cache_AcquireEntry(Cache *cache, void *populate_param)
{
	Assert(NULL != cache);

	CacheEntry *newEntry = Cache_GetFreeElement(cache);
	if (NULL == newEntry)
	{
		return NULL;
	}

	CACHE_ASSERT_WIPED(newEntry);

#ifdef USE_ASSERT_CHECKING
	int32 casResult =
#endif
	compare_and_swap_32(&newEntry->state, CACHE_ENTRY_FREE, CACHE_ENTRY_RESERVED);
	Assert(1 == casResult);

	/*
	 * In RESERVED state nobody else will try to read this entry, not even
	 * the views. No need to lock the entry while populating.
	 */

	if (cache->populateEntry)
	{
		cache->populateEntry(CACHE_ENTRY_PAYLOAD(newEntry), populate_param);
	}

#ifdef USE_ASSERT_CHECKING
	casResult =
#endif
	compare_and_swap_32(&newEntry->state, CACHE_ENTRY_RESERVED, CACHE_ENTRY_ACQUIRED);
	Assert(1 == casResult);

	Cache_RegisterCleanup(cache, newEntry, false /* isCachedEntry */ );

	return newEntry;
}
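/*
 * Hedged usage sketch of the acquire/insert protocol described above. The
 * function name and the neededEvictSize parameter are hypothetical
 * placeholders, and the Cache_Release() surrender path is an assumption
 * based on the comment above; it is not shown in these examples.
 */
static void
Cache_AcquireEntry_usage_sketch(Cache *cache, void *populate_param, int64 neededEvictSize)
{
	CacheEntry *entry = Cache_AcquireEntry(cache, populate_param);

	if (NULL == entry)
	{
		/* Freelist exhausted: evict some entries, then retry once. */
		Cache_Evict(cache, neededEvictSize);
		entry = Cache_AcquireEntry(cache, populate_param);
	}

	if (NULL != entry)
	{
		/* Publish the entry so that look-ups can find it... */
		Cache_Insert(cache, entry);
		/*
		 * ...or, alternatively, surrender it, e.g. via a Cache_Release()
		 * counterpart (assumed here; not shown in these examples).
		 */
	}
}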
/*
 * Mark an entry for removal from the cache.
 * The entry is not immediately deleted, as there is at least one client
 * using it.
 * The entry will not be found using look-up operations after this step.
 * The entry will physically be removed once all using clients release it.
 *
 * This function is not synchronized. Multiple clients can mark an entry
 * deleted.
 */
void
Cache_Remove(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != entry);

#ifdef USE_ASSERT_CHECKING
	int32 casResult =
#endif
	compare_and_swap_32(&entry->state, CACHE_ENTRY_CACHED, CACHE_ENTRY_DELETED);
	Assert(casResult == 1);

	Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noCachedEntries, -1 /* delta */);
	Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noDeletedEntries, 1 /* delta */);
}
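/*
 * Reading the acquire/insert/remove paths together, an entry moves through a
 * small state machine. The transitions visible in these examples are
 * summarized below as a reading aid (not necessarily the complete set):
 *
 *   FREE ------Cache_AcquireEntry---> RESERVED
 *   RESERVED --populateEntry/CAS----> ACQUIRED
 *   ACQUIRED --Cache_Insert---------> CACHED
 *   CACHED ----Cache_Remove/Evict---> DELETED
 *   DELETED ---last client release--> FREE
 */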
Example #4
/*
 * update_spins_per_delay
 *   Update spins_per_delay value in ProcGlobal.
 */
static void update_spins_per_delay(void)
{
	volatile PROC_HDR *procglobal = ProcGlobal;
	bool casResult = false;

	while (!casResult)
	{
		int old_spins_per_delay = procglobal->spins_per_delay;
		int new_spins_per_delay = recompute_spins_per_delay(old_spins_per_delay);
		casResult = compare_and_swap_32((uint32*)&procglobal->spins_per_delay,
										old_spins_per_delay,
										new_spins_per_delay);
	}
}
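/*
 * The retry loop above exists because a plain read-modify-write would lose
 * concurrent updates. An illustrative interleaving (not project code) that
 * the CAS guards against:
 *
 *   backend A: reads spins_per_delay == 100
 *   backend B: reads spins_per_delay == 100
 *   backend A: computes 150, CAS(100 -> 150) succeeds
 *   backend B: computes 120, CAS(100 -> 120) fails (value is now 150)
 *   backend B: rereads 150, recomputes and retries -- A's update survives
 */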
/*
 * Run the cache eviction algorithm.
 *
 * Tries to evict enough entries to add up to evictRequestSize. Returns the
 * actual accumulated size of the entries evicted.
 */
int64
Cache_Evict(Cache *cache, int64 evictRequestSize)
{
	Assert(NULL != cache);
	Assert(evictRequestSize > 0);

	Cache_TimedOperationStart();

	int64 evictedSize = 0;
	uint32 unsuccessfulLoops = 0;
	bool foundVictim = false;
	uint32 decAmount = cache->cacheHdr->policyContext.utilityDecrement;
	Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;

	while (true)
	{

		bool wraparound = false;
		int32 entryIdx = Cache_NextClockHand(cache, &wraparound);
		Assert(entryIdx < cache->cacheHdr->nEntries);

		Cache_UpdatePerfCounter(&cacheStats->noEntriesScanned, 1 /* delta */);

		if (wraparound)
		{
			unsuccessfulLoops++;

			Cache_UpdatePerfCounter(&cacheStats->noWraparound, 1 /* delta */);

			if (!foundVictim)
			{
				/*
				 * We looped around and did not manage to evict any entries.
				 * Double the amount by which we decrement each eviction
				 * candidate's utility. This makes the eviction algorithm
				 * look for a victim more aggressively.
				 */
				if (decAmount <= CACHE_MAX_UTILITY / 2)
				{
					decAmount = 2 * decAmount;
				}
				else
				{
					decAmount = CACHE_MAX_UTILITY;
				}
			}
			foundVictim = false;

			if (unsuccessfulLoops > cache->cacheHdr->policyContext.maxClockLoops)
			{
				/*
				 * Can't find any cached and unused entries to evict, even
				 * after looping around maxClockLoops times. Give up looking
				 * for victims.
				 */
				Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);
				break;
			}
		}

		CacheEntry *crtEntry = Cache_GetEntryByIndex(cache->cacheHdr, entryIdx);
		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Not interested in free/acquired/deleted entries. Go back and advance clock hand */
			continue;
		}

		CacheAnchor *anchor = (CacheAnchor *) SyncHTLookup(cache->syncHashtable, &crtEntry->hashvalue);
		if (NULL == anchor)
		{
			/* There's no anchor for this entry, someone might have snatched it in the meantime */
			continue;
		}

		SpinLockAcquire(&anchor->spinlock);

		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Someone freed this entry in the meantime, before we got a chance to acquire the anchor lock */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Ok, did all the checks, this entry must be valid now */
		CACHE_ASSERT_VALID(crtEntry);

		if (crtEntry->pinCount > 0)
		{
			/* Entry is in use and can't be evicted. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Decrement utility */
		gp_atomic_dec_positive_32(&crtEntry->utility, decAmount);
		/* Just decremented someone's utility. Reset our unsuccessful loops counter */
		unsuccessfulLoops = 0;

		if (crtEntry->utility > 0)
		{
			/* Entry has non-zero utility, we shouldn't evict it. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Found our victim */
		Assert(0 == crtEntry->pinCount);
		CACHE_ASSERT_VALID(crtEntry);
		Assert(crtEntry->utility == 0);

#ifdef USE_ASSERT_CHECKING
		int32 casResult =
#endif
		compare_and_swap_32(&crtEntry->state, CACHE_ENTRY_CACHED, CACHE_ENTRY_DELETED);
		Assert(1 == casResult);

		SpinLockRelease(&anchor->spinlock);
		foundVictim = true;
		evictedSize += crtEntry->size;

		/* Don't update noFreeEntries yet. It will be done in Cache_AddToFreelist */
		Cache_UpdatePerfCounter(&cacheStats->noCachedEntries, -1 /* delta */);

		/* Unlink entry from the anchor chain */
		SpinLockAcquire(&anchor->spinlock);
		Cache_UnlinkEntry(cache, anchor, crtEntry);
		SpinLockRelease(&anchor->spinlock);

		SyncHTRelease(cache->syncHashtable, (void *) anchor);

		if (NULL != cache->cleanupEntry)
		{
			/* Call client-side cleanup for entry */
			cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(crtEntry));
		}

		Cache_LockEntry(cache, crtEntry);

		Assert(crtEntry->state == CACHE_ENTRY_DELETED);
		crtEntry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
		Cache_MemsetPayload(cache, crtEntry);
#endif

		Cache_UnlockEntry(cache, crtEntry);

		Cache_AddToFreelist(cache, crtEntry);

		Cache_UpdatePerfCounter(&cacheStats->noEvicts, 1 /* delta */);
		Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);

		if (evictedSize >= evictRequestSize)
		{
			/* We evicted as much as requested */
			break;
		}

		Cache_TimedOperationStart();

	}

	return evictedSize;
}
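/*
 * The adaptive decrement in the loop above is easiest to follow with a
 * concrete trace. The starting value and cap below are illustrative
 * assumptions, not values taken from the project:
 *
 *   wraparound #1, no victim:  decAmount   1 -> 2
 *   wraparound #2, no victim:  decAmount   2 -> 4
 *   ...
 *   wraparound #10, no victim: decAmount 512 -> 1024 (== CACHE_MAX_UTILITY)
 *
 * At the cap, one touch drops any unpinned CACHED entry's utility to zero,
 * so a victim is found as soon as one exists.
 */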
Example #6
/*
 * Finds and notifies the top vmem-consuming session.
 */
static void
RedZoneHandler_FlagTopConsumer()
{
	if (!vmemTrackerInited)
	{
		return;
	}

	Assert(NULL != MySessionState);

	bool success = compare_and_swap_32((uint32*) isRunawayDetector, 0, 1);

	/* If successful then this process must be the runaway detector */
	AssertImply(success, 1 == *isRunawayDetector);

	/*
	 * Someone already determined the runaway query, so nothing to do. This
	 * will also prevent re-entry to this method by a cleaning session.
	 */
	if (!success)
	{
		return;
	}

	/*
	 * Grabbing a shared lock prevents others from modifying the SessionState
	 * data structure, ensuring that we don't flag someone who is already
	 * dying. A shared lock is enough, as we access the data structure in a
	 * read-only manner.
	 */
	LWLockAcquire(SessionStateLock, LW_SHARED);

	int32 maxVmem = 0;
	int32 maxActiveVmem = 0;
	SessionState *maxActiveVmemSessionState = NULL;
	SessionState *maxVmemSessionState = NULL;

	SessionState *curSessionState = AllSessionStateEntries->usedList;

	while (curSessionState != NULL)
	{
		Assert(INVALID_SESSION_ID != curSessionState->sessionId);

		int32 curVmem = curSessionState->sessionVmem;

		Assert(maxActiveVmem <= maxVmem);

		if (curVmem > maxActiveVmem)
		{
			if (curVmem > maxVmem)
			{
				maxVmemSessionState = curSessionState;
				maxVmem = curVmem;
			}

			/*
			 * Only consider sessions with at least 1 active process. As we
			 * are *not* grabbing locks, this does not guarantee that the
			 * chosen session will still have an active process by the time
			 * we finish walking all sessions.
			 */
			if (curSessionState->activeProcessCount > 0)
			{
				maxActiveVmemSessionState = curSessionState;
				maxActiveVmem = curVmem;
			}
		}

		curSessionState = curSessionState->next;
	}

	if (NULL != maxActiveVmemSessionState)
	{
		SpinLockAcquire(&maxActiveVmemSessionState->spinLock);

		/*
		 * Now that we have grabbed the lock, make sure the session still has
		 * at least 1 active process before flagging it for termination.
		 */
		if (0 < maxActiveVmemSessionState->activeProcessCount)
		{
			/*
			 * First update the runaway event detection version so that an
			 * active process of the runaway session is forced to clean up
			 * before it deactivates. As we hold the spinlock, no process of
			 * the runaway session can deactivate until we release the lock.
			 * Other sessions don't care what global runaway version they
			 * observe, as the runaway event is not pertinent to them.
			 *
			 * We don't need any lock here as the runaway detector is a
			 * singleton, and only the detector can update this variable.
			 */
			*latestRunawayVersion = *CurrentVersion + 1;
			/*
			 * Make sure the runaway event version is not shared with any
			 * other process, nor with any deactivation/reactivation version.
			 */
			*CurrentVersion = *CurrentVersion + 2;

			Assert(CLEANUP_COUNTDOWN_BEFORE_RUNAWAY == maxActiveVmemSessionState->cleanupCountdown);
			/*
			 * Determine how many processes need to clean up before the
			 * session can be marked clean.
			 */
			maxActiveVmemSessionState->cleanupCountdown = maxActiveVmemSessionState->activeProcessCount;

			if (maxActiveVmemSessionState == maxVmemSessionState)
			{
				/* Finally signal the runaway process for cleanup */
				maxActiveVmemSessionState->runawayStatus = RunawayStatus_PrimaryRunawaySession;
			}
			else
			{
				maxActiveVmemSessionState->runawayStatus = RunawayStatus_SecondaryRunawaySession;
			}

			/* Save the amount of vmem the session was holding when it was flagged as runaway */
			maxActiveVmemSessionState->sessionVmemRunaway = maxActiveVmemSessionState->sessionVmem;

			/* Save the command count currently running in the runaway session */
			maxActiveVmemSessionState->commandCountRunaway = gp_command_count;
		}
		else
		{
			/*
			 * Failed to find any viable runaway session. Reset the runaway
			 * detector flag so that another round of runaway determination
			 * can run at a later time. As we couldn't find any runaway
			 * session, CurrentVersion is not changed.
			 */
			*isRunawayDetector = 0;
		}

		SpinLockRelease(&maxActiveVmemSessionState->spinLock);
	}
	else
	{
		/*
		 * No active session to mark as runaway, so re-enable the runaway
		 * detection process.
		 */
		*isRunawayDetector = 0;
	}

	LWLockRelease(SessionStateLock);
}
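/*
 * Stripped of the bookkeeping, the detector election above is a small
 * try-lock idiom built on CAS. A condensed sketch (the function name is
 * illustrative only):
 */
static void
RunawayDetector_election_sketch(void)
{
	if (!compare_and_swap_32((uint32 *) isRunawayDetector, 0, 1))
	{
		/* Lost the race: someone else is already the detector. */
		return;
	}

	/* ... exclusive detector work: scan sessions, flag the top consumer ... */

	/*
	 * Reopen the gate. In the code above this happens here only when no
	 * victim was flagged; otherwise the flagged session's final cleaner
	 * resets the flag (see RunawayCleaner_RunawayCleanupDoneForProcess).
	 */
	*isRunawayDetector = 0;
}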
/*
 * Inserts a previously acquired entry into the cache.
 *
 * This function should never fail.
 */
void
Cache_Insert(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != cache);
	Assert(NULL != entry);

	Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;
	Cache_TimedOperationStart();
	Cache_UpdatePerfCounter(&cacheStats->noInserts, 1 /* delta */);
	Cache_UpdatePerfCounter(&cacheStats->noCachedEntries, 1 /* delta */);
	Cache_UpdatePerfCounter(&cacheStats->noAcquiredEntries, -1 /* delta */);
	Cache_UpdatePerfCounter64(&cacheStats->totalEntrySize, entry->size);

	Cache_ComputeEntryHashcode(cache, entry);

	/* Look up or insert anchor element for this entry */
	bool existing = false;
	volatile CacheAnchor *anchor = SyncHTInsert(cache->syncHashtable, &entry->hashvalue, &existing);
	/*
	 * This should never happen since the SyncHT has as many entries as the SharedCache,
	 * and we'll run out of SharedCache entries before we fill up the SyncHT
	 */
	insist_log(NULL != anchor, "Could not insert in the cache: SyncHT full");

	/* Acquire anchor lock to touch the chain */
	SpinLockAcquire(&anchor->spinlock);

	if (NULL == anchor->firstEntry)
	{
		Assert(NULL == anchor->lastEntry);
		anchor->firstEntry = anchor->lastEntry = entry;
	}
	else
	{
		Assert(NULL != anchor->lastEntry);
		anchor->lastEntry->nextEntry = entry;
		anchor->lastEntry = entry;
	}
	entry->nextEntry = NULL;

	Cache_EntryAddRef(cache, entry);

#ifdef USE_ASSERT_CHECKING
	int32 casResult =
#endif
	compare_and_swap_32(&entry->state, CACHE_ENTRY_ACQUIRED, CACHE_ENTRY_CACHED);
	Assert(1 == casResult);
	Assert(NULL != anchor->firstEntry && NULL != anchor->lastEntry);

	SpinLockRelease(&anchor->spinlock);

#ifdef USE_ASSERT_CHECKING
	bool deleted = 
#endif
	SyncHTRelease(cache->syncHashtable, (void *) anchor);
	Assert(!deleted);

	Cache_TimedOperationRecord(&cacheStats->timeInserts,
			&cacheStats->maxTimeInsert);
}
/*
 * Marks the current process as clean. If all processes of this session are
 * marked as clean (i.e., cleanupCountdown == 0 in MySessionState), we reset
 * the session's runaway status as well as the runaway detector flag (i.e., a
 * new runaway detector can run).
 *
 * Parameters:
 * 		ignoredCleanup: whether the cleanup was ignored, i.e., no
 * 		elog(ERROR, ...) was thrown. In such a case a deactivated process is
 * 		not reactivated, as the deactivation didn't get interrupted.
 */
void
RunawayCleaner_RunawayCleanupDoneForProcess(bool ignoredCleanup)
{
	/*
	 * We don't do anything if we don't have an ongoing cleanup, or if we
	 * already finished cleanup once for the current runaway event.
	 */
	if (beginCleanupRunawayVersion != *latestRunawayVersion ||
			endCleanupRunawayVersion == beginCleanupRunawayVersion)
	{
		/* Either we never started cleanup, or we already finished */
		return;
	}

	/* Disable repeating call */
	endCleanupRunawayVersion = beginCleanupRunawayVersion;

	Assert(NULL != MySessionState);
	/*
	 * As the current cleanup still holds a claim on cleanupCountdown, the
	 * session must stay marked as runaway at least until the current process
	 * marks itself clean.
	 */
	Assert(MySessionState->runawayStatus != RunawayStatus_NotRunaway);

	/* We only cleanup if we were active when the runaway event happened */
	Assert((!isProcessActive && *latestRunawayVersion < deactivationVersion &&
			*latestRunawayVersion > activationVersion) ||
			(*latestRunawayVersion > activationVersion &&
			(activationVersion >= deactivationVersion && isProcessActive)));

	/*
	 * We don't reactivate if the process is already active, or if a
	 * deactivated process never errored out during deactivation (i.e., its
	 * deactivation completed normally).
	 */
	if (!isProcessActive && !ignoredCleanup)
	{
		Assert(1 == *isRunawayDetector);
		Assert(0 < MySessionState->cleanupCountdown);
		/*
		 * As the process threw an ERROR instead of going into the blocking
		 * ReadCommand() state, we have to reactivate the process from its
		 * current Deactivated state.
		 */
		IdleTracker_ActivateProcess();
	}

	Assert(0 < MySessionState->cleanupCountdown);
#ifdef USE_ASSERT_CHECKING
	int cleanProgress =
#endif
			gp_atomic_add_32(&MySessionState->cleanupCountdown, -1);
	Assert(0 <= cleanProgress);

	bool finalCleaner = compare_and_swap_32((uint32*) &MySessionState->cleanupCountdown,
			0, CLEANUP_COUNTDOWN_BEFORE_RUNAWAY);

	if (finalCleaner)
	{
		/*
		 * The final cleaner is responsible for resetting the runaway flag
		 * and re-enabling the runaway detection process.
		 */
		RunawayCleaner_RunawayCleanupDoneForSession();
	}

	/*
	 * Finally we are done with all critical cleanup, which includes releasing
	 * all our memory and releasing our cleanup counter so that another session
	 * can be marked as runaway, if needed. Now we have some headroom to
	 * actually record our usage.
	 */
	write_stderr("Logging memory usage because of runaway cleanup. Note, this is a post-cleanup logging and may be incomplete.");
	MemoryAccounting_SaveToLog();
	MemoryContextStats(TopMemoryContext);
}
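/*
 * The countdown handshake between the detector and the cleaning processes is
 * easiest to see with a concrete trace; three active processes are assumed
 * purely for illustration (RESET stands for CLEANUP_COUNTDOWN_BEFORE_RUNAWAY):
 *
 *   detector:  cleanupCountdown = 3
 *   process A: gp_atomic_add_32(-1) -> 2; CAS(0 -> RESET) fails
 *   process B: gp_atomic_add_32(-1) -> 1; CAS(0 -> RESET) fails
 *   process C: gp_atomic_add_32(-1) -> 0; CAS(0 -> RESET) succeeds
 *
 * Process C is the final cleaner: it resets the session's runaway status and
 * re-enables the runaway detector.
 */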
Example #9
/*
 * InitAuxiliaryProcess -- create a per-auxiliary-process data structure
 *
 * This is called by bgwriter and similar processes so that they will have a
 * MyProc value that's real enough to let them wait for LWLocks.  The PGPROC
 * and sema that are assigned are one of the extra ones created during
 * InitProcGlobal.
 *
 * Auxiliary processes are presently not expected to wait for real (lockmgr)
 * locks, so we need not set up the deadlock checker.  They are never added
 * to the ProcArray or the sinval messaging mechanism, either.	They also
 * don't get a VXID assigned, since this is only useful when we actually
 * hold lockmgr locks.
 */
void
InitAuxiliaryProcess(void)
{
	PGPROC	   *auxproc;
	int			proctype;
	int			i;

	/*
	 * ProcGlobal should be set up already (if we are a backend, we inherit
	 * this by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	if (ProcGlobal == NULL || AuxiliaryProcs == NULL)
		elog(PANIC, "proc header uninitialized");

	if (MyProc != NULL)
		elog(ERROR, "you already exist");

	/*
	 * Find a free auxproc entry. Use compare_and_swap to avoid locking.
	 */
	for (proctype = 0; proctype < NUM_AUXILIARY_PROCS; proctype++)
	{
		auxproc = &AuxiliaryProcs[proctype];
		if (compare_and_swap_32((uint32*)(&(auxproc->pid)),
								0,
								MyProcPid))
		{
			/* Found a free entry; stop searching. */
			break;
		}
	}
	
	if (proctype >= NUM_AUXILIARY_PROCS)
	{
		elog(FATAL, "all AuxiliaryProcs are in use");
	}

	set_spins_per_delay(ProcGlobal->spins_per_delay);

	MyProc = auxproc;
	lockHolderProcPtr = auxproc;

	/*
	 * Initialize all fields of MyProc, except for the semaphore which was
	 * prepared for us by InitProcGlobal.
	 */
	SHMQueueElemInit(&(MyProc->links));
	MyProc->waitStatus = STATUS_OK;
	MyProc->xid = InvalidTransactionId;
	MyProc->xmin = InvalidTransactionId;
	MyProc->databaseId = InvalidOid;
	MyProc->roleId = InvalidOid;
	MyProc->mppLocalProcessSerial = 0;
	MyProc->mppSessionId = 0;
	MyProc->mppIsWriter = false;
	MyProc->inVacuum = false;
	MyProc->postmasterResetRequired = true;
	MyProc->lwWaiting = false;
	MyProc->lwExclusive = false;
	MyProc->lwWaitLink = NULL;
	MyProc->waitLock = NULL;
	MyProc->waitProcLock = NULL;
	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
		SHMQueueInit(&(MyProc->myProcLocks[i]));

	/*
	 * We might be reusing a semaphore that belonged to a failed process. So
	 * be careful and reinitialize its value here.	(This is not strictly
	 * necessary anymore, but seems like a good idea for cleanliness.)
	 */
	PGSemaphoreReset(&MyProc->sem);

	MyProc->queryCommandId = -1;

	/*
	 * Arrange to clean up at process exit.
	 */
	on_shmem_exit(AuxiliaryProcKill, Int32GetDatum(proctype));
}
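/*
 * The pid slot claimed by the CAS above is presumably handed back at process
 * exit by the AuxiliaryProcKill callback registered on the last line. Below
 * is a hedged sketch of what that release side might look like; the real
 * AuxiliaryProcKill is not shown in these examples and does more bookkeeping.
 */
static void
AuxiliaryProcKill_sketch(int code, Datum arg)
{
	int			proctype = DatumGetInt32(arg);
	PGPROC	   *auxproc = &AuxiliaryProcs[proctype];

	Assert(auxproc == MyProc);
	MyProc = NULL;
	lockHolderProcPtr = NULL;

	/* A plain store suffices: only the owning process resets its own slot. */
	auxproc->pid = 0;
}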