Example #1
/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
 */
static inline uint32
ClockSweepTick(void)
{
	uint32 victim;

	/*
	 * Atomically move the hand ahead one buffer.  If several processes are
	 * doing this concurrently, buffers can be returned slightly out of
	 * apparent order.
	 */
	victim =
		pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

	if (victim >= NBuffers)
	{
		uint32 originalVictim = victim;

		/* always wrap what we look up in BufferDescriptors */
		victim = victim % NBuffers;

		/*
		 * If we're the one that just caused a wraparound, force
		 * completePasses to be incremented while holding the spinlock. We
		 * need the spinlock so StrategySyncStart() can return a consistent
		 * value consisting of nextVictimBuffer and completePasses.
		 */
		if (victim == 0)
		{
			uint32 expected;
			uint32 wrapped;
			bool success = false;

			expected = originalVictim + 1;

			while (!success)
			{
				/*
				 * Acquire the spinlock while increasing completePasses. That
				 * allows other readers to read nextVictimBuffer and
				 * completePasses in a consistent manner, which is required
				 * for StrategySyncStart().  In theory delaying the increment
				 * could lead to an overflow of nextVictimBuffer, but that's
				 * highly unlikely and wouldn't be particularly harmful.
				 */
				SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

				wrapped = expected % NBuffers;

				success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
														 &expected, wrapped);
				if (success)
					StrategyControl->completePasses++;
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
			}
		}
	}
	return victim;
}
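The spinlock here pays off on the reader side. Below is a minimal sketch of a
StrategySyncStart()-style reader that needs completePasses and nextVictimBuffer
as one consistent snapshot; the real function lives next to ClockSweepTick() in
freelist.c, and this body is an illustrative assumption, not a copy of it.

/*
 * Sketch of a consistent reader (illustrative only). Taking
 * buffer_strategy_lock pairs with the locked CAS in ClockSweepTick(), so a
 * wrapping hand can never be observed together with a stale completePasses.
 */
static uint32
StrategySyncStartSketch(uint32 *complete_passes)
{
	uint32		hand;

	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
	/* nextVictimBuffer may transiently exceed NBuffers; wrap it for callers */
	hand = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer) % NBuffers;
	if (complete_passes != NULL)
		*complete_passes = StrategyControl->completePasses;
	SpinLockRelease(&StrategyControl->buffer_strategy_lock);

	return hand;
}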
Example #2
/*
 * Retrieve a new cache entry from the pre-allocated freelist.
 * The client has to either insert the entry in the cache or surrender it.
 *
 * This function calls the populateEntry callback function to populate the
 * entry before returning it to the client.
 *
 * populate_param is the opaque parameter passed through to the populateEntry
 * callback.
 *
 * Returns NULL if the freelist is empty.
 */
CacheEntry *
Cache_AcquireEntry(Cache *cache, void *populate_param)
{
	Assert(NULL != cache);

	CacheEntry *newEntry = Cache_GetFreeElement(cache);
	if (NULL == newEntry)
	{
		return NULL;
	}

	CACHE_ASSERT_WIPED(newEntry);

	uint32 expected = CACHE_ENTRY_FREE;
#ifdef USE_ASSERT_CHECKING
	bool casResult =
#endif
	pg_atomic_compare_exchange_u32((pg_atomic_uint32 *)&newEntry->state, &expected, CACHE_ENTRY_RESERVED);
	Assert(casResult);

	/*
	 * In RESERVED state nobody else will try to read this entry, not even
	 * the views. No need to lock the entry while populating.
	 */

	if (cache->populateEntry)
	{
		cache->populateEntry(CACHE_ENTRY_PAYLOAD(newEntry), populate_param);
	}

	expected = CACHE_ENTRY_RESERVED;
#ifdef USE_ASSERT_CHECKING
	casResult =
#endif
	pg_atomic_compare_exchange_u32((pg_atomic_uint32 *)&newEntry->state, &expected, CACHE_ENTRY_ACQUIRED);
	Assert(casResult);

	Cache_RegisterCleanup(cache, newEntry, false /* isCachedEntry */ );

	return newEntry;
}
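A typical call site implied by the contract above: acquire, then either publish
the entry with Cache_Insert() (Example #8) or surrender it. The duplicate check
and Cache_Release() below are hypothetical names, used only for illustration.

/* Hypothetical caller sketch: acquire, then insert or surrender. */
static bool
cache_put_sketch(Cache *cache, void *populate_param)
{
	CacheEntry *entry = Cache_AcquireEntry(cache, populate_param);

	if (NULL == entry)
	{
		return false;	/* freelist is empty; caller must fall back */
	}

	if (EntryAlreadyCached(cache, entry))	/* assumed duplicate check */
	{
		/* surrender the ACQUIRED entry instead of publishing it */
		Cache_Release(cache, entry);	/* assumed surrender entry point */
		return false;
	}

	Cache_Insert(cache, entry);		/* transitions ACQUIRED -> CACHED */
	return true;
}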
Example #3
/*
 * Mark an entry for removal from the cache.
 * The entry is not immediately deleted, as there is at least one client
 * using it.
 * The entry will not be found using look-up operations after this step.
 * The entry will physically be removed once all using clients release it.
 *
 * This function is not synchronized. Multiple clients can mark an entry
 * deleted.
 */
void
Cache_Remove(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != entry);

	uint32 expected = CACHE_ENTRY_CACHED;
#ifdef USE_ASSERT_CHECKING
	bool casResult =
#endif
	pg_atomic_compare_exchange_u32((pg_atomic_uint32 *)&entry->state, &expected, CACHE_ENTRY_DELETED);
	Assert(casResult);

	Cache_DecPerfCounter(&cache->cacheHdr->cacheStats.noCachedEntries, 1 /* delta */);
	Cache_AddPerfCounter(&cache->cacheHdr->cacheStats.noDeletedEntries, 1 /* delta */);
}
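The deferred physical removal promised above happens on the release path: the
last client to unpin a DELETED entry recycles it. A sketch of that idea with
assumed helper names; Cache_EntryDecRef() mirrors the Cache_EntryAddRef() used
in Example #8, and Cache_AddToFreelist() appears in Example #7.

/* Sketch: the last unpin of a deleted entry returns it to the freelist. */
static void
cache_release_sketch(Cache *cache, CacheEntry *entry)
{
	uint32 pins = Cache_EntryDecRef(cache, entry);	/* assumed helper */

	if (0 == pins && CACHE_ENTRY_DELETED == entry->state)
	{
		entry->state = CACHE_ENTRY_FREE;
		Cache_AddToFreelist(cache, entry);
	}
}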
Example #4
static void
test_atomic_uint32(void)
{
	pg_atomic_uint32 var;
	uint32		expected;
	int			i;

	pg_atomic_init_u32(&var, 0);

	if (pg_atomic_read_u32(&var) != 0)
		elog(ERROR, "atomic_read_u32() #1 wrong");

	pg_atomic_write_u32(&var, 3);

	if (pg_atomic_read_u32(&var) != 3)
		elog(ERROR, "atomic_read_u32() #2 wrong");

	if (pg_atomic_fetch_add_u32(&var, 1) != 3)
		elog(ERROR, "atomic_fetch_add_u32() #1 wrong");

	if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
		elog(ERROR, "atomic_fetch_sub_u32() #1 wrong");

	if (pg_atomic_sub_fetch_u32(&var, 3) != 0)
		elog(ERROR, "atomic_sub_fetch_u32() #1 wrong");

	if (pg_atomic_add_fetch_u32(&var, 10) != 10)
		elog(ERROR, "atomic_add_fetch_u32() #1 wrong");

	if (pg_atomic_exchange_u32(&var, 5) != 10)
		elog(ERROR, "pg_atomic_exchange_u32() #1 wrong");

	if (pg_atomic_exchange_u32(&var, 0) != 5)
		elog(ERROR, "pg_atomic_exchange_u32() #0 wrong");

	/* test around numerical limits */
	if (pg_atomic_fetch_add_u32(&var, INT_MAX) != 0)
		elog(ERROR, "pg_atomic_fetch_add_u32() #2 wrong");

	if (pg_atomic_fetch_add_u32(&var, INT_MAX) != INT_MAX)
		elog(ERROR, "pg_atomic_add_fetch_u32() #3 wrong");

	pg_atomic_fetch_add_u32(&var, 1);	/* top up to UINT_MAX */

	if (pg_atomic_read_u32(&var) != UINT_MAX)
		elog(ERROR, "atomic_read_u32() #2 wrong");

	if (pg_atomic_fetch_sub_u32(&var, INT_MAX) != UINT_MAX)
		elog(ERROR, "pg_atomic_fetch_sub_u32() #2 wrong");

	if (pg_atomic_read_u32(&var) != (uint32) INT_MAX + 1)
		elog(ERROR, "atomic_read_u32() #3 wrong: %u", pg_atomic_read_u32(&var));

	expected = pg_atomic_sub_fetch_u32(&var, INT_MAX);
	if (expected != 1)
		elog(ERROR, "pg_atomic_sub_fetch_u32() #3 wrong: %u", expected);

	pg_atomic_sub_fetch_u32(&var, 1);

	/* fail exchange because of old expected */
	expected = 10;
	if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
		elog(ERROR, "atomic_compare_exchange_u32() changed value spuriously");

	/* CAS is allowed to fail due to interrupts, try a couple of times */
	for (i = 0; i < 1000; i++)
	{
		expected = 0;
		if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
			break;
	}
	if (i == 1000)
		elog(ERROR, "atomic_compare_exchange_u32() never succeeded");
	if (pg_atomic_read_u32(&var) != 1)
		elog(ERROR, "atomic_compare_exchange_u32() didn't set value properly");

	pg_atomic_write_u32(&var, 0);

	/* try setting flagbits */
	if (pg_atomic_fetch_or_u32(&var, 1) & 1)
		elog(ERROR, "pg_atomic_fetch_or_u32() #1 wrong");

	if (!(pg_atomic_fetch_or_u32(&var, 2) & 1))
		elog(ERROR, "pg_atomic_fetch_or_u32() #2 wrong");

	if (pg_atomic_read_u32(&var) != 3)
		elog(ERROR, "invalid result after pg_atomic_fetch_or_u32()");

	/* try clearing flagbits */
	if ((pg_atomic_fetch_and_u32(&var, ~2) & 3) != 3)
		elog(ERROR, "pg_atomic_fetch_and_u32() #1 wrong");

	if (pg_atomic_fetch_and_u32(&var, ~1) != 1)
		elog(ERROR, "pg_atomic_fetch_and_u32() #2 wrong: is %u",
			 pg_atomic_read_u32(&var));
	/* no bits set anymore */
	if (pg_atomic_fetch_and_u32(&var, ~0) != 0)
		elog(ERROR, "pg_atomic_fetch_and_u32() #3 wrong");
}
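Two properties this test leans on are worth spelling out: on failure,
pg_atomic_compare_exchange_u32() stores the value it actually found into
*expected (C11 compare-exchange semantics), which is what lets retry loops
reload and try again; and the fetch_or/fetch_and operations return the value
from before the bitwise update. The latter is enough to build a one-bit
try-lock, sketched below; this helper is not part of the PostgreSQL API.

/* Sketch: a one-bit test-and-set lock built on the flag operations above. */
#define SKETCH_LOCK_BIT 1

static bool
sketch_trylock(pg_atomic_uint32 *flags)
{
	/* if the old value had the bit clear, we are the ones who set it */
	return (pg_atomic_fetch_or_u32(flags, SKETCH_LOCK_BIT) & SKETCH_LOCK_BIT) == 0;
}

static void
sketch_unlock(pg_atomic_uint32 *flags)
{
	pg_atomic_fetch_and_u32(flags, ~SKETCH_LOCK_BIT);
}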
Example #5
/*
 * Perform garbage collection (defragmentation) of the file, if required.
 * @param map_path path to the file's map file (*.map).
 * @return true on success (including when no GC turns out to be needed).
 */
static bool cfs_gc_file(char* map_path)
{
	int md = open(map_path, O_RDWR|PG_BINARY, 0);
	FileMap* map;
	uint32 physSize;
	uint32 usedSize;
	uint32 virtSize;
	int suf = strlen(map_path)-4;
	int fd = -1, fd2 = -1, md2 = -1;
	bool succeed = true;

	if (md < 0) { 
		elog(LOG, "Failed to open map file %s: %m", map_path);
		return false;
	}
	map = cfs_mmap(md);
	if (map == MAP_FAILED) {
		elog(LOG, "Failed to map file %s: %m", map_path);
		close(md);
		return false;
	}
	usedSize = pg_atomic_read_u32(&map->usedSize);
	physSize = pg_atomic_read_u32(&map->physSize);
	virtSize = pg_atomic_read_u32(&map->virtSize);
		
	if ((physSize - usedSize)*100 > physSize*cfs_gc_threshold) /* do we need to perform defragmentation? */
	{ 
		long delay = CFS_LOCK_MIN_TIMEOUT;		
		char* file_path = (char*)palloc(suf+1);
		char* map_bck_path = (char*)palloc(suf+10);
		char* file_bck_path = (char*)palloc(suf+5);
		FileMap* newMap = (FileMap*)palloc0(sizeof(FileMap));
		uint32 newSize = 0;
		inode_t** inodes = (inode_t**)palloc(RELSEG_SIZE*sizeof(inode_t*));
		bool remove_backups = true;
		int n_pages = virtSize / BLCKSZ;
		TimestampTz startTime, endTime;
		long secs;
		int usecs;
		int i;
		
		startTime = GetCurrentTimestamp();

		memcpy(file_path, map_path, suf);
		file_path[suf] = '\0';
		strcat(strcpy(map_bck_path, map_path), ".bck");
		strcat(strcpy(file_bck_path, file_path), ".bck");

		while (true) { 
			uint32 access_count = 0;
			if (pg_atomic_compare_exchange_u32(&map->lock, &access_count, CFS_GC_LOCK)) {				
				break;
			}
			if (access_count >= CFS_GC_LOCK) { 
				/* Looks like the last GC was interrupted.
				 * Try to recover the file.
				 */
				if (access(file_bck_path, R_OK) != 0) {
					/* There is no backup file: new map should be constructed */					
					md2 = open(map_bck_path, O_RDWR|PG_BINARY, 0);
					if (md2 >= 0) { 
						/* Recover map */
						if (!cfs_read_file(md2, newMap, sizeof(FileMap))) { 
							elog(LOG, "Failed to read file %s: %m", map_bck_path);
							goto Cleanup;
						}
						close(md2);
						md2 = -1;
						newSize = pg_atomic_read_u32(&newMap->usedSize);
						remove_backups = false;
						goto ReplaceMap;
					}
			} else {
				/* The backup file still exists, which means the original
				 * data and map files are unchanged. Just remove the backup
				 * files, grab the lock and continue processing.
				 */
					unlink(file_bck_path);
					unlink(map_bck_path);
					break;
				}
			}
			pg_usleep(delay);
			if (delay < CFS_LOCK_MAX_TIMEOUT) { 
				delay *= 2;
			}
		}				 			
		md2 = open(map_bck_path, O_CREAT|O_RDWR|PG_BINARY|O_TRUNC, 0600);
		if (md2 < 0) {
			elog(LOG, "Failed to create file %s: %m", map_bck_path);
			goto Cleanup;
		}
		for (i = 0; i < n_pages; i++) { 
			newMap->inodes[i] = map->inodes[i];
			inodes[i] = &newMap->inodes[i];
		}
		/* sort inodes by offset to improve read locality */
		qsort(inodes, n_pages, sizeof(inode_t*), cfs_cmp_page_offs);
		
		fd = open(file_path, O_RDWR|PG_BINARY, 0);
		if (fd < 0) {
			elog(LOG, "Failed to open file %s: %m", file_path);
			goto Cleanup;
		}
		
		fd2 = open(file_bck_path, O_CREAT|O_RDWR|PG_BINARY|O_TRUNC, 0600);
		if (fd2 < 0) {
			elog(LOG, "Failed to create file %s: %m", file_bck_path);
			goto Cleanup;
		}
		
		for (i = 0; i < n_pages; i++) { 
			int size = CFS_INODE_SIZE(*inodes[i]);
			if (size != 0) { 
				char block[BLCKSZ];
				off_t rc PG_USED_FOR_ASSERTS_ONLY;
				uint32 offs = CFS_INODE_OFFS(*inodes[i]);
				Assert(size <= BLCKSZ);	
				rc = lseek(fd, offs, SEEK_SET);
				Assert(rc == offs);
				
				if (!cfs_read_file(fd, block, size)) { 
					elog(LOG, "Failed to read file %s: %m", file_path);
					goto Cleanup;
				}
				
				if (!cfs_write_file(fd2, block, size)) { 
					elog(LOG, "Failed to write file %s: %m", file_bck_path);
					goto Cleanup;
				}
				offs = newSize;
				newSize += size;
				*inodes[i] = CFS_INODE(size, offs);
			}
		}
		pg_atomic_write_u32(&map->usedSize, newSize);

		if (close(fd) < 0) { 
			elog(LOG, "Failed to close file %s: %m", file_path);
			goto Cleanup;
		}
		fd = -1;

		/* Persist copy of data file */
		if (pg_fsync(fd2) < 0) { 
			elog(LOG, "Failed to sync file %s: %m", file_bck_path);
			goto Cleanup;
		}
		if (close(fd2) < 0) { 
			elog(LOG, "Failed to close file %s: %m", file_bck_path);
			goto Cleanup;
		}
		fd2 = -1;

		/* Persist copy of map file */
		if (!cfs_write_file(md2, &newMap, sizeof(newMap))) { 
			elog(LOG, "Failed to write file %s: %m", map_bck_path);
			goto Cleanup;
		}
		if (pg_fsync(md2) < 0) { 
			elog(LOG, "Failed to sync file %s: %m", map_bck_path);
			goto Cleanup;
		}
		if (close(md2) < 0) { 
			elog(LOG, "Failed to close file %s: %m", map_bck_path);
			goto Cleanup;
		}
		md2 = -1;

		/* Persist the map with CFS_GC_LOCK still set: in case of a crash we
		 * will know that the map may have been changed by GC */
		if (cfs_msync(map) < 0) {
			elog(LOG, "Failed to sync map %s: %m", map_path);
			goto Cleanup;
		}
		if (pg_fsync(md) < 0) { 
			elog(LOG, "Failed to sync file %s: %m", map_path);
			goto Cleanup;
		}
		
		/*
		 * Now all information necessary for recovery is stored.
		 * We are ready to replace the existing file with the defragmented one.
		 * Use rename() and rely on the file system to make this operation atomic.
		 */
		remove_backups = false;
		if (rename(file_bck_path, file_path) < 0) { 
			elog(LOG, "Failed to rename file %s: %m", file_path);
			goto Cleanup;
		}
	  ReplaceMap:
		/* At this moment the defragmented file version is stored. We can perform
		 * an in-place update of the map. If a crash happens at this point, the map
		 * can be recovered from the backup file. */
		memcpy(map->inodes, newMap->inodes, n_pages * sizeof(inode_t));
		pg_atomic_write_u32(&map->usedSize, newSize);
		pg_atomic_write_u32(&map->physSize, newSize);
		map->generation += 1; /* force all backends to reopen the file */
		
		/* Before removing backup files and releasing locks we need to flush updated map file */
		if (cfs_msync(map) < 0) {
			elog(LOG, "Failed to sync map %s: %m", map_path);
			goto Cleanup;
		}
		if (pg_fsync(md) < 0) { 
			elog(LOG, "Failed to sync file %s: %m", map_path);
		  Cleanup:
			if (fd >= 0) close(fd);
			if (fd2 >= 0) close(fd2);
			if (md2 >= 0) close(md2);
			if (remove_backups) { 
				unlink(file_bck_path);
				unlink(map_bck_path);		
				remove_backups = false;
			}	
			succeed = false;
		} else { 
			remove_backups = true; /* the backups are no longer needed */
		}
		pg_atomic_fetch_sub_u32(&map->lock, CFS_GC_LOCK); /* release lock */

		/* remove map backup file */
		if (remove_backups && unlink(map_bck_path)) {
			elog(LOG, "Failed to unlink file %s: %m", map_bck_path);
			succeed = false;
		}
		
		endTime = GetCurrentTimestamp();
		TimestampDifference(startTime, endTime, &secs, &usecs);

		elog(LOG, "%d: defragment file %s: old size %d, new size %d, logical size %d, used %d, compression ratio %f, time %ld usec",
			 MyProcPid, file_path, physSize, newSize, virtSize, usedSize, (double)virtSize/newSize,
			 secs*USECS_PER_SEC + usecs);

		pfree(file_path);
		pfree(file_bck_path);
		pfree(map_bck_path);
		pfree(inodes);
		pfree(newMap);
		
		if (cfs_gc_delay != 0) { 
			int rc = WaitLatch(MyLatch,
							   WL_TIMEOUT | WL_POSTMASTER_DEATH,
							   cfs_gc_delay /* ms */ );
			if (rc & WL_POSTMASTER_DEATH) {
				exit(1);
			}
		}
	} else if (cfs_state->max_iterations == 1) { 
		elog(LOG, "%d: file %.*s: physical size %d, logical size %d, used %d, compression ratio %f",
			 MyProcPid, suf, map_path, physSize, virtSize, usedSize, (double)virtSize/physSize);
	}
	
	if (cfs_munmap(map) < 0) { 
		elog(LOG, "Failed to unmap file %s: %m", map_path);
		succeed = false;
	}
	if (close(md) < 0) { 
		elog(LOG, "Failed to close file %s: %m", map_path);
		succeed = false;
	}
	return succeed;
}
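The map->lock word above doubles as a shared/exclusive lock: GC acquires it
exclusively by CAS-ing an access count of 0 to CFS_GC_LOCK and releases it with
pg_atomic_fetch_sub_u32(), which implies that ordinary readers hold shares by
incrementing the low counter. A sketch of the assumed reader side; the real
acquisition code in CFS may differ.

/*
 * Hypothetical reader-side acquisition of map->lock. Readers co-exist (each
 * adds 1); GC excludes everyone by parking CFS_GC_LOCK in the same word.
 * The backoff mirrors the GC loop in cfs_gc_file() above.
 */
static void cfs_lock_file_sketch(FileMap* map)
{
	long delay = CFS_LOCK_MIN_TIMEOUT;

	while (true) {
		uint32 count = pg_atomic_fetch_add_u32(&map->lock, 1);
		if (count < CFS_GC_LOCK) {
			break;	/* no GC in progress: we hold a shared lock */
		}
		/* GC holds the lock: undo our increment, nap, then retry */
		pg_atomic_fetch_sub_u32(&map->lock, 1);
		pg_usleep(delay);
		if (delay < CFS_LOCK_MAX_TIMEOUT) {
			delay *= 2;
		}
	}
}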
Example #6
/*
 * Update the logical file size: atomically advance virtSize to newSize,
 * unless a concurrent extender has already published a larger value.
 * On CAS failure oldSize is refreshed with the current value, so the loop
 * exits as soon as newSize is no longer an extension.
 */
void cfs_extend(FileMap* map, uint32 newSize)
{
	uint32 oldSize = pg_atomic_read_u32(&map->virtSize);

	while (newSize > oldSize
		   && !pg_atomic_compare_exchange_u32(&map->virtSize, &oldSize, newSize))
		;
}
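This is the usual atomic-maximum idiom: concurrent extenders may race, and the
largest size wins. A plain pg_atomic_write_u32() would be wrong here, since a
stale smaller value could overwrite a larger one. A hypothetical call site,
publishing a new logical EOF after writing block blkno:

/* Hypothetical caller: grow the logical size after writing block blkno. */
void cfs_extend_usage_sketch(FileMap* map, uint32 blkno)
{
	/* safe under concurrency: cfs_extend() never shrinks virtSize */
	cfs_extend(map, (blkno + 1) * BLCKSZ);
}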
Example #7
/*
 * Run cache eviction algorithm
 *
 * It will try to evict enough entries to add up to evictSize. Returns the
 * actual accumulated size of the entries evicted
 */
int64
Cache_Evict(Cache *cache, int64 evictRequestSize)
{
	Assert(NULL != cache);
	Assert(evictRequestSize > 0);

	Cache_TimedOperationStart();

	int64 evictedSize = 0;
	uint32 unsuccessfulLoops = 0;
	bool foundVictim = false;
	uint32 decAmount = cache->cacheHdr->policyContext.utilityDecrement;
	Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;

	while (true)
	{
		bool wraparound = false;
		int32 entryIdx = Cache_NextClockHand(cache, &wraparound);
		Assert(entryIdx < cache->cacheHdr->nEntries);

		Cache_AddPerfCounter(&cacheStats->noEntriesScanned,1 /* delta */);

		if (wraparound)
		{
			unsuccessfulLoops++;

			Cache_AddPerfCounter(&cacheStats->noWraparound, 1 /* delta */);

			if (!foundVictim)
			{
				/*
				 * We looped around and did not manage to evict any entries.
				 * Double the amount by which we decrement the eviction
				 * candidates' utility. This makes the eviction algorithm
				 * look for a victim more aggressively.
				 */
				if (decAmount <= CACHE_MAX_UTILITY / 2)
				{
					decAmount = 2 * decAmount;
				}
				else
				{
					decAmount = CACHE_MAX_UTILITY;
				}
			}
			foundVictim = false;

			if (unsuccessfulLoops > cache->cacheHdr->policyContext.maxClockLoops)
			{
				/*
				 * Can't find any cached and unused entries to evict, even
				 * after looping around maxClockLoops times. Give up looking
				 * for victims.
				 */
				Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);
				break;
			}
		}

		CacheEntry *crtEntry = Cache_GetEntryByIndex(cache->cacheHdr, entryIdx);
		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Not interested in free/acquired/deleted entries. Go back and advance clock hand */
			continue;
		}

		CacheAnchor *anchor = (CacheAnchor *) SyncHTLookup(cache->syncHashtable, &crtEntry->hashvalue);
		if (NULL == anchor)
		{
			/* There's no anchor for this entry, someone might have snatched it in the meantime */
			continue;
		}

		SpinLockAcquire(&anchor->spinlock);

		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Someone freed this entry in the meantime, before we got a chance to acquire the anchor lock */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Ok, did all the checks, this entry must be valid now */
		CACHE_ASSERT_VALID(crtEntry);

		if (crtEntry->pinCount > 0)
		{
			/* Entry is in use and can't be evicted. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Decrement utility */
		gp_atomic_dec_positive_32(&crtEntry->utility, decAmount);
		/* Just decremented someone's utility. Reset our unsuccessful loops counter */
		unsuccessfulLoops = 0;

		if (crtEntry->utility > 0)
		{
			/* Entry has non-zero utility, we shouldn't evict it. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Found our victim */
		Assert(0 == crtEntry->pinCount);
		CACHE_ASSERT_VALID(crtEntry);
		Assert(crtEntry->utility == 0);

		uint32 expected = CACHE_ENTRY_CACHED;
#ifdef USE_ASSERT_CHECKING
		bool casResult =
#endif
		pg_atomic_compare_exchange_u32((pg_atomic_uint32 *)&crtEntry->state, &expected, CACHE_ENTRY_DELETED);
		Assert(casResult);

		SpinLockRelease(&anchor->spinlock);
		foundVictim = true;
		evictedSize += crtEntry->size;

		/* Don't update noFreeEntries yet. It will be done in Cache_AddToFreelist */
		Cache_DecPerfCounter(&cacheStats->noCachedEntries, 1 /* delta */);

		/* Unlink entry from the anchor chain */
		SpinLockAcquire(&anchor->spinlock);
		Cache_UnlinkEntry(cache, anchor, crtEntry);
		SpinLockRelease(&anchor->spinlock);

		SyncHTRelease(cache->syncHashtable, (void *) anchor);

		/* Call client-side cleanup for entry */
		cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(crtEntry));

		Cache_LockEntry(cache, crtEntry);

		Assert(crtEntry->state == CACHE_ENTRY_DELETED);
		crtEntry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
		Cache_MemsetPayload(cache, crtEntry);
#endif

		Cache_UnlockEntry(cache, crtEntry);

		Cache_AddToFreelist(cache, crtEntry);

		Cache_AddPerfCounter(&cacheStats->noEvicts, 1 /* delta */);
		Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);

		if (evictedSize >= evictRequestSize)
		{
			/* We evicted as much as requested */
			break;
		}

		Cache_TimedOperationStart();

	}

	return evictedSize;
}
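gp_atomic_dec_positive_32(), used above to age an entry's utility, is by its
name a decrement that clamps at zero instead of wrapping. A sketch of those
assumed semantics as a compare-exchange loop; this is not the actual Greenplum
implementation.

/* Sketch: decrement *ptr by dec, clamping at zero (assumed semantics). */
static uint32
dec_positive_sketch(pg_atomic_uint32 *ptr, uint32 dec)
{
	uint32 oldval = pg_atomic_read_u32(ptr);
	uint32 newval;

	do
	{
		newval = (oldval > dec) ? oldval - dec : 0;
		/* on CAS failure, oldval is refreshed with the current value */
	} while (!pg_atomic_compare_exchange_u32(ptr, &oldval, newval));

	return newval;
}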
Example #8
/*
 * Inserts a previously acquired entry in the cache.
 *
 * This function should never fail.
 */
void
Cache_Insert(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != cache);
	Assert(NULL != entry);

	Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;
	Cache_TimedOperationStart();
	Cache_AddPerfCounter(&cacheStats->noInserts, 1 /* delta */);
	Cache_AddPerfCounter(&cacheStats->noCachedEntries, 1 /* delta */);
	Cache_DecPerfCounter(&cacheStats->noAcquiredEntries, 1 /* delta */);
	Cache_AddPerfCounter64(&cacheStats->totalEntrySize, entry->size);

	Cache_ComputeEntryHashcode(cache, entry);

	/* Look up or insert anchor element for this entry */
	bool existing = false;
	volatile CacheAnchor *anchor = SyncHTInsert(cache->syncHashtable, &entry->hashvalue, &existing);
	/*
	 * This should never happen since the SyncHT has as many entries as the SharedCache,
	 * and we'll run out of SharedCache entries before we fill up the SyncHT
	 */
	insist_log(NULL != anchor, "Could not insert in the cache: SyncHT full");

	/* Acquire anchor lock to touch the chain */
	SpinLockAcquire(&anchor->spinlock);

	if (NULL == anchor->firstEntry)
	{
		Assert(NULL == anchor->lastEntry);
		anchor->firstEntry = anchor->lastEntry = entry;
	}
	else
	{
		Assert(NULL != anchor->lastEntry);
		anchor->lastEntry->nextEntry = entry;
		anchor->lastEntry = entry;
	}
	entry->nextEntry = NULL;

	Cache_EntryAddRef(cache, entry);

	uint32 expected = CACHE_ENTRY_ACQUIRED;
#ifdef USE_ASSERT_CHECKING
	bool casResult =
#endif
	pg_atomic_compare_exchange_u32((pg_atomic_uint32 *)&entry->state, &expected, CACHE_ENTRY_CACHED);
	Assert(casResult);
	Assert(NULL != anchor->firstEntry && NULL != anchor->lastEntry);

	SpinLockRelease(&anchor->spinlock);

#ifdef USE_ASSERT_CHECKING
	bool deleted = 
#endif
	SyncHTRelease(cache->syncHashtable, (void *) anchor);
	Assert(!deleted);

	Cache_TimedOperationRecord(&cacheStats->timeInserts,
			&cacheStats->maxTimeInsert);
}
Example #9
/*
 * Marks the current process as clean. If all the processes are marked
 * as clean for this session (i.e., cleanupCountdown == 0 in the
 * MySessionState) then we reset the session's runaway status as well as
 * the runaway detector flag (i.e., a new runaway detector can run).
 *
 * Parameters:
 * 		ignoredCleanup: whether the cleanup was ignored, i.e., no elog(ERROR, ...)
 * 		was thrown. In that case a deactivated process is not reactivated, as the
 * 		deactivation never got interrupted.
 */
void
RunawayCleaner_RunawayCleanupDoneForProcess(bool ignoredCleanup)
{
	/*
	 * We don't do anything if we don't have an ongoing cleanup, or we already finished
	 * cleanup once for the current runaway event
	 */
	if (beginCleanupRunawayVersion != *latestRunawayVersion ||
			endCleanupRunawayVersion == beginCleanupRunawayVersion)
	{
		/* Either we never started cleanup, or we already finished */
		return;
	}

	/* Disable repeating call */
	endCleanupRunawayVersion = beginCleanupRunawayVersion;

	Assert(NULL != MySessionState);
	/*
	 * As the current cleanup still holds a slot in cleanupCountdown,
	 * the session must stay marked as runaway at least until the current
	 * process marks itself clean.
	 */
	Assert(MySessionState->runawayStatus != RunawayStatus_NotRunaway);

	/* We only cleanup if we were active when the runaway event happened */
	Assert((!isProcessActive && *latestRunawayVersion < deactivationVersion &&
			*latestRunawayVersion > activationVersion) ||
			(*latestRunawayVersion > activationVersion &&
			(activationVersion >= deactivationVersion && isProcessActive)));

	/*
	 * We don't reactivate if the process is already active, or if the
	 * deactivated process never errored out during its deactivation (i.e.,
	 * the cleanup was ignored, so the deactivation completed undisturbed).
	 */
	if (!isProcessActive && !ignoredCleanup)
	{
		Assert(1 == *isRunawayDetector);
		Assert(0 < MySessionState->cleanupCountdown);
		/*
		 * As the process threw ERROR instead of going into ReadCommand() blocking
		 * state, we have to reactivate the process from its current Deactivated
		 * state
		 */
		IdleTracker_ActivateProcess();
	}

	Assert(0 < MySessionState->cleanupCountdown);
#ifdef USE_ASSERT_CHECKING
	int cleanProgress =
#endif
			pg_atomic_add_fetch_u32((pg_atomic_uint32 *)&MySessionState->cleanupCountdown, -1);
	Assert(0 <= cleanProgress);

	uint32 expected = 0;
	bool finalCleaner = pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &MySessionState->cleanupCountdown,
			&expected, CLEANUP_COUNTDOWN_BEFORE_RUNAWAY);

	if (finalCleaner)
	{
		/*
		 * The final cleaner is responsible to reset the runaway flag,
		 * and enable the runaway detection process.
		 */
		RunawayCleaner_RunawayCleanupDoneForSession();
	}

	/*
	 * Finally we are done with all critical cleanup, which includes releasing all our memory and
	 * releasing our cleanup counter so that another session can be marked as runaway, if needed.
	 * Now, we have some head room to actually record our usage.
	 */
	write_stderr("Logging memory usage because of runaway cleanup. Note, this is a post-cleanup logging and may be incomplete.");
	MemoryAccounting_SaveToLog();
	MemoryContextStats(TopMemoryContext);
}
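The countdown pattern above generalizes: each participant atomically decrements
the shared counter, and whoever can CAS it from 0 back to its sentinel reset
value performs the session-wide reset exactly once. A minimal self-contained
sketch of the idiom, with hypothetical names:

/* Sketch: N participants finish; exactly one performs the final reset. */
static void
participant_done_sketch(pg_atomic_uint32 *countdown, uint32 resetValue)
{
	uint32 expected = 0;

	/* mark this participant as done */
	pg_atomic_sub_fetch_u32(countdown, 1);

	/* only the process that finds 0 re-arms the counter and resets */
	if (pg_atomic_compare_exchange_u32(countdown, &expected, resetValue))
	{
		/* session-wide reset goes here; runs in exactly one process */
	}
}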