Example #1
0
File: sharedcache.c  Project: AnLingm/gpdb
/*
 * Return a previously acquired entry to the cache freelist.
 * Calls the client-specific cleanup before returning to the freelist.
 *
 * Unregisters the entry from the cleanup list if requested.
 *
 * The entry must currently be in state CACHE_ENTRY_ACQUIRED (asserted below).
 */
static void
Cache_ReleaseAcquired(Cache *cache, CacheEntry *entry, bool unregisterCleanup)
{
	Assert(NULL != cache);
	Assert(NULL != entry);
	Assert(CACHE_ENTRY_ACQUIRED == entry->state);

	/* Unregister entry from the cleanup list if requested */
	if (unregisterCleanup)
	{
		Cache_UnregisterCleanup(cache, entry);
	}

	PG_TRY();
	{
		/* Call client-specific cleanup function before removing entry from cache */
		cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(entry));
	}
	PG_CATCH();
	{

		/*
		 * Even if the client cleanup errored out, the entry must still be
		 * returned to the freelist so it is not leaked. Duplicate the
		 * freeing sequence here, then re-throw the error.
		 */

		/* Grab entry lock to ensure exclusive access to it while we're touching it */
		Cache_LockEntry(cache, entry);

		/* No need for atomic operations as long as we hold the entry lock */
		entry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
		/* Poison the payload in assert builds to catch use-after-release */
		Cache_MemsetPayload(cache, entry);
#endif

		Cache_UnlockEntry(cache, entry);

		/* Link entry back in the freelist */
		Cache_AddToFreelist(cache, entry);

		PG_RE_THROW();
	}
	PG_END_TRY();

	/* Normal (no-error) path: same freeing sequence as the catch block above */

	/* Grab entry lock to ensure exclusive access to it while we're touching it */
	Cache_LockEntry(cache, entry);

	/* No need for atomic operations as long as we hold the entry lock */
	entry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
	/* Poison the payload in assert builds to catch use-after-release */
	Cache_MemsetPayload(cache, entry);
#endif

	Cache_UnlockEntry(cache, entry);

	Cache_AddToFreelist(cache, entry);

	Cache_DecPerfCounter(&cache->cacheHdr->cacheStats.noAcquiredEntries, 1 /* delta */ );
}
Example #2
0
/*
 * Sweeps through the cache and marks all entries as deleted.
 *
 * Starts at a random position to spread contention across backends, then
 * visits every entry exactly once. (The previous implementation advanced
 * the index before the first visit and stopped upon returning to the start
 * index, which skipped the randomly-chosen start entry and only examined
 * nEntries - 1 slots.)
 *
 * Returns the number of elements it found and marked deleted.
 */
int32
Cache_Clear(Cache *cache)
{
	Assert(NULL != cache);

	int32 nEntries = cache->cacheHdr->nEntries;
	/* Random starting slot to avoid all backends hammering slot 0 first */
	int32 entryIdx = cdb_randint(nEntries - 1, 0);
	int32 numClearedEntries = 0;

	for (int32 numVisited = 0; numVisited < nEntries; numVisited++)
	{
		CacheEntry *crtEntry = Cache_GetEntryByIndex(cache->cacheHdr, entryIdx);

		/* Advance the index for the next iteration, wrapping around */
		entryIdx = (entryIdx + 1) % nEntries;

		/* Lock entry so that nobody else changes its state until we're done with it */
		Cache_LockEntry(cache, crtEntry);

		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Not interested in free/acquired/deleted entries. Go back and look at next entry */
			Cache_UnlockEntry(cache, crtEntry);
			continue;
		}

		/* Found cached entry */
		Cache_EntryAddRef(cache, crtEntry);

		if (crtEntry->state == CACHE_ENTRY_FREE || crtEntry->state == CACHE_ENTRY_ACQUIRED)
		{
			/* Someone freed up the entry before we had a chance to Add-Ref it. Skip it. */
			Assert(false);
			Cache_EntryDecRef(cache, crtEntry);
			Cache_UnlockEntry(cache, crtEntry);
			continue;
		}

		Cache_RegisterCleanup(cache, crtEntry, true /* isCachedEntry */);

		Cache_Remove(cache, crtEntry);

		/* Done with changing the state. Unlock the entry */
		Cache_UnlockEntry(cache, crtEntry);

		Cache_Release(cache, crtEntry);

		numClearedEntries++;
	}

	return numClearedEntries;
}
/*
 * Look up an entry in the Global MDVSN component.
 *
 * To avoid concurrency issues, the caller receives a private copy of the
 * entry, palloc'ed in the current memory context; the caller owns (and must
 * eventually pfree) that copy.
 *
 * Returns the copy on a match, or NULL when no entry exists for this oid.
 */
mdver_entry *
mdver_glob_mdvsn_find(Oid oid)
{
	Assert(NULL != mdver_glob_mdvsn);

	/* Build a probe entry carrying only the lookup key */
	mdver_entry probe;
	probe.key = oid;

	/* FIXME gcaragea 03/18/2014: Trigger evictions if cache is full (MPP-22923) */
	CacheEntry *probe_entry = Cache_AcquireEntry(mdver_glob_mdvsn, &probe);
	Assert(NULL != probe_entry);

	CacheEntry *match = Cache_Lookup(mdver_glob_mdvsn, probe_entry);

	/* The probe entry has served its purpose; give it back right away */
	Cache_Release(mdver_glob_mdvsn, probe_entry);

	if (NULL == match)
	{
		return NULL;
	}

	/* Match found: copy the shared payload into backend-local memory */
	mdver_entry *shared_payload = (mdver_entry *) CACHE_ENTRY_PAYLOAD(match);
	mdver_entry *local_copy = (mdver_entry *) palloc0(sizeof(mdver_entry));

	/* Hold the entry lock so the copy is atomic w.r.t. concurrent updates */
	Cache_LockEntry(mdver_glob_mdvsn, match);
	memcpy(local_copy, shared_payload, sizeof(mdver_entry));
	Cache_UnlockEntry(mdver_glob_mdvsn, match);

	/*
	 * We're also done with the entry, release our pincount on it
	 *
	 * TODO gcaragea 05/02/2014: Are there cases where we need to hold the
	 * entry past this point? (MPP-22923)
	 */
	Cache_Release(mdver_glob_mdvsn, match);

	return local_copy;
}
Example #4
0
/*
 * Run cache eviction algorithm
 *
 * It will try to evict enough entries to add up to evictSize. Returns the
 * actual accumulated size of the entries evicted
 *
 * Uses a clock sweep: each visited cached entry has its utility decremented;
 * an unpinned entry whose utility reaches zero becomes the victim. The
 * decrement amount doubles after each full unproductive loop, and the search
 * gives up after maxClockLoops unproductive loops.
 */
int64
Cache_Evict(Cache *cache, int64 evictRequestSize)
{
	Assert(NULL != cache);
	Assert(evictRequestSize > 0);

	/* Start timing this victim search; recorded when a victim is freed or we give up */
	Cache_TimedOperationStart();

	int64 evictedSize = 0;
	uint32 unsuccessfulLoops = 0;
	bool foundVictim = false;
	/* Amount by which each visited candidate's utility is decremented */
	uint32 decAmount = cache->cacheHdr->policyContext.utilityDecrement;
	Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;

	while (true)
	{

		/* wraparound is set when the clock hand passes the end of the entry array */
		bool wraparound = false;
		int32 entryIdx = Cache_NextClockHand(cache, &wraparound);
		Assert(entryIdx < cache->cacheHdr->nEntries);

		Cache_UpdatePerfCounter(&cacheStats->noEntriesScanned,1 /* delta */);

		if (wraparound)
		{
			unsuccessfulLoops++;

			Cache_UpdatePerfCounter(&cacheStats->noWraparound, 1 /* delta */);

			if (!foundVictim)
			{
				/*
				 * We looped around and did not manage to evict any entries.
				 * Double the amount we decrement eviction candidate's utility by.
				 * This makes the eviction algorithm look for a victim more aggressively
				 */
				if (decAmount <= CACHE_MAX_UTILITY / 2)
				{
					decAmount = 2 * decAmount;
				}
				else
				{
					decAmount = CACHE_MAX_UTILITY;
				}
			}
			foundVictim = false;

			if (unsuccessfulLoops > cache->cacheHdr->policyContext.maxClockLoops)
			{
				/* Can't find any cached and unused entries candidates for evictions, even after looping around
				 * maxClockLoops times. Give up looking for victims. */
				Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);
				break;
			}
		}

		CacheEntry *crtEntry = Cache_GetEntryByIndex(cache->cacheHdr, entryIdx);
		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Not interested in free/acquired/deleted entries. Go back and advance clock hand */
			continue;
		}

		/* Look up the hash-chain anchor that owns this entry */
		CacheAnchor *anchor = (CacheAnchor *) SyncHTLookup(cache->syncHashtable, &crtEntry->hashvalue);
		if (NULL == anchor)
		{
			/* There's no anchor for this entry, someone might have snatched it in the meantime */
			continue;
		}

		SpinLockAcquire(&anchor->spinlock);

		/* Re-check state under the anchor lock: the unlocked peek above can go stale */
		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Someone freed this entry in the meantime, before we got a chance to acquire the anchor lock */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Ok, did all the checks, this entry must be valid now */
		CACHE_ASSERT_VALID(crtEntry);

		if (crtEntry->pinCount > 0)
		{
			/* Entry is in use and can't be evicted. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Decrement utility (saturating at zero) */
		gp_atomic_dec_positive_32(&crtEntry->utility, decAmount);
		/* Just decremented someone's utility. Reset our unsuccessful loops counter */
		unsuccessfulLoops = 0;

		if (crtEntry->utility > 0)
		{
			/* Entry has non-zero utility, we shouldn't evict it. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Found our victim */
		Assert(0 == crtEntry->pinCount);
		CACHE_ASSERT_VALID(crtEntry);
		Assert(crtEntry->utility == 0);

#if USE_ASSERT_CHECKING
		int32 casResult =
#endif
		compare_and_swap_32(&crtEntry->state, CACHE_ENTRY_CACHED, CACHE_ENTRY_DELETED);
		Assert(1 == casResult);

		SpinLockRelease(&anchor->spinlock);
		foundVictim = true;
		evictedSize += crtEntry->size;

		/* Don't update noFreeEntries yet. It will be done in Cache_AddToFreelist */
		Cache_UpdatePerfCounter(&cacheStats->noCachedEntries, -1 /* delta */);

		/* Unlink entry from the anchor chain */
		SpinLockAcquire(&anchor->spinlock);
		Cache_UnlinkEntry(cache, anchor, crtEntry);
		SpinLockRelease(&anchor->spinlock);

		SyncHTRelease(cache->syncHashtable, (void *) anchor);

		if (NULL != cache->cleanupEntry)
		{
			/* Call client-side cleanup for entry */
			cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(crtEntry));
		}

		/* Transition the victim DELETED -> FREE and return it to the freelist */
		Cache_LockEntry(cache, crtEntry);

		Assert(crtEntry->state == CACHE_ENTRY_DELETED);
		crtEntry->state = CACHE_ENTRY_FREE;

#if USE_ASSERT_CHECKING
		/* Poison the payload in assert builds to catch use-after-free */
		Cache_MemsetPayload(cache, crtEntry);
#endif

		Cache_UnlockEntry(cache, crtEntry);

		Cache_AddToFreelist(cache, crtEntry);

		Cache_UpdatePerfCounter(&cacheStats->noEvicts, 1 /* delta */);
		Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);

		if (evictedSize >= evictRequestSize)
		{
			/* We evicted as much as requested */
			break;
		}

		/* Restart the timer for the next victim search */
		Cache_TimedOperationStart();

	}

	return evictedSize;
}
/*
 * Reconcile an incoming versioning event with an existing Global MDVSN entry
 * for the same versioned object.
 *
 * Each versioning event contains the old version and the new version as known
 * by the originating backend:
 *   VE = (key, oldV, newV)
 * Cached entry contains the current version globally visible:
 *   entry = (key, crtV)
 *
 * We have the following scenarios:
 *  - If oldV == crtV, (i.e. VE old version is the same as the current version)
 *     then nobody else has modified the object since the backend read it.
 *     We simply update the entry with the new version in that case:
 *       entry = (key, crtV) --> entry = (key, newV)
 *
 *  - If oldV < crtV, (i.e. VE old version is different than the current version)
 *     some other backend must have modified the object in the meantime.
 *    We generate an entirely new version new_newV for the object to reflect
 *     the new "combined" object.
 *
 *    The cached entry is updated directly with the new version:
 *        entry = (key, crtV) --> entry = (key, new_newV)
 *
 *    The versioning event in the queue is updated directly:
 *        VE = (key, oldV, newV)  --> VE = (key, crtV, new_newV)
 *
 *  event: The event containing the versioning information for an update
 *  cached_entry: The existing entry for this object in the Global MDVSN
 *
 * This function is called while the MDVerWriteLock is held in exclusive
 * mode. Don't do anything that is not allowed while holding a LWLock
 * (e.g. allocate memory, or call unsafe functions).
 */
static void
mdver_globalhandler_reconcile(mdver_event *event, CacheEntry *cached_entry)
{
    /* Found existing entry, reconcile and update the version */
    mdver_entry *cached_mdver_entry = CACHE_ENTRY_PAYLOAD(cached_entry);

#ifdef MD_VERSIONING_INSTRUMENTATION
    elog(gp_mdversioning_loglevel, "Updating GlobalMDVSN entry %d: Current (%d,%d). Event: [(%d,%d)->(%d,%d)]",
         event->key,
         (int) cached_mdver_entry->ddl_version, (int) cached_mdver_entry->dml_version,
         (int) event->old_ddl_version, (int) event->old_dml_version,
         (int) event->new_ddl_version, (int) event->new_dml_version);
#endif

    /*
     * Start from the versions the originating backend proposed; if the
     * globally visible version moved past what that backend saw, mint a
     * fresh global version instead to resolve the conflict.
     *
     * Reading cached_mdver_entry here is safe: the caller holds the write
     * lock on the Global MDVSN cache.
     */
    uint64 resolved_ddl_version = event->new_ddl_version;
    uint64 resolved_dml_version = event->new_dml_version;
    bool version_conflict = false;

    if (event->old_ddl_version != cached_mdver_entry->ddl_version)
    {
        resolved_ddl_version = mdver_next_global_version();
        version_conflict = true;
    }

    if (event->old_dml_version != cached_mdver_entry->dml_version)
    {
        resolved_dml_version = mdver_next_global_version();
        version_conflict = true;
    }

    if (version_conflict)
    {

#ifdef MD_VERSIONING_INSTRUMENTATION
        elog(gp_mdversioning_loglevel, "Updating event in the queue (pid=%d, oid=%d): Old event: [(%d,%d)->(%d,%d)]. Modified event: [(%d,%d)->(%d,%d)]",
             event->backend_pid,
             event->key,
             /* Old event */
             (int) event->old_ddl_version, (int) event->old_dml_version,
             (int) event->new_ddl_version, (int) event->new_dml_version,
             /* New event */
             (int) cached_mdver_entry->ddl_version, (int) cached_mdver_entry->dml_version,
             (int) resolved_ddl_version, (int) resolved_dml_version);
#endif

        /*
         * Rewrite the queued event in place: its new versions become the
         * freshly generated ones, and its old versions are rebased onto the
         * currently visible global versions.
         */
        event->new_ddl_version = resolved_ddl_version;
        event->new_dml_version = resolved_dml_version;
        event->old_ddl_version = cached_mdver_entry->ddl_version;
        event->old_dml_version = cached_mdver_entry->dml_version;
    }

    /* About to update the cached entry. Lock entry to make update atomic */
    Cache *glob_mdvsn = mdver_get_glob_mdvsn();
    Cache_LockEntry(glob_mdvsn, cached_entry);

    cached_mdver_entry->ddl_version = resolved_ddl_version;
    cached_mdver_entry->dml_version = resolved_dml_version;

    Cache_UnlockEntry(glob_mdvsn, cached_entry);
}
Example #6
0
/*
 * Internal version of the CacheRelease function
 *
 * Drops one pin on a cached (or deleted) entry. If this was the last pin and
 * the entry is marked deleted, the entry is unlinked from its hash chain,
 * client cleanup is invoked, and the entry is returned to the freelist.
 *
 * Unregisters the entry from the cleanup list if requested.
 */
static void
Cache_ReleaseCached(Cache *cache, CacheEntry *entry, bool unregisterCleanup)
{
	Assert(NULL != cache);
	Assert(NULL != entry);
	Assert(CACHE_ENTRY_CACHED == entry->state || CACHE_ENTRY_DELETED == entry->state);

	Cache_ComputeEntryHashcode(cache, entry);

	volatile CacheAnchor *anchor = SyncHTLookup(cache->syncHashtable, &entry->hashvalue);
	Assert(anchor != NULL);

	/* Acquire anchor lock to touch the entry */
	SpinLockAcquire(&anchor->spinlock);
	Cache_LockEntry(cache, entry);

	uint32 pinCount = Cache_EntryDecRef(cache, entry);
	bool deleteEntry = false;

	if (pinCount == 0 && entry->state == CACHE_ENTRY_DELETED)
	{
		/* Delete the cache entry if pin-count = 0 and it is marked for deletion */
		Cache_UnlinkEntry(cache, (CacheAnchor *) anchor, entry);
		deleteEntry = true;

		Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noDeletedEntries, -1 /* delta */);
	}

	Cache_UnlockEntry(cache, entry);
	SpinLockRelease(&anchor->spinlock);

	/*
	 * Releasing anchor to hashtable.
	 * Ignoring 'removed' return value, both values are valid
	 */
	SyncHTRelease(cache->syncHashtable, (void *) anchor);

	/* If requested, unregister entry from the cleanup list */
	if (unregisterCleanup)
	{
		Cache_UnregisterCleanup(cache, entry);
	}

	if (deleteEntry)
	{
		/* Last pin is gone: run client cleanup (if any) and free the entry */

		if (NULL != cache->cleanupEntry)
		{
			PG_TRY();
			{
				/* Call client-specific cleanup function before removing entry from cache */
				cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(entry));
			}
			PG_CATCH();
			{

				/*
				 * Cleanup errored out; still return the entry to the
				 * freelist so it is not leaked, then re-throw.
				 */

				/* Grab entry lock to ensure exclusive access to it while we're touching it */
				Cache_LockEntry(cache, entry);

				Assert(CACHE_ENTRY_DELETED == entry->state);
				entry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
				/* Poison the payload in assert builds to catch use-after-free */
				Cache_MemsetPayload(cache, entry);
#endif

				Cache_UnlockEntry(cache, entry);

				/* Link entry back in the freelist */
				Cache_AddToFreelist(cache, entry);

				PG_RE_THROW();
			}
			PG_END_TRY();
		}

		/* Grab entry lock to ensure exclusive access to it while we're touching it */
		Cache_LockEntry(cache, entry);

		entry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
		/* Poison the payload in assert builds to catch use-after-free */
		Cache_MemsetPayload(cache, entry);
#endif

		Cache_UnlockEntry(cache, entry);

		/* Link entry back in the freelist */
		Cache_AddToFreelist(cache, entry);
	}
}
Example #7
0
File: gp_workfile_mgr.c  Project: d/gpdb
/*
 * Function returning all workfile cache entries for one segment
 *
 * Set-returning function (SRF): each call produces one row of the
 * gp_workfile_mgr_cache_entries view; iteration state (the current entry
 * index) is kept in funcctx->user_fctx across calls.
 */
Datum
gp_workfile_mgr_cache_entries(PG_FUNCTION_ARGS)
{

	FuncCallContext *funcctx;
	int32 *crtIndexPtr;

	if (SRF_IS_FIRSTCALL())
	{
		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/* Switch to memory context appropriate for multiple function calls */
		MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/*
		 * Build a tuple descriptor for our result type
		 * The number and type of attributes have to match the definition of the
		 * view gp_workfile_mgr_cache_entries
		 */
		TupleDesc tupdesc = CreateTemplateTupleDesc(NUM_CACHE_ENTRIES_ELEM, false);

		Assert(NUM_CACHE_ENTRIES_ELEM == 12);

		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segid",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "path",
				TEXTOID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "hash",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "size",
				INT8OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "state",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6, "workmem",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 7, "optype",
				TEXTOID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 8, "slice",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 9, "sessionid",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 10, "commandid",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 11, "query_start",
				TIMESTAMPTZOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 12, "numfiles",
				INT4OID, -1 /* typmod */, 0 /* attdim */);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/* Per-call cursor into the cache's entry array, starting at 0 */
		crtIndexPtr = (int32 *) palloc(sizeof(*crtIndexPtr));
		*crtIndexPtr = 0;
		funcctx->user_fctx = crtIndexPtr;
		MemoryContextSwitchTo(oldcontext);
	}

	Cache *cache = workfile_mgr_get_cache();
	funcctx = SRF_PERCALL_SETUP();
	crtIndexPtr = (int32 *) funcctx->user_fctx;

	/* Loop until we find a listable entry (returned as one row) or run out */
	while (true)
	{

		CacheEntry *crtEntry = next_entry_to_list(cache, crtIndexPtr);

		if (!crtEntry)
		{
			/* Reached the end of the entry array, we're done */
			SRF_RETURN_DONE(funcctx);
		}

		Datum		values[NUM_CACHE_ENTRIES_ELEM];
		bool		nulls[NUM_CACHE_ENTRIES_ELEM];
		MemSet(nulls, 0, sizeof(nulls));

		workfile_set *work_set = CACHE_ENTRY_PAYLOAD(crtEntry);
		char work_set_path[MAXPGPATH] = "";
		char *work_set_operator_name = NULL;


		/*
		 * Lock entry in order to read its payload
		 * Don't call any functions that can get interrupted or
		 * that palloc memory while holding this lock.
		 */
		Cache_LockEntry(cache, crtEntry);

		if (!should_list_entry(crtEntry))
		{
			Cache_UnlockEntry(cache, crtEntry);
			continue;
		}

		values[0] = Int32GetDatum(GpIdentity.segindex);
		/* Copy path into a local buffer so text conversion can happen after unlock */
		strlcpy(work_set_path, work_set->path, MAXPGPATH);

		values[2] = UInt32GetDatum(crtEntry->hashvalue);

		int64 work_set_size = work_set->size;
		if (crtEntry->state == CACHE_ENTRY_ACQUIRED)
		{
			/*
			 * work_set->size is not updated until the entry is cached.
			 * For in-progress queries, the up-to-date size is stored in
			 * work_set->in_progress_size.
			 */
			work_set_size = work_set->in_progress_size;
		}

		values[3] = Int64GetDatum(work_set_size);
		values[4] = UInt32GetDatum(crtEntry->state);
		values[5] = UInt32GetDatum(work_set->metadata.operator_work_mem);

		/*
		 * NOTE(review): gp_workfile_operator_name is called while holding the
		 * entry lock; assumed to return a static string without palloc'ing —
		 * confirm against its definition.
		 */
		work_set_operator_name = gp_workfile_operator_name(work_set->node_type);
		values[7] = UInt32GetDatum(work_set->slice_id);
		values[8] = UInt32GetDatum(work_set->session_id);
		values[9] = UInt32GetDatum(work_set->command_count);
		values[10] = TimestampTzGetDatum(work_set->session_start_time);
		values[11] = UInt32GetDatum(work_set->no_files);

		/* Done reading from the payload of the entry, release lock */
		Cache_UnlockEntry(cache, crtEntry);

		/*
		 * Fill in the rest of the entries of the tuple with data copied
		 * from the descriptor.
		 * CStringGetTextDatum calls palloc so we cannot do this while
		 * holding the lock above.
		 */
		values[1] = CStringGetTextDatum(work_set_path);
		values[6] = CStringGetTextDatum(work_set_operator_name);

		HeapTuple tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		Datum result = HeapTupleGetDatum(tuple);
		SRF_RETURN_NEXT(funcctx, result);
	}
}