Beispiel #1
0
/*
 * Create a new workfile set and hand it back to the caller.
 *
 *   type: the ExecWorkFileType of the files in the set (BUFFILE or BFZ)
 *   can_be_reused: caller's hint on whether the set is worth reusing. In this
 *     function it is only consulted by an assertion: a reusable set requires
 *     a plan.
 *   ps: the PlanState for the subtree rooted at the spilling operator. When
 *     it is NULL the set is tagged with node type T_Invalid.
 *     NOTE(review): ps is still passed to get_operator_work_mem() when NULL;
 *     confirm that helper tolerates it.
 *
 * The directory for the set is created first, then an entry is acquired from
 * the workfile cache. If no entry can be acquired the directory is deleted
 * again and an ERROR is raised.
 */
workfile_set *
workfile_mgr_create_set(enum ExecWorkFileType type, bool can_be_reused, PlanState *ps)
{
	Assert(NULL != workfile_mgr_cache);

	/* Extract plan and node type from the (optional) plan state in one go */
	Plan *plan = NULL;
	NodeTag node_type = T_Invalid;
	if (NULL != ps)
	{
		plan = ps->plan;
		node_type = ps->type;
	}

	AssertImply(can_be_reused, plan != NULL);

	char *set_dir = create_workset_directory(node_type, currentSliceId);

	/* First set created in this backend: hook up the resource release callback */
	if (!workfile_sets_resowner_callback_registered)
	{
		RegisterResourceReleaseCallback(workfile_set_free_callback, NULL);
		workfile_sets_resowner_callback_registered = true;
	}

	/* Arguments consumed by the cache's populate function */
	workset_info populate_args;
	populate_args.file_type = type;
	populate_args.nodeType = node_type;
	populate_args.dir_path = set_dir;
	populate_args.session_start_time = GetCurrentTimestamp();
	populate_args.operator_work_mem = get_operator_work_mem(ps);

	CacheEntry *entry = Cache_AcquireEntry(workfile_mgr_cache, &populate_args);
	if (entry == NULL)
	{
		/* No entry to own the directory we just created: remove it */
		workfile_mgr_delete_set_directory(set_dir);

		/* The cache has no free entries left */
		ereport(ERROR,
				(errmsg("could not create workfile manager entry: exceeded number of concurrent spilling queries")));
	}

	/* The workfile_set holds its own copy of the path by now */
	pfree(set_dir);

	Assert(NULL != entry);
	workfile_set *new_set = CACHE_ENTRY_PAYLOAD(entry);
	Assert(new_set != NULL);

	elog(gp_workfile_caching_loglevel, "new spill file set. key=0x%x prefix=%s opMemKB=" INT64_FORMAT,
			new_set->key, new_set->path, new_set->metadata.operator_work_mem);

	return new_set;
}
/*
 * Add or update an entry in the Global MDVSN cache for a versioning event
 * found in the event list. If an entry for the same key already exists it is
 * reconciled with the event; otherwise a new entry is inserted.
 *
 *  event: The event containing the versioning information for an update.
 *         A nuke event wipes the Global MDVSN and nothing else is done.
 *
 * Raises an ERROR if no free cache entry can be acquired.
 */
static void
mdver_globalhandler_add_version(mdver_event *event)
{
    Assert(NULL != event);

    Cache *glob_mdvsn = mdver_get_glob_mdvsn();

    if (mdver_is_nuke_event(event))
    {
        mdver_glob_mdvsn_nuke();
        return;
    }

    /* Template for the populate function: event key, versions not yet known */
    mdver_entry mdver = { InvalidOid, INVALID_MD_VERSION, INVALID_MD_VERSION };
    mdver.key = event->key;

    /* FIXME gcaragea 04/14/2014: Trigger evictions if cache is full (MPP-22923) */
    CacheEntry *acquired_entry = Cache_AcquireEntry(glob_mdvsn, &mdver);
    if (NULL == acquired_entry)
    {
        /*
         * Cache_AcquireEntry returns NULL when the freelist is empty. Fail
         * cleanly instead of dereferencing NULL in non-assert builds. No
         * lock is held yet at this point.
         */
        ereport(ERROR,
                (errmsg("could not acquire Global MDVSN cache entry: no free entries available")));
    }

    /*
     * We're about to look-up and insert/update a shared cache entry.
     * Grab writer lock in exclusive mode, so that no other backend
     * tries to insert or update the same entry at the same time.
     */
    LWLockAcquire(MDVerWriteLock, LW_EXCLUSIVE);

    CacheEntry *cached_entry = Cache_Lookup(glob_mdvsn, acquired_entry);

    if (NULL != cached_entry)
    {
        mdver_globalhandler_reconcile(event, cached_entry);

        /* Done with the looked-up entry. Release it */
        Cache_Release(glob_mdvsn, cached_entry);
    }
    else
    {
        /* Entry not found, insert new entry */
        mdver_entry *new_mdver_entry = CACHE_ENTRY_PAYLOAD(acquired_entry);

#ifdef MD_VERSIONING_INSTRUMENTATION
        elog(gp_mdversioning_loglevel, "Inserting into GlobalMDVSN entry %d: (%d,%d)",
             event->key,
             (int) event->new_ddl_version, (int) event->new_dml_version);
#endif

        new_mdver_entry->ddl_version = event->new_ddl_version;
        new_mdver_entry->dml_version = event->new_dml_version;

        Cache_Insert(glob_mdvsn, acquired_entry);
    }

    /* Drop our hold on the acquired entry before giving up the writer lock */
    Cache_Release(glob_mdvsn, acquired_entry);
    LWLockRelease(MDVerWriteLock);
}
Beispiel #3
0
/*
 *  Search the workfile cache for a file set matching the given PlanState.
 *  Returns the payload of the matching cached entry, or NULL on a miss.
 */
static workfile_set *
workfile_mgr_lookup_set(PlanState *ps)
{
	Assert(NULL != ps);
	Assert(NULL != workfile_mgr_cache);
	Assert(NULL != ps->plan);
	Assert(nodeTag(ps->plan) >= T_Plan && nodeTag(ps->plan) < T_PlanInvalItem);

	/* Arguments for the populate function; the probe entry has no directory */
	workset_info probe_info;
	probe_info.dir_path = NULL;
	probe_info.operator_work_mem = get_operator_work_mem(ps);
	probe_info.on_disk = false;

	/* A throw-away entry that only serves as the look-up key */
	CacheEntry *probeEntry = acquire_entry_retry(workfile_mgr_cache, &probe_info);
	Assert(probeEntry != NULL);
	workfile_set *probe_set = (workfile_set *) CACHE_ENTRY_PAYLOAD(probeEntry);

	/*
	 * The serialized plan goes into TopMemoryContext: that context is still
	 * around when the transaction callback runs at transaction abort.
	 */
	MemoryContext callerCxt = MemoryContextSwitchTo(TopMemoryContext);
	workfile_set_plan *serialized = workfile_mgr_serialize_plan(ps);
	MemoryContextSwitchTo(callerCxt);
	Assert(serialized != NULL);

	/* Fill in the fields the look-up compares against */
	probe_set->set_plan = serialized;
	probe_set->key = workfile_mgr_hash_key(serialized);

	CacheEntry *hitEntry = Cache_Lookup(workfile_mgr_cache, probeEntry);

	/* The probe entry has done its job; give it back */
	Cache_Release(workfile_mgr_cache, probeEntry);

	if (NULL == hitEntry)
	{
		return NULL;
	}

	return (workfile_set *) CACHE_ENTRY_PAYLOAD(hitEntry);
}
Beispiel #4
0
/*
 * Mark a previously acquired entry as free, link it back into the freelist
 * and take it out of the acquired-entries counter.
 */
static void
Cache_FreeAcquiredEntry(Cache *cache, CacheEntry *entry)
{
	/* Grab entry lock to ensure exclusive access to it while we're touching it */
	Cache_LockEntry(cache, entry);

	/* No need for atomic operations as long as we hold the entry lock */
	entry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
	Cache_MemsetPayload(cache, entry);
#endif

	Cache_UnlockEntry(cache, entry);

	/* Link entry back in the freelist */
	Cache_AddToFreelist(cache, entry);

	Cache_DecPerfCounter(&cache->cacheHdr->cacheStats.noAcquiredEntries, 1 /* delta */ );
}

/*
 * Return a previously acquired entry to the cache freelist.
 * Calls the client-specific cleanup (if the cache has one) before returning
 * the entry to the freelist.
 *
 * Unregisters the entry from the cleanup list if requested.
 *
 * If the client cleanup raises an error, the entry is still returned to the
 * freelist (and the acquired-entries counter updated) before the error is
 * re-thrown.
 */
static void
Cache_ReleaseAcquired(Cache *cache, CacheEntry *entry, bool unregisterCleanup)
{
	Assert(NULL != cache);
	Assert(NULL != entry);
	Assert(CACHE_ENTRY_ACQUIRED == entry->state);

	/* Unregister entry from the cleanup list if requested */
	if (unregisterCleanup)
	{
		Cache_UnregisterCleanup(cache, entry);
	}

	/* The cleanup callback is optional, as in Cache_Evict and Cache_ReleaseCached */
	if (NULL != cache->cleanupEntry)
	{
		PG_TRY();
		{
			/* Call client-specific cleanup function before removing entry from cache */
			cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(entry));
		}
		PG_CATCH();
		{
			/* Don't lose the entry because the client cleanup failed */
			Cache_FreeAcquiredEntry(cache, entry);

			PG_RE_THROW();
		}
		PG_END_TRY();
	}

	Cache_FreeAcquiredEntry(cache, entry);
}
/*
 * Store in entry->hashvalue the hash of the entry's key.
 * The key is located inside the payload at the offset recorded in the cache
 * header and is hashed, over keySize bytes, with the cache's hash function.
 */
static void
Cache_ComputeEntryHashcode(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != cache);
	Assert(NULL != entry);

	char *payloadBytes = (char *) CACHE_ENTRY_PAYLOAD(entry);
	void *keyPtr = (void *) (payloadBytes + cache->cacheHdr->keyOffset);

	entry->hashvalue = cache->hash(keyPtr, cache->cacheHdr->keySize);
}
/*
 * Look up an entry in the Global MDVSN component.
 * To avoid any concurrency issues, this returns a copy of the entry,
 * palloc'ed in the current memory context. The caller is responsible
 * for freeing this copy.
 *
 * 	 oid: key of the object to look up
 *
 * 	 Returns a copy of the entry if found, NULL otherwise.
 * 	 Raises an ERROR if no free cache entry can be acquired for the look-up.
 *
 */
mdver_entry *
mdver_glob_mdvsn_find(Oid oid)
{

	Assert(NULL != mdver_glob_mdvsn);

	/*
	 * Template handed to the populate function. Initialize every field, the
	 * same way the other Global MDVSN callers do, so that no uninitialized
	 * version values are read from it.
	 */
	mdver_entry mdver_info = {oid, INVALID_MD_VERSION, INVALID_MD_VERSION};

	/* FIXME gcaragea 03/18/2014: Trigger evictions if cache is full (MPP-22923) */
	CacheEntry *localEntry = Cache_AcquireEntry(mdver_glob_mdvsn, &mdver_info);
	if (NULL == localEntry)
	{
		/*
		 * Cache_AcquireEntry returns NULL when the freelist is empty. Fail
		 * cleanly instead of dereferencing NULL in non-assert builds.
		 */
		ereport(ERROR,
				(errmsg("could not acquire Global MDVSN cache entry: no free entries available")));
	}

	CacheEntry *cachedEntry = Cache_Lookup(mdver_glob_mdvsn, localEntry);

	/* Release local entry. We don't need it anymore */
	Cache_Release(mdver_glob_mdvsn, localEntry);

	mdver_entry *mdver_copy = NULL;
	if (NULL != cachedEntry)
	{
		/* Found a match. Make a local copy */
		mdver_entry *shared_mdver = (mdver_entry *) CACHE_ENTRY_PAYLOAD(cachedEntry);

		/* Allocate before taking the entry lock, so nothing is palloc'ed under it */
		mdver_copy = (mdver_entry *) palloc0(sizeof(mdver_entry));

		/* Lock entry to ensure atomicity of copy */
		Cache_LockEntry(mdver_glob_mdvsn, cachedEntry);

		memcpy(mdver_copy, shared_mdver, sizeof(mdver_entry));

		/* Got the copy, unlock entry */
		Cache_UnlockEntry(mdver_glob_mdvsn, cachedEntry);

		/*
		 * We're also done with the entry, release our pincount on it
		 *
		 * TODO gcaragea 05/02/2014: Are there cases where we need to hold the
		 * entry past this point? (MPP-22923)
		 */
		Cache_Release(mdver_glob_mdvsn, cachedEntry);
	}

	return mdver_copy;
}
Beispiel #7
0
/*
 * Take a fresh entry off the pre-allocated freelist and prepare it for the
 * client, who must later either insert it in the cache or surrender it.
 *
 * If the cache has a populateEntry callback, it is invoked on the entry's
 * payload with populate_param (an opaque value this function does not
 * interpret) before the entry is returned.
 *
 * The entry moves FREE -> RESERVED -> ACQUIRED and is registered on the
 * cleanup list as a non-cached entry.
 *
 * Returns NULL if the freelist is empty.
 */
CacheEntry *
Cache_AcquireEntry(Cache *cache, void *populate_param)
{
	Assert(NULL != cache);

	CacheEntry *freshEntry = Cache_GetFreeElement(cache);
	if (freshEntry == NULL)
	{
		return NULL;
	}

	CACHE_ASSERT_WIPED(freshEntry);

	/* FREE -> RESERVED */
	uint32 fromState = CACHE_ENTRY_FREE;
	int32 swapped = pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &freshEntry->state, &fromState, CACHE_ENTRY_RESERVED);
	Assert(1 == swapped);
	(void) swapped;				/* only inspected by the assertion */

	/*
	 * While RESERVED, no one else (the views included) reads this entry,
	 * so it can be populated without taking the entry lock.
	 */
	if (cache->populateEntry)
	{
		void *payload = CACHE_ENTRY_PAYLOAD(freshEntry);

		cache->populateEntry(payload, populate_param);
	}

	/* RESERVED -> ACQUIRED */
	fromState = CACHE_ENTRY_RESERVED;
	swapped = pg_atomic_compare_exchange_u32((pg_atomic_uint32 *) &freshEntry->state, &fromState, CACHE_ENTRY_ACQUIRED);
	Assert(1 == swapped);
	(void) swapped;

	Cache_RegisterCleanup(cache, freshEntry, false /* isCachedEntry */ );

	return freshEntry;
}
/*
 * Run cache eviction algorithm
 *
 * Advances the clock hand over the entry array, decrementing the utility of
 * every cached, unpinned entry it meets and evicting those whose utility
 * reaches zero. It stops once the accumulated size of the evicted entries
 * reaches evictRequestSize, or when the hand has wrapped around more than
 * policyContext.maxClockLoops times in a row without decrementing any
 * entry's utility.
 *
 *   cache: the cache to evict from
 *   evictRequestSize: the amount to evict, must be > 0; compared against the
 *     sum of the evicted entries' size fields
 *
 * Returns the actual accumulated size of the entries evicted, which can be
 * less than evictRequestSize if the function gave up.
 */
int64
Cache_Evict(Cache *cache, int64 evictRequestSize)
{
	Assert(NULL != cache);
	Assert(evictRequestSize > 0);

	Cache_TimedOperationStart();

	int64 evictedSize = 0;
	/* Wraparounds seen since the last time some entry's utility was decremented */
	uint32 unsuccessfulLoops = 0;
	/* Whether an entry was evicted since the last wraparound */
	bool foundVictim = false;
	/* Amount subtracted from an entry's utility per visit; grows when no victim is found */
	uint32 decAmount = cache->cacheHdr->policyContext.utilityDecrement;
	Cache_Stats *cacheStats = &cache->cacheHdr->cacheStats;

	while (true)
	{

		bool wraparound = false;
		int32 entryIdx = Cache_NextClockHand(cache, &wraparound);
		Assert(entryIdx < cache->cacheHdr->nEntries);

		Cache_UpdatePerfCounter(&cacheStats->noEntriesScanned,1 /* delta */);

		if (wraparound)
		{
			unsuccessfulLoops++;

			Cache_UpdatePerfCounter(&cacheStats->noWraparound, 1 /* delta */);

			if (!foundVictim)
			{
				/*
				 * We looped around and did not manage to evict any entries.
				 * Double the amount we decrement eviction candidate's utility by,
				 * capping it at CACHE_MAX_UTILITY.
				 * This makes the eviction algorithm look for a victim more aggressively
				 */
				if (decAmount <= CACHE_MAX_UTILITY / 2)
				{
					decAmount = 2 * decAmount;
				}
				else
				{
					decAmount = CACHE_MAX_UTILITY;
				}
			}
			/* Start tracking victims afresh for the next pass */
			foundVictim = false;

			if (unsuccessfulLoops > cache->cacheHdr->policyContext.maxClockLoops)
			{
				/* Can't find any cached and unused entries candidates for evictions, even after looping around
				 * maxClockLoops times. Give up looking for victims. */
				Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);
				break;
			}
		}

		CacheEntry *crtEntry = Cache_GetEntryByIndex(cache->cacheHdr, entryIdx);
		/* Unlocked pre-check; the state is checked again below under the anchor spinlock */
		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Not interested in free/acquired/deleted entries. Go back and advance clock hand */
			continue;
		}

		/* A non-NULL anchor is handed back with SyncHTRelease on every path below */
		CacheAnchor *anchor = (CacheAnchor *) SyncHTLookup(cache->syncHashtable, &crtEntry->hashvalue);
		if (NULL == anchor)
		{
			/* There's no anchor for this entry, someone might have snatched it in the meantime */
			continue;
		}

		SpinLockAcquire(&anchor->spinlock);

		if (crtEntry->state != CACHE_ENTRY_CACHED)
		{
			/* Someone freed this entry in the meantime, before we got a chance to acquire the anchor lock */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Ok, did all the checks, this entry must be valid now */
		CACHE_ASSERT_VALID(crtEntry);

		if (crtEntry->pinCount > 0)
		{
			/* Entry is in use and can't be evicted. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Decrement utility */
		gp_atomic_dec_positive_32(&crtEntry->utility, decAmount);
		/* Just decremented someone's utility. Reset our unsuccessful loops counter */
		unsuccessfulLoops = 0;

		if (crtEntry->utility > 0)
		{
			/* Entry has non-zero utility, we shouldn't evict it. Go back and advance clock hand */
			SpinLockRelease(&anchor->spinlock);
			SyncHTRelease(cache->syncHashtable, (void *) anchor);
			continue;
		}

		/* Found our victim */
		Assert(0 == crtEntry->pinCount);
		CACHE_ASSERT_VALID(crtEntry);
		Assert(crtEntry->utility == 0);

		/*
		 * Mark the victim as deleted: CACHED -> DELETED.
		 * NOTE(review): this uses "#if USE_ASSERT_CHECKING" while other
		 * functions in this file use "#ifdef USE_ASSERT_CHECKING".
		 */
#if USE_ASSERT_CHECKING
		int32 casResult =
#endif
		compare_and_swap_32(&crtEntry->state, CACHE_ENTRY_CACHED, CACHE_ENTRY_DELETED);
		Assert(1 == casResult);

		SpinLockRelease(&anchor->spinlock);
		foundVictim = true;
		evictedSize += crtEntry->size;

		/* Don't update noFreeEntries yet. It will be done in Cache_AddToFreelist */
		Cache_UpdatePerfCounter(&cacheStats->noCachedEntries, -1 /* delta */);

		/* Unlink entry from the anchor chain; the anchor spinlock is re-taken for this */
		SpinLockAcquire(&anchor->spinlock);
		Cache_UnlinkEntry(cache, anchor, crtEntry);
		SpinLockRelease(&anchor->spinlock);

		SyncHTRelease(cache->syncHashtable, (void *) anchor);

		if (NULL != cache->cleanupEntry)
		{
			/*
			 * Call client-side cleanup for entry.
			 * NOTE(review): unlike Cache_ReleaseCached, this call is not
			 * wrapped in PG_TRY; if the callback can raise an error the
			 * entry would stay in DELETED state. Confirm whether that can
			 * happen.
			 */
			cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(crtEntry));
		}

		/* DELETED -> FREE under the entry lock, then back onto the freelist */
		Cache_LockEntry(cache, crtEntry);

		Assert(crtEntry->state == CACHE_ENTRY_DELETED);
		crtEntry->state = CACHE_ENTRY_FREE;

#if USE_ASSERT_CHECKING
		Cache_MemsetPayload(cache, crtEntry);
#endif

		Cache_UnlockEntry(cache, crtEntry);

		Cache_AddToFreelist(cache, crtEntry);

		Cache_UpdatePerfCounter(&cacheStats->noEvicts, 1 /* delta */);
		/* One timing sample is recorded per evicted entry */
		Cache_TimedOperationRecord(&cacheStats->timeEvictions, &cacheStats->maxTimeEvict);

		if (evictedSize >= evictRequestSize)
		{
			/* We evicted as much as requested */
			break;
		}

		/* Restart the timer for the search for the next victim */
		Cache_TimedOperationStart();

	}

	return evictedSize;
}
Beispiel #9
0
/*
 * When a backend is requesting the most recent version of an object,
 * if the Local MDVSN cache doesn't have the version, and if a NUKE event
 * hasn't been encountered in the current transaction, it is looked up
 * in the Global MDVSN shared cache.
 *
 * If the object is found in Global MDVSN, return the global version.
 * If the object is not found, generate a new version, record it in Global MDVSN
 * and then return it.
 *
 *   key: The key of the looked-up object
 *   ddl_version: used to return the ddl version for the object
 *   dml_version: used to return the dml version for the object
 *
 * Raises an ERROR if no free cache entry can be acquired.
 *
 */
static void
mdver_request_from_global(Oid key, uint64 *ddl_version, uint64 *dml_version)
{

	Assert(NULL != ddl_version);
	Assert(NULL != dml_version);

	Cache *mdver_glob_mdvsn = mdver_get_glob_mdvsn();
	Assert(NULL != mdver_glob_mdvsn);

	/* Template for the populate function: requested key, versions not yet known */
	mdver_entry entry = {key, INVALID_MD_VERSION, INVALID_MD_VERSION};

	/* FIXME gcaragea 06/03/2014: Trigger evictions if cache is full (MPP-22923) */
	CacheEntry *localEntry = Cache_AcquireEntry(mdver_glob_mdvsn, &entry);

	if (NULL == localEntry)
	{
		/*
		 * Cache_AcquireEntry returns NULL when the freelist is empty. Fail
		 * cleanly instead of dereferencing NULL in non-assert builds. The
		 * writer lock has not been taken yet at this point.
		 */
		ereport(ERROR,
				(errmsg("could not acquire Global MDVSN cache entry: no free entries available")));
	}

	/*
	 * We're about to look-up and insert a shared cache entry.
	 * Grab writer lock in exclusive mode, so that no other backend
	 * can insert or update the same entry at the same time.
	 */
	LWLockAcquire(MDVerWriteLock, LW_EXCLUSIVE);

	CacheEntry *cachedEntry = Cache_Lookup(mdver_glob_mdvsn, localEntry);

	if (NULL != cachedEntry)
	{
		/* Not found in LVSN, not nuke happened, eventually found in GVSN */
		mdver_entry *crt_entry = CACHE_ENTRY_PAYLOAD(cachedEntry);

		*ddl_version = crt_entry->ddl_version;
		*dml_version = crt_entry->dml_version;

#ifdef MD_VERSIONING_INSTRUMENTATION
		elog(gp_mdversioning_loglevel, "Found version in Global MDVSN: (%d, " UINT64_FORMAT ", " UINT64_FORMAT "). Adding it to Local MDVSN",
				key, crt_entry->ddl_version, crt_entry->dml_version);
#endif

		/*
		 * We're also done with the entry, release our pincount on it
		 *
		 * TODO gcaragea 05/02/2014: Are there cases where we need to hold the
		 * entry past this point? (MPP-22923)
		 */

		Cache_Release(mdver_glob_mdvsn, cachedEntry);
	}
	else
	{
		/* Not found in LVSN, not nuke happened, not found in GVSN either */

		/* Generate new version */
		*ddl_version = mdver_next_global_version();
		*dml_version = mdver_next_global_version();

		/* Add to GVSN */
		mdver_entry *new_entry = CACHE_ENTRY_PAYLOAD(localEntry);
		new_entry->ddl_version = *ddl_version;
		new_entry->dml_version = *dml_version;

#ifdef MD_VERSIONING_INSTRUMENTATION
		elog(gp_mdversioning_loglevel, "Inserting new version in Global MDVSN: (%d, " UINT64_FORMAT ", " UINT64_FORMAT "). Adding it to Local MDVSN",
				key, new_entry->ddl_version, new_entry->dml_version);
#endif

		Cache_Insert(mdver_glob_mdvsn, localEntry);

	}

	LWLockRelease(MDVerWriteLock);

	/* Release local entry. We don't need it anymore */
	Cache_Release(mdver_glob_mdvsn, localEntry);
}
/*
 * Reconcile an incoming versioning event with the Global MDVSN entry that
 * already exists for the same versioned object.
 *
 * A versioning event carries the old and the new version as seen by the
 * backend that generated it:
 *   VE = (key, oldV, newV)
 * The cached entry carries the version that is currently globally visible:
 *   entry = (key, crtV)
 *
 * The ddl and dml versions are handled independently, each as follows:
 *  - oldV == crtV: nobody changed the object since the backend read it.
 *     The entry simply takes the event's new version:
 *       entry = (key, crtV) --> entry = (key, newV)
 *
 *  - oldV != crtV: some other backend changed the object in the meantime.
 *     A brand new version new_newV is generated for the "combined" object
 *     and stored in the entry:
 *       entry = (key, crtV) --> entry = (key, new_newV)
 *
 * If either version conflicted, the event itself is rewritten in place so
 * that it reflects what was actually applied:
 *       VE = (key, oldV, newV)  --> VE = (key, crtV, new_newV)
 *
 *  event: The event containing the versioning information for an update
 *  cached_entry: The existing entry for this object in the Global MDVSN
 *
 * This function is called while the MDVerWriteLock is held in exclusive
 * mode. Don't do anything that is not allowed while holding a LWLock
 * (e.g. allocate memory, or call unsafe functions).
 *
 */
static void
mdver_globalhandler_reconcile(mdver_event *event, CacheEntry *cached_entry)
{
    mdver_entry *current = CACHE_ENTRY_PAYLOAD(cached_entry);

#ifdef MD_VERSIONING_INSTRUMENTATION
    elog(gp_mdversioning_loglevel, "Updating GlobalMDVSN entry %d: Current (%d,%d). Event: [(%d,%d)->(%d,%d)]",
         event->key,
         (int) current->ddl_version, (int) current->dml_version,
         (int) event->old_ddl_version, (int) event->old_dml_version,
         (int) event->new_ddl_version, (int) event->new_dml_version);
#endif

    /*
     * Reading the cached entry without the entry lock is fine here: the
     * caller holds the write lock on the Global MDVSN cache.
     *
     * A version conflicts when the event's idea of the old version differs
     * from what is currently cached; a conflicting version is replaced by a
     * freshly generated one. DDL is resolved before DML.
     */
    bool ddl_conflict = (current->ddl_version != event->old_ddl_version);
    uint64 resolved_ddl = ddl_conflict ? mdver_next_global_version() : event->new_ddl_version;

    bool dml_conflict = (current->dml_version != event->old_dml_version);
    uint64 resolved_dml = dml_conflict ? mdver_next_global_version() : event->new_dml_version;

    if (ddl_conflict || dml_conflict)
    {

#ifdef MD_VERSIONING_INSTRUMENTATION
        elog(gp_mdversioning_loglevel, "Updating event in the queue (pid=%d, oid=%d): Old event: [(%d,%d)->(%d,%d)]. Modified event: [(%d,%d)->(%d,%d)]",
             event->backend_pid,
             event->key,
             /* Old event */
             (int) event->old_ddl_version, (int) event->old_dml_version,
             (int) event->new_ddl_version, (int) event->new_dml_version,
             /* New event */
             (int) current->ddl_version, (int) current->dml_version,
             (int) resolved_ddl, (int) resolved_dml);
#endif

        /* Rewrite the queued event: it now ends at the resolved versions ... */
        event->new_ddl_version = resolved_ddl;
        event->new_dml_version = resolved_dml;

        /* ... and starts from the versions that are globally visible right now */
        event->old_ddl_version = current->ddl_version;
        event->old_dml_version = current->dml_version;
    }

    /* Publish the resolved versions; the entry lock makes the pair update atomic */
    Cache *glob_mdvsn = mdver_get_glob_mdvsn();
    Cache_LockEntry(glob_mdvsn, cached_entry);

    current->ddl_version = resolved_ddl;
    current->dml_version = resolved_dml;

    Cache_UnlockEntry(glob_mdvsn, cached_entry);

}
/*
 * Internal version of the CacheRelease function, for entries that are in
 * CACHED or DELETED state.
 *
 * Drops one reference on the entry. If that leaves the pin count at zero
 * and the entry is marked DELETED, the entry is unlinked from its anchor
 * chain, the client cleanup callback (if any) is run on its payload, and
 * the entry is returned to the freelist in FREE state.
 *
 * Unregisters the entry from the cleanup list if requested.
 *
 * If the client cleanup raises an error, the entry is still moved to the
 * freelist before the error is re-thrown.
 */
static void
Cache_ReleaseCached(Cache *cache, CacheEntry *entry, bool unregisterCleanup)
{
	Assert(NULL != cache);
	Assert(NULL != entry);
	Assert(CACHE_ENTRY_CACHED == entry->state || CACHE_ENTRY_DELETED == entry->state);

	/* Recompute the hash from the payload key so we can find the entry's anchor */
	Cache_ComputeEntryHashcode(cache, entry);

	/*
	 * NOTE(review): a missing anchor is only caught by the assertion below;
	 * in non-assert builds a NULL anchor would be dereferenced.
	 */
	volatile CacheAnchor *anchor = SyncHTLookup(cache->syncHashtable, &entry->hashvalue);
	Assert(anchor != NULL);

	/* Acquire anchor lock to touch the entry; the entry lock is nested inside it */
	SpinLockAcquire(&anchor->spinlock);
	Cache_LockEntry(cache, entry);

	uint32 pinCount = Cache_EntryDecRef(cache, entry);
	bool deleteEntry = false;

	if (pinCount == 0 && entry->state == CACHE_ENTRY_DELETED)
	{
		/* Delete the cache entry if pin-count = 0 and it is marked for deletion */
		Cache_UnlinkEntry(cache, (CacheAnchor *) anchor, entry);
		deleteEntry = true;

		Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noDeletedEntries, -1 /* delta */);
	}

	/* Locks are released in the reverse order they were taken */
	Cache_UnlockEntry(cache, entry);
	SpinLockRelease(&anchor->spinlock);

	/*
	 * Releasing anchor to hashtable.
	 * Ignoring 'removed' return value, both values are valid
	 */
	SyncHTRelease(cache->syncHashtable, (void *) anchor);

	/* If requested, unregister entry from the cleanup list */
	if (unregisterCleanup)
	{
		Cache_UnregisterCleanup(cache, entry);
	}

	/* The actual tear-down happens with neither the anchor nor the entry lock held */
	if (deleteEntry)
	{

		if (NULL != cache->cleanupEntry)
		{
			PG_TRY();
			{
				/* Call client-specific cleanup function before removing entry from cache */
				cache->cleanupEntry(CACHE_ENTRY_PAYLOAD(entry));
			}
			PG_CATCH();
			{
				/* Cleanup failed: still recycle the entry, then propagate the error */

				/* Grab entry lock to ensure exclusive access to it while we're touching it */
				Cache_LockEntry(cache, entry);

				Assert(CACHE_ENTRY_DELETED == entry->state);
				entry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
				Cache_MemsetPayload(cache, entry);
#endif

				Cache_UnlockEntry(cache, entry);

				/* Link entry back in the freelist */
				Cache_AddToFreelist(cache, entry);

				PG_RE_THROW();
			}
			PG_END_TRY();
		}

		/* Normal path: same DELETED -> FREE transition as in the error path above */

		/* Grab entry lock to ensure exclusive access to it while we're touching it */
		Cache_LockEntry(cache, entry);

		entry->state = CACHE_ENTRY_FREE;

#ifdef USE_ASSERT_CHECKING
		Cache_MemsetPayload(cache, entry);
#endif

		Cache_UnlockEntry(cache, entry);

		/* Link entry back in the freelist */
		Cache_AddToFreelist(cache, entry);
	}
}
/*
 * Look up an exact match for a cache entry
 *
 *   cache: the cache to search
 *   entry: the entry to match; its hash value is (re)computed here from the
 *     key stored in its payload
 *
 * Candidates are the non-deleted entries on the anchor chain for that hash
 * value; each is compared with the cache's equivalentEntries callback.
 *
 * Returns the matching cache entry if found, NULL otherwise. A returned
 * entry has had its reference count incremented and has been registered on
 * the cleanup list as a cached entry.
 */
CacheEntry *
Cache_Lookup(Cache *cache, CacheEntry *entry)
{
	Assert(NULL != cache);
	Assert(NULL != entry);

	Cache_TimedOperationStart();
	Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noLookups, 1 /* delta */);

	/* Advance the clock for the replacement policy */
	Cache_AdvanceClock(cache);

	Cache_ComputeEntryHashcode(cache, entry);

	volatile CacheAnchor *anchor = SyncHTLookup(cache->syncHashtable, &entry->hashvalue);
	if (NULL == anchor)
	{
		/* No matching anchor found, there can't be a matching element in the cache */
		Cache_TimedOperationRecord(&cache->cacheHdr->cacheStats.timeLookups,
				&cache->cacheHdr->cacheStats.maxTimeLookup);
		return NULL;
	}

	/* Acquire anchor lock to touch the chain */
	SpinLockAcquire(&anchor->spinlock);

	CacheEntry *crtEntry = anchor->firstEntry;

	while (true)
	{

		while (NULL != crtEntry && crtEntry->state == CACHE_ENTRY_DELETED)
		{
			/* Skip over deleted entries */
			crtEntry = crtEntry->nextEntry;
		}

		if (NULL == crtEntry)
		{
			/*
			 * No valid entries found in the chain. Leave through the common
			 * exit below so that the anchor obtained from SyncHTLookup is
			 * handed back with SyncHTRelease on this path as well.
			 */
			SpinLockRelease(&anchor->spinlock);
			break;
		}

		/* Found a valid entry. AddRef it and test to see if it matches */
		Cache_EntryAddRef(cache, crtEntry);

		/* The comparison callback runs without the spinlock held */
		SpinLockRelease(&anchor->spinlock);

		/* Register it for cleanup in case we get an error while testing for equality */
		Cache_RegisterCleanup(cache, crtEntry, true /* isCachedEntry */);

		Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noCompares, 1 /* delta */);

		if(cache->equivalentEntries(CACHE_ENTRY_PAYLOAD(entry),
				CACHE_ENTRY_PAYLOAD(crtEntry)))
		{
			/* Found the match, we're done */
			Cache_TouchEntry(cache, crtEntry);
			Cache_UpdatePerfCounter(&cache->cacheHdr->cacheStats.noCacheHits, 1 /* delta */);
			break;
		}

		/* Unregister it from cleanup since it wasn't the one */
		Cache_UnregisterCleanup(cache, crtEntry);

		/* Back under the spinlock to drop our reference and walk the chain */
		SpinLockAcquire(&anchor->spinlock);

		Cache_EntryDecRef(cache, crtEntry);

		crtEntry = crtEntry->nextEntry;
	}

	/* ignoring return value, both values are valid */
	SyncHTRelease(cache->syncHashtable, (void *) anchor);

	Cache_TimedOperationRecord(&cache->cacheHdr->cacheStats.timeLookups,
			&cache->cacheHdr->cacheStats.maxTimeLookup);

	/* crtEntry is the match, or NULL if the chain was exhausted */
	return crtEntry;
}
/*
 * Overwrite an entry's payload with CACHE_MEMSET_BYTE_PATTERN, so that a
 * client that keeps using a surrendered entry's payload by accident reads
 * the pattern instead of stale data. The number of bytes written is the
 * entrySize recorded in the cache header.
 */
void
Cache_MemsetPayload(Cache *cache, CacheEntry *entry)
{
	MemSet(CACHE_ENTRY_PAYLOAD(entry), CACHE_MEMSET_BYTE_PATTERN, cache->cacheHdr->entrySize);
}
Beispiel #14
0
/*
 * Function returning all workfile cache entries for one segment
 */
Datum
gp_workfile_mgr_cache_entries(PG_FUNCTION_ARGS)
{

	FuncCallContext *funcctx;
	int32 *crtIndexPtr;

	if (SRF_IS_FIRSTCALL())
	{
		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/* Switch to memory context appropriate for multiple function calls */
		MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/*
		 * Build a tuple descriptor for our result type
		 * The number and type of attributes have to match the definition of the
		 * view gp_workfile_mgr_cache_entries
		 */
		TupleDesc tupdesc = CreateTemplateTupleDesc(NUM_CACHE_ENTRIES_ELEM, false);

		Assert(NUM_CACHE_ENTRIES_ELEM == 12);

		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segid",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "path",
				TEXTOID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "hash",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "size",
				INT8OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "state",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6, "workmem",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 7, "optype",
				TEXTOID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 8, "slice",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 9, "sessionid",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 10, "commandid",
				INT4OID, -1 /* typmod */, 0 /* attdim */);
		TupleDescInitEntry(tupdesc, (AttrNumber) 11, "query_start",
				TIMESTAMPTZOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 12, "numfiles",
				INT4OID, -1 /* typmod */, 0 /* attdim */);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		crtIndexPtr = (int32 *) palloc(sizeof(*crtIndexPtr));
		*crtIndexPtr = 0;
		funcctx->user_fctx = crtIndexPtr;
		MemoryContextSwitchTo(oldcontext);
	}

	Cache *cache = workfile_mgr_get_cache();
	funcctx = SRF_PERCALL_SETUP();
	crtIndexPtr = (int32 *) funcctx->user_fctx;

	while (true)
	{

		CacheEntry *crtEntry = next_entry_to_list(cache, crtIndexPtr);

		if (!crtEntry)
		{
			/* Reached the end of the entry array, we're done */
			SRF_RETURN_DONE(funcctx);
		}

		Datum		values[NUM_CACHE_ENTRIES_ELEM];
		bool		nulls[NUM_CACHE_ENTRIES_ELEM];
		MemSet(nulls, 0, sizeof(nulls));

		workfile_set *work_set = CACHE_ENTRY_PAYLOAD(crtEntry);
		char work_set_path[MAXPGPATH] = "";
		char *work_set_operator_name = NULL;


		/*
		 * Lock entry in order to read its payload
		 * Don't call any functions that can get interrupted or
		 * that palloc memory while holding this lock.
		 */
		Cache_LockEntry(cache, crtEntry);

		if (!should_list_entry(crtEntry))
		{
			Cache_UnlockEntry(cache, crtEntry);
			continue;
		}

		values[0] = Int32GetDatum(GpIdentity.segindex);
		strlcpy(work_set_path, work_set->path, MAXPGPATH);

		values[2] = UInt32GetDatum(crtEntry->hashvalue);

		int64 work_set_size = work_set->size;
		if (crtEntry->state == CACHE_ENTRY_ACQUIRED)
		{
			/*
			 * work_set->size is not updated until the entry is cached.
			 * For in-progress queries, the up-to-date size is stored in
			 * work_set->in_progress_size.
			 */
			work_set_size = work_set->in_progress_size;
		}

		values[3] = Int64GetDatum(work_set_size);
		values[4] = UInt32GetDatum(crtEntry->state);
		values[5] = UInt32GetDatum(work_set->metadata.operator_work_mem);

		work_set_operator_name = gp_workfile_operator_name(work_set->node_type);
		values[7] = UInt32GetDatum(work_set->slice_id);
		values[8] = UInt32GetDatum(work_set->session_id);
		values[9] = UInt32GetDatum(work_set->command_count);
		values[10] = TimestampTzGetDatum(work_set->session_start_time);
		values[11] = UInt32GetDatum(work_set->no_files);

		/* Done reading from the payload of the entry, release lock */
		Cache_UnlockEntry(cache, crtEntry);

		/*
		 * Fill in the rest of the entries of the tuple with data copied
		 * from the descriptor.
		 * CStringGetTextDatum calls palloc so we cannot do this while
		 * holding the lock above.
		 */
		values[1] = CStringGetTextDatum(work_set_path);
		values[6] = CStringGetTextDatum(work_set_operator_name);

		HeapTuple tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		Datum result = HeapTupleGetDatum(tuple);
		SRF_RETURN_NEXT(funcctx, result);
	}
}
Beispiel #15
0
/*
 * Create a new workfile set and hand it back to the caller.
 *
 *   type: the ExecWorkFileType of the files in the set (BUFFILE or BFZ)
 *   can_be_reused: when false the caller is telling us there is no point in
 *     keeping the set for reuse (e.g. spilling during index creation). The
 *     set is marked reusable only if this is true and
 *     workfile_mgr_is_reusable(ps) agrees.
 *   ps: the PlanState for the subtree rooted at the spilling operator. When
 *     it is NULL the set is tagged with node type T_Invalid.
 *   snapshot: snapshot information for the current transaction, passed on to
 *     the populate function
 *
 * The directory for the set is created first; if acquiring a cache entry
 * raises an error, the directory is deleted before the error is re-thrown.
 * For a reusable set the plan is serialized, hashed into the set's key and
 * saved with the set.
 */
workfile_set *
workfile_mgr_create_set(enum ExecWorkFileType type, bool can_be_reused, PlanState *ps, workfile_set_snapshot snapshot)
{
	Assert(NULL != workfile_mgr_cache);

	/* Extract plan and node type from the (optional) plan state in one go */
	Plan *plan = NULL;
	NodeTag node_type = T_Invalid;
	if (NULL != ps)
	{
		plan = ps->plan;
		node_type = ps->type;
	}

	AssertImply(can_be_reused, plan != NULL);

	char *set_dir = create_workset_directory(node_type, currentSliceId);

	/* Arguments consumed by the cache's populate function */
	workset_info populate_args;
	populate_args.file_type = type;
	populate_args.snapshot = snapshot;
	populate_args.nodeType = node_type;
	populate_args.can_be_reused = can_be_reused && workfile_mgr_is_reusable(ps);
	populate_args.dir_path = set_dir;
	populate_args.session_start_time = GetCurrentTimestamp();
	populate_args.operator_work_mem = get_operator_work_mem(ps);
	populate_args.on_disk = true;

	CacheEntry *entry = NULL;

	PG_TRY();
	{
		entry = acquire_entry_retry(workfile_mgr_cache, &populate_args);
	}
	PG_CATCH();
	{
		/* No entry could be acquired: don't leave the new directory behind */
		workfile_mgr_delete_set_directory(set_dir);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* The workfile_set holds its own copy of the path by now */
	pfree(set_dir);

	Assert(NULL != entry);
	workfile_set *new_set = CACHE_ENTRY_PAYLOAD(entry);
	Assert(new_set != NULL);

	/* Post-acquire step: a reusable set is keyed by its serialized plan */
	if (new_set->can_be_reused)
	{
		Assert(plan != NULL);
		Assert(nodeTag(plan) >= T_Plan && nodeTag(plan) < T_PlanInvalItem);

		workfile_set_plan *serialized = workfile_mgr_serialize_plan(ps);

		new_set->key = workfile_mgr_hash_key(serialized);
		workfile_mgr_save_plan(new_set, serialized);
		workfile_mgr_free_plan(serialized);
	}

	elog(gp_workfile_caching_loglevel, "new spill file set. key=0x%x can_be_reused=%d prefix=%s opMemKB=" INT64_FORMAT,
			new_set->key, new_set->can_be_reused, new_set->path, new_set->metadata.operator_work_mem);

	return new_set;
}