Example #1
/*
 * Make sure that SUBTRANS has room for a newly-allocated XID.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty subtrans page to make room
 * in shared memory.
 */
void
ExtendSUBTRANS(TransactionId newestXact)
{
	int			pageno;

	/*
	 * Caller must already hold the mirrored lock in shared mode.
	 */

	/*
	 * No work except at first XID of a page.  But beware: just after
	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
	 */
	if (TransactionIdToEntry(newestXact) != 0 &&
		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
		return;

	pageno = TransactionIdToPage(newestXact);

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	/* Zero the page */
	ZeroSUBTRANSPage(pageno);

	LWLockRelease(SubtransControlLock);
}
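/*
 * For reference, a minimal sketch of the page-mapping macros assumed by the
 * function above, following their conventional SLRU-style definitions (not
 * quoted from this source): each subtrans page holds one parent XID per
 * transaction, so an XID maps to a page by division and to an entry within
 * the page by modulus.
 */
#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))

#define TransactionIdToPage(xid)	((xid) / (TransactionId) SUBTRANS_XACTS_PER_PAGE)
#define TransactionIdToEntry(xid)	((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)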
Example #2
/*
 *		XactLockTableWait
 *
 * Wait for the specified transaction to commit or abort.
 */
void
XactLockTableWait(TransactionId xid)
{
	LOCKTAG		tag;
	TransactionId myxid = GetCurrentTransactionId();

	Assert(!TransactionIdEquals(xid, myxid));

	MemSet(&tag, 0, sizeof(tag));
	tag.relId = XactLockTableId;
	tag.dbId = InvalidOid;
	tag.objId.xid = xid;

	if (!LockAcquire(LockTableId, &tag, myxid,
					 ShareLock, false))
		elog(ERROR, "LockAcquire failed");

	LockRelease(LockTableId, &tag, myxid, ShareLock);

	/*
	 * The transaction has committed, aborted, or crashed; we must update
	 * pg_clog if it is still marked as running.
	 */
	if (!TransactionIdDidCommit(xid) && !TransactionIdDidAbort(xid))
		TransactionIdAbort(xid);
}
Example #3
/*
 * TransactionIdGetCommitLSN
 *
 * This function returns an LSN that is late enough to be able
 * to guarantee that if we flush up to the LSN returned then we
 * will have flushed the transaction's commit record to disk.
 *
 * The result is not necessarily the exact LSN of the transaction's
 * commit record!  For example, for long-past transactions (those whose
 * clog pages already migrated to disk), we'll return InvalidXLogRecPtr.
 * Also, because we group transactions on the same clog page to conserve
 * storage, we might return the LSN of a later transaction that falls into
 * the same group.
 */
XLogRecPtr
TransactionIdGetCommitLSN(TransactionId xid)
{
	XLogRecPtr	result;

	/*
	 * Currently, all uses of this function are for xids that were just
	 * reported to be committed by TransactionLogFetch, so we expect that
	 * checking TransactionLogFetch's cache will usually succeed and avoid an
	 * extra trip to shared memory.
	 */
	if (TransactionIdEquals(xid, cachedFetchXid))
		return cachedCommitLSN;

	/* Special XIDs are always known committed */
	if (!TransactionIdIsNormal(xid))
		return InvalidXLogRecPtr;

	/*
	 * Get the transaction status.
	 */
	(void) TransactionIdGetStatus(xid, &result);

	return result;
}
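/*
 * A hedged usage sketch (not part of the original example): a caller that
 * needs the commit record durable flushes WAL up to the returned pointer.
 * The helper name is invented; XLogFlush() and XLogRecPtrIsInvalid() are
 * assumed to have their usual meanings.
 */
static void
FlushCommitRecord(TransactionId xid)
{
	XLogRecPtr	commitLSN = TransactionIdGetCommitLSN(xid);

	/* InvalidXLogRecPtr means the commit is already known to be on disk */
	if (!XLogRecPtrIsInvalid(commitLSN))
		XLogFlush(commitLSN);
}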
Example #4
/*
 * Make sure that CommitTs has room for a newly-allocated XID.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty CommitTs or xlog page to make room
 * in shared memory.
 *
 * NB: the current implementation relies on track_commit_timestamp being
 * PGC_POSTMASTER.
 */
void
ExtendCommitTs(TransactionId newestXact)
{
	int			pageno;

	/*
	 * Nothing to do if module not enabled.  Note we do an unlocked read of
	 * the flag here, which is okay because this routine is only called from
	 * GetNewTransactionId, which is never called in a standby.
	 */
	Assert(!InRecovery);
	if (!commitTsShared->commitTsActive)
		return;

	/*
	 * No work except at first XID of a page.  But beware: just after
	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
	 */
	if (TransactionIdToCTsEntry(newestXact) != 0 &&
		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
		return;

	pageno = TransactionIdToCTsPage(newestXact);

	LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE);

	/* Zero the page and make an XLOG entry about it */
	ZeroCommitTsPage(pageno, !InRecovery);

	LWLockRelease(CommitTsControlLock);
}
Example #5
/*
 * TransactionIdIsInProgress -- is the given transaction running in some backend?
 */
bool
TransactionIdIsInProgress(TransactionId xid)
{
	bool		result = false;
	SISeg	   *segP = shmInvalBuffer;
	ProcState  *stateP = segP->procState;
	int			index;

	LWLockAcquire(SInvalLock, LW_SHARED);

	for (index = 0; index < segP->lastBackend; index++)
	{
		SHMEM_OFFSET pOffset = stateP[index].procStruct;

		if (pOffset != INVALID_OFFSET)
		{
			PGPROC	   *proc = (PGPROC *) MAKE_PTR(pOffset);

			/* Fetch xid just once - see GetNewTransactionId */
			TransactionId pxid = proc->xid;

			if (TransactionIdEquals(pxid, xid))
			{
				result = true;
				break;
			}
		}
	}

	LWLockRelease(SInvalLock);

	return result;
}
Example #6
/*
 * Make sure that SUBTRANS has room for a newly-allocated XID.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty subtrans page to make room
 * in shared memory.
 */
void
ExtendSUBTRANS(TransactionId newestXact)
{
	int			pageno;

	/*
	 * No work except at first XID of a page.  But beware: just after
	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
	 */
#ifdef PGXC  /* PGXC_COORD || PGXC_DATANODE */
	/*
	 * In PGXC, a node may not be involved in a transaction and will
	 * therefore see gaps in the XID sequence, so we detect the need to
	 * extend by comparing against latest_page_number rather than checking
	 * whether this is the first XID of a page.
	 *
	 * There is also a special case around transaction wraparound that we
	 * need to detect.
	 */
	pageno = TransactionIdToPage(newestXact);

	/*
	 * The first condition makes sure we did not wrap around; the second
	 * checks whether we are still using the same page.  Note that
	 * latest_page_number can change while we are not holding a lock, so we
	 * repeat the check below.  We do it this way, rather than grabbing the
	 * lock up front, to avoid lock contention.
	 */
	if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA 
			&& pageno <= SubTransCtl->shared->latest_page_number)
		return;
#else
	if (TransactionIdToEntry(newestXact) != 0 &&
		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
		return;

	pageno = TransactionIdToPage(newestXact);
#endif

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

#ifdef PGXC
	/*
	 * We repeat the check.  Another process may have written out the page
	 * already and advanced the latest_page_number while we were waiting
	 * for the lock.
	 */
	if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA 
			&& pageno <= SubTransCtl->shared->latest_page_number)
	{
		LWLockRelease(SubtransControlLock);
		return;
	}
#endif

	/* Zero the page */
	ZeroSUBTRANSPage(pageno);

	LWLockRelease(SubtransControlLock);
}
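/*
 * The PGXC branch above is an instance of the double-checked pattern: an
 * unlocked read filters out the common already-extended case cheaply, and
 * the same test is repeated under the lock to close the race.  A minimal
 * sketch of the idiom; page_already_covers() is a hypothetical stand-in
 * for the latest_page_number comparison used above.
 */
static void
extend_if_needed(int pageno)
{
	if (page_already_covers(pageno))	/* unlocked fast path; may be stale */
		return;

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
	if (!page_already_covers(pageno))	/* recheck now that we hold the lock */
		ZeroSUBTRANSPage(pageno);
	LWLockRelease(SubtransControlLock);
}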
Example #7
/*
 *		xideq			- are two xids equal?
 */
Datum
xideq(PG_FUNCTION_ARGS)
{
    TransactionId xid1 = PG_GETARG_TRANSACTIONID(0);
    TransactionId xid2 = PG_GETARG_TRANSACTIONID(1);

    PG_RETURN_BOOL(TransactionIdEquals(xid1, xid2));
}
Example #8
/*
 * TransactionLogFetch --- fetch commit status of specified transaction id
 */
static XidStatus
TransactionLogFetch(TransactionId transactionId)
{
	XidStatus	xidstatus;
	XLogRecPtr	xidlsn;

	/*
	 * Before going to the commit log manager, check our single item cache to
	 * see if we didn't just check the transaction status a moment ago.
	 */
	if (TransactionIdEquals(transactionId, cachedFetchXid))
		return cachedFetchXidStatus;

	/*
	 * Also, check to see if the transaction ID is a permanent one.
	 */
	if (!TransactionIdIsNormal(transactionId))
	{
		if (TransactionIdEquals(transactionId, BootstrapTransactionId))
			return TRANSACTION_STATUS_COMMITTED;
		if (TransactionIdEquals(transactionId, FrozenTransactionId))
			return TRANSACTION_STATUS_COMMITTED;
		return TRANSACTION_STATUS_ABORTED;
	}

	/*
	 * Get the transaction status.
	 */
	xidstatus = TransactionIdGetStatus(transactionId, &xidlsn);

	/*
	 * Cache it, but DO NOT cache status for unfinished or sub-committed
	 * transactions!  We only cache status that is guaranteed not to change.
	 */
	if (xidstatus != TRANSACTION_STATUS_IN_PROGRESS &&
		xidstatus != TRANSACTION_STATUS_SUB_COMMITTED)
	{
		cachedFetchXid = transactionId;
		cachedFetchXidStatus = xidstatus;
		cachedCommitLSN = xidlsn;
	}

	return xidstatus;
}
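/*
 * For context, the single-item cache consulted above is conventionally a
 * trio of static variables in the same file; a sketch assuming the usual
 * transam.c layout (not quoted from this source):
 */
static TransactionId cachedFetchXid = InvalidTransactionId;
static XidStatus cachedFetchXidStatus;
static XLogRecPtr cachedCommitLSN;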
Example #9
/*
 * TransactionIdIsKnownCompleted
 *		True iff transaction associated with the identifier is currently
 *		known to have either committed or aborted.
 *
 * This does NOT look into pg_clog but merely probes our local cache
 * (and so it's not named TransactionIdDidComplete, which would be the
 * appropriate name for a function that worked that way).  The intended
 * use is just to short-circuit TransactionIdIsInProgress calls when doing
 * repeated tqual.c checks for the same XID.  If this isn't extremely fast
 * then it will be counterproductive.
 *
 * Note:
 *		Assumes transaction identifier is valid.
 */
bool
TransactionIdIsKnownCompleted(TransactionId transactionId)
{
	if (TransactionIdEquals(transactionId, cachedFetchXid))
	{
		/* If it's in the cache at all, it must be completed. */
		return true;
	}

	return false;
}
Example #10
/*
 *	TransactionIdIsCurrentTransactionId
 *
 *	During bootstrap, we cheat and say "it's not my transaction ID" even though
 *	it is.	Along with transam.c's cheat to say that the bootstrap XID is
 *	already committed, this causes the tqual.c routines to see previously
 *	inserted tuples as committed, which is what we need during bootstrap.
 */
bool
TransactionIdIsCurrentTransactionId(TransactionId xid)
{
	TransactionState s = CurrentTransactionState;

	if (AMI_OVERRIDE)
	{
		Assert(xid == BootstrapTransactionId);
		return false;
	}

	return TransactionIdEquals(xid, s->transactionIdData);
}
Example #11
/*
 * TransactionIdIsKnownCompleted
 *		True iff transaction associated with the identifier is currently
 *		known to have either committed or aborted.
 *
 * This does NOT look into pg_clog but merely probes our local cache
 * (and so it's not named TransactionIdDidComplete, which would be the
 * appropriate name for a function that worked that way).  The intended
 * use is just to short-circuit TransactionIdIsInProgress calls when doing
 * repeated tqual.c checks for the same XID.  If this isn't extremely fast
 * then it will be counterproductive.
 *
 * Note:
 *		Assumes transaction identifier is valid.
 */
bool
TransactionIdIsKnownCompleted(TransactionId transactionId)
{
	if (TransactionIdEquals(transactionId, cachedFetchXid))
	{
#ifdef PGXC
		syncGXID_GTM((GlobalTransactionId)transactionId);
#endif
		/* If it's in the cache at all, it must be completed. */
		return true;
	}

	return false;
}
Example #12
/*
 *		XactLockTableWait
 *
 * Wait for the specified transaction to commit or abort.  If an operation
 * is specified, an error context callback is set up; if 'oper' is passed as
 * XLTW_None, no error context callback is set up.
 *
 * Note that this does the right thing for subtransactions: if we wait on a
 * subtransaction, we will exit as soon as it aborts or its top parent commits.
 * It takes some extra work to ensure this, because to save on shared memory
 * the XID lock of a subtransaction is released when it ends, whether
 * successfully or unsuccessfully.  So we have to check if it's "still running"
 * and if so wait for its parent.
 */
void
XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
				  XLTW_Oper oper)
{
	LOCKTAG		tag;
	XactLockTableWaitInfo info;
	ErrorContextCallback callback;

	/*
	 * If an operation is specified, set up our verbose error context
	 * callback.
	 */
	if (oper != XLTW_None)
	{
		Assert(RelationIsValid(rel));
		Assert(ItemPointerIsValid(ctid));

		info.rel = rel;
		info.ctid = ctid;
		info.oper = oper;

		callback.callback = XactLockTableWaitErrorCb;
		callback.arg = &info;
		callback.previous = error_context_stack;
		error_context_stack = &callback;
	}

	for (;;)
	{
		Assert(TransactionIdIsValid(xid));
		Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));

		SET_LOCKTAG_TRANSACTION(tag, xid);

		(void) LockAcquire(&tag, ShareLock, false, false);

		LockRelease(&tag, ShareLock, false);

		if (!TransactionIdIsInProgress(xid))
			break;
		xid = SubTransGetParent(xid);
	}

	if (oper != XLTW_None)
		error_context_stack = callback.previous;
}
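/*
 * A hedged sketch of the pieces the function above assumes:
 * XactLockTableWaitInfo carries the context, and XactLockTableWaitErrorCb
 * turns it into an errcontext line.  Field names follow their usage above;
 * the message wording here is illustrative only.
 */
typedef struct XactLockTableWaitInfo
{
	XLTW_Oper	oper;
	Relation	rel;
	ItemPointer ctid;
} XactLockTableWaitInfo;

static void
XactLockTableWaitErrorCb(void *arg)
{
	XactLockTableWaitInfo *info = (XactLockTableWaitInfo *) arg;

	if (info->rel != NULL && ItemPointerIsValid(info->ctid))
		errcontext("while operating on tuple (%u,%u) in relation \"%s\"",
				   ItemPointerGetBlockNumber(info->ctid),
				   ItemPointerGetOffsetNumber(info->ctid),
				   RelationGetRelationName(info->rel));
}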
Example #13
/*
 * Check whether the specified heap tuple was inserted by the given
 * transaction/command, and return
 *
 * - -1 if it was not
 * - 0	if there is no such tuple at all
 * - 1	if it was
 */
int
XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr,
				   TransactionId xid, CommandId cid)
{
	Relation	reln;
	Buffer		buffer;
	Page		page;
	ItemId		lp;
	HeapTupleHeader htup;

	reln = XLogOpenRelation(false, RM_HEAP_ID, hnode);
	if (!RelationIsValid(reln))
		return (0);

	buffer = ReadBuffer(reln, ItemPointerGetBlockNumber(iptr));
	if (!BufferIsValid(buffer))
		return (0);

	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	page = (Page) BufferGetPage(buffer);
	if (PageIsNew((PageHeader) page) ||
		ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(page))
	{
		UnlockAndReleaseBuffer(buffer);
		return (0);
	}
	lp = PageGetItemId(page, ItemPointerGetOffsetNumber(iptr));
	if (!ItemIdIsUsed(lp) || ItemIdDeleted(lp))
	{
		UnlockAndReleaseBuffer(buffer);
		return (0);
	}

	htup = (HeapTupleHeader) PageGetItem(page, lp);

	Assert(PageGetSUI(page) == ThisStartUpID);
	if (!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), xid) ||
		HeapTupleHeaderGetCmin(htup) != cid)
	{
		UnlockAndReleaseBuffer(buffer);
		return (-1);
	}

	UnlockAndReleaseBuffer(buffer);
	return (1);
}
Example #14
/*
 *		XactLockTableWait
 *
 * Wait for the specified transaction to commit or abort.
 *
 * Note that this does the right thing for subtransactions: if we wait on a
 * subtransaction, we will exit as soon as it aborts or its top parent commits.
 * It takes some extra work to ensure this, because to save on shared memory
 * the XID lock of a subtransaction is released when it ends, whether
 * successfully or unsuccessfully.	So we have to check if it's "still running"
 * and if so wait for its parent.
 */
void
XactLockTableWait(TransactionId xid)
{
	LOCKTAG		tag;

	for (;;)
	{
		Assert(TransactionIdIsValid(xid));
		Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));

		SET_LOCKTAG_TRANSACTION(tag, xid);

		(void) LockAcquire(&tag, ShareLock, false, false);

		LockRelease(&tag, ShareLock, false);

		if (!TransactionIdIsInProgress(xid))
			break;
		xid = SubTransGetParent(xid);
	}
}
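/*
 * A typical calling pattern, as an illustrative sketch (the helper name is
 * invented): before retrying an operation blocked by another transaction's
 * xmax, wait for that transaction to finish.
 */
static void
WaitForTupleUpdater(HeapTuple tuple)
{
	TransactionId xwait = HeapTupleHeaderGetXmax(tuple->t_data);

	if (TransactionIdIsValid(xwait) &&
		!TransactionIdIsCurrentTransactionId(xwait))
		XactLockTableWait(xwait);	/* blocks until xwait commits or aborts */
}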
Example #15
/*
 *		ConditionalXactLockTableWait
 *
 * As above, but only lock if we can get the lock without blocking.
 * Returns TRUE if the lock was acquired.
 */
bool
ConditionalXactLockTableWait(TransactionId xid)
{
	LOCKTAG		tag;

	for (;;)
	{
		Assert(TransactionIdIsValid(xid));
		Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));

		SET_LOCKTAG_TRANSACTION(tag, xid);

		if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL)
			return false;

		LockRelease(&tag, ShareLock, false);

		if (!TransactionIdIsInProgress(xid))
			break;
		xid = SubTransGetParent(xid);
	}

	return true;
}
Example #16
/*
 * Make sure that DistributedLog has room for a newly-allocated XID.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty DistributedLog or xlog page
 * to make room in shared memory.
 */
void
DistributedLog_Extend(TransactionId newestXact)
{
	MIRRORED_LOCK_DECLARE;

	int			page;

	/*
	 * No work except at first XID of a page.  But beware: just after
	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
	 */
	if (TransactionIdToEntry(newestXact) != 0 &&
		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
		return;

	page = TransactionIdToPage(newestXact);

	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Extend page %d",
		 page);

	MIRRORED_LOCK;

	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	/* Zero the page and make an XLOG entry about it */
	DistributedLog_ZeroPage(page, true);

	LWLockRelease(DistributedLogControlLock);

	MIRRORED_UNLOCK;
	
	elog((Debug_print_full_dtm ? LOG : DEBUG5), 
		 "DistributedLog_Extend with newest local xid = %d to page = %d",
		 newestXact, page);
}
Example #17
/*
 * PrescanPreparedTransactions
 *
 * Scan the pg_twophase directory and determine the range of valid XIDs
 * present.  This is run during database startup, after we have completed
 * reading WAL.  ShmemVariableCache->nextXid has been set to one more than
 * the highest XID for which evidence exists in WAL.
 *
 * We throw away any prepared xacts with main XID beyond nextXid --- if any
 * are present, it suggests that the DBA has done a PITR recovery to an
 * earlier point in time without cleaning out pg_twophase.	We dare not
 * try to recover such prepared xacts since they likely depend on database
 * state that doesn't exist now.
 *
 * However, we will advance nextXid beyond any subxact XIDs belonging to
 * valid prepared xacts.  We need to do this since subxact commit doesn't
 * write a WAL entry, and so there might be no evidence in WAL of those
 * subxact XIDs.
 *
 * Our other responsibility is to determine and return the oldest valid XID
 * among the prepared xacts (if none, return ShmemVariableCache->nextXid).
 * This is needed to synchronize pg_subtrans startup properly.
 */
TransactionId
PrescanPreparedTransactions(void)
{
	TransactionId origNextXid = ShmemVariableCache->nextXid;
	TransactionId result = origNextXid;
	DIR		   *cldir;
	struct dirent *clde;

	cldir = AllocateDir(TWOPHASE_DIR);
	while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
	{
		if (strlen(clde->d_name) == 8 &&
			strspn(clde->d_name, "0123456789ABCDEF") == 8)
		{
			TransactionId xid;
			char	   *buf;
			TwoPhaseFileHeader *hdr;
			TransactionId *subxids;
			int			i;

			xid = (TransactionId) strtoul(clde->d_name, NULL, 16);

			/* Reject XID if too new */
			if (TransactionIdFollowsOrEquals(xid, origNextXid))
			{
				ereport(WARNING,
						(errmsg("removing future two-phase state file \"%s\"",
								clde->d_name)));
				RemoveTwoPhaseFile(xid, true);
				continue;
			}

			/*
			 * Note: we can't check if already processed because clog
			 * subsystem isn't up yet.
			 */

			/* Read and validate file */
			buf = ReadTwoPhaseFile(xid);
			if (buf == NULL)
			{
				ereport(WARNING,
					  (errmsg("removing corrupt two-phase state file \"%s\"",
							  clde->d_name)));
				RemoveTwoPhaseFile(xid, true);
				continue;
			}

			/* Deconstruct header */
			hdr = (TwoPhaseFileHeader *) buf;
			if (!TransactionIdEquals(hdr->xid, xid))
			{
				ereport(WARNING,
					  (errmsg("removing corrupt two-phase state file \"%s\"",
							  clde->d_name)));
				RemoveTwoPhaseFile(xid, true);
				pfree(buf);
				continue;
			}

			/*
			 * OK, we think this file is valid.  Incorporate xid into the
			 * running-minimum result.
			 */
			if (TransactionIdPrecedes(xid, result))
				result = xid;

			/*
			 * Examine subtransaction XIDs ... they should all follow main
			 * XID, and they may force us to advance nextXid.
			 */
			subxids = (TransactionId *)
				(buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
			for (i = 0; i < hdr->nsubxacts; i++)
			{
				TransactionId subxid = subxids[i];

				Assert(TransactionIdFollows(subxid, xid));
				if (TransactionIdFollowsOrEquals(subxid,
												 ShmemVariableCache->nextXid))
				{
					ShmemVariableCache->nextXid = subxid;
					TransactionIdAdvance(ShmemVariableCache->nextXid);
				}
			}

			pfree(buf);
		}
	}
	FreeDir(cldir);

	return result;
}
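/*
 * A trimmed sketch of the state-file layout the scan above relies on: a
 * header whose xid must match the file name, followed (after MAXALIGN) by
 * an array of nsubxacts subtransaction XIDs.  Only the fields this
 * function touches are shown; the real header carries more (magic,
 * total_len, database, owner, ...).
 */
typedef struct TwoPhaseFileHeader
{
	TransactionId xid;			/* original transaction XID */
	int32		nsubxacts;		/* number of following subxact XIDs */
	/* ... other fields omitted in this sketch ... */
} TwoPhaseFileHeader;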
Example #18
/*
 * StrategyReplaceBuffer
 *
 *	Called by the buffer manager to inform us that it has flushed a buffer
 *	and is now about to replace the content. Prior to this call,
 *	the cache algorithm still reports the buffer as in the cache. After
 *	this call we report the new block, even if IO might still need to
 *	be done to bring in the new content.
 *
 *	cdb_found_index and cdb_replace_index must be the auxiliary values
 *	returned by previous calls to StrategyBufferLookup and StrategyGetBuffer.
 */
void
StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
					  int cdb_found_index, int cdb_replace_index)
{
	BufferStrategyCDB *cdb_found;
	BufferStrategyCDB *cdb_replace;

	if (cdb_found_index >= 0)
	{
		/* This must have been a ghost buffer cache hit (B1 list) */
		cdb_found = &StrategyCDB[cdb_found_index];

		/* Assert that the buffer remembered in cdb_found is the one */
		/* the buffer manager is currently faulting in */
		Assert(BUFFERTAGS_EQUAL(cdb_found->buf_tag, *newTag));

		if (cdb_replace_index >= 0)
		{
			/* We are satisfying it with an evicted T buffer */
			cdb_replace = &StrategyCDB[cdb_replace_index];

			/* Assert that the buffer remembered in cdb_replace is */
			/* the one the buffer manager has just evicted */
			Assert(cdb_replace->list == STRAT_LIST_T1 ||
				   cdb_replace->list == STRAT_LIST_T2);
			Assert(cdb_replace->buf_id == buf->buf_id);
			Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));

			/*
			 * Under normal circumstances we move evicted T1 list entries
			 * to the B1 list.  However, T1 entries that exist only because
			 * of VACUUM are just thrown into the unused list instead,
			 * since it's unlikely they'll be touched again soon.  Similarly,
			 * evicted T2 entries are thrown away; the LRU T2 entry cannot
			 * have been touched recently.
			 */
			if (cdb_replace->t1_vacuum || cdb_replace->list == STRAT_LIST_T2)
			{
				BufTableDelete(&(cdb_replace->buf_tag));
				STRAT_LIST_REMOVE(cdb_replace);
				cdb_replace->next = StrategyControl->listUnusedCDB;
				StrategyControl->listUnusedCDB = cdb_replace_index;
			}
			else
			{
				STRAT_LIST_REMOVE(cdb_replace);
				STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
			}
			/* And clear its block reference */
			cdb_replace->buf_id = -1;
		}
		else
		{
			/* We are satisfying it with an unused buffer */
		}

		/* Now the found B1 CDB gets the buffer and is moved to T2 */
		cdb_found->buf_id = buf->buf_id;
		STRAT_LIST_REMOVE(cdb_found);
		STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
	}
	else
	{
		/*
		 * This was a complete cache miss, so we need to create a new CDB.
		 * We use a free one if available, else reclaim the tail end of B1.
		 */
		if (StrategyControl->listUnusedCDB >= 0)
		{
			cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB];
			StrategyControl->listUnusedCDB = cdb_found->next;
		}
		else
		{
			/* Can't fail because we have more CDBs than buffers... */
			if (B1_LENGTH == 0)
				elog(PANIC, "StrategyReplaceBuffer: out of CDBs");
			cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];

			BufTableDelete(&(cdb_found->buf_tag));
			STRAT_LIST_REMOVE(cdb_found);
		}

		/* Set the CDB's buf_tag and insert it into the hash table */
		cdb_found->buf_tag = *newTag;
		BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));

		if (cdb_replace_index >= 0)
		{
			/*
			 * The buffer was formerly in a T list, move its CDB to the
			 * appropriate list: B1 if T1, else discard it, as above
			 */
			cdb_replace = &StrategyCDB[cdb_replace_index];

			Assert(cdb_replace->list == STRAT_LIST_T1 ||
				   cdb_replace->list == STRAT_LIST_T2);
			Assert(cdb_replace->buf_id == buf->buf_id);
			Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));

			if (cdb_replace->list == STRAT_LIST_T1)
			{
				STRAT_LIST_REMOVE(cdb_replace);
				STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
			}
			else
			{
				BufTableDelete(&(cdb_replace->buf_tag));
				STRAT_LIST_REMOVE(cdb_replace);
				cdb_replace->next = StrategyControl->listUnusedCDB;
				StrategyControl->listUnusedCDB = cdb_replace_index;
			}
			/* And clear its block reference */
			cdb_replace->buf_id = -1;
		}
		else
		{
			/* We are satisfying it with an unused buffer */
		}

		/* Assign the buffer id to the new CDB */
		cdb_found->buf_id = buf->buf_id;

		/*
		 * Specialized VACUUM optimization. If this complete cache miss
		 * happened because vacuum needed the page, we place it at the LRU
		 * position of T1; normally it goes at the MRU position.
		 */
		if (strategy_hint_vacuum)
		{
			if (TransactionIdEquals(strategy_vacuum_xid,
									GetTopTransactionId()))
				STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
			else
			{
				/* VACUUM must have been aborted by error, reset flag */
				strategy_hint_vacuum = false;
				STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
			}
		}
		else
			STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);

		/*
		 * Remember the Xid when this buffer went onto T1 to avoid a
		 * single UPDATE promoting a newcomer straight into T2. Also
		 * remember if it was loaded for VACUUM.
		 */
		cdb_found->t1_xid = GetTopTransactionId();
		cdb_found->t1_vacuum = strategy_hint_vacuum;
	}
}
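/*
 * For context, the VACUUM hint consulted above is set by a companion
 * routine; a sketch along the lines of the historical freelist.c code
 * (assumed, not quoted from this source):
 */
void
StrategyHintVacuum(bool vacuum_active)
{
	strategy_hint_vacuum = vacuum_active;
	strategy_vacuum_xid = GetTopTransactionId();
}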
Example #19
/*
 * For all items in this page, find their respective root line pointers.
 * If item k is part of a HOT-chain with root at item j, then we set
 * root_offsets[k - 1] = j.
 *
 * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
 * We zero out all unused entries.
 *
 * The function must be called with at least share lock on the buffer, to
 * prevent concurrent prune operations.
 *
 * Note: The information collected here is valid only as long as the caller
 * holds a pin on the buffer. Once pin is released, a tuple might be pruned
 * and reused by a completely unrelated tuple.
 */
void
heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
{
	OffsetNumber offnum,
				maxoff;

	MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));

	maxoff = PageGetMaxOffsetNumber(page);
	for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
	{
		ItemId		lp = PageGetItemId(page, offnum);
		HeapTupleHeader htup;
		OffsetNumber nextoffnum;
		TransactionId priorXmax;

		/* skip unused and dead items */
		if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
			continue;

		if (ItemIdIsNormal(lp))
		{
			htup = (HeapTupleHeader) PageGetItem(page, lp);

			/*
			 * Check if this tuple is part of a HOT-chain rooted at some other
			 * tuple. If so, skip it for now; we'll process it when we find
			 * its root.
			 */
			if (HeapTupleHeaderIsHeapOnly(htup))
				continue;

			/*
			 * This is either a plain tuple or the root of a HOT-chain.
			 * Remember it in the mapping.
			 */
			root_offsets[offnum - 1] = offnum;

			/* If it's not the start of a HOT-chain, we're done with it */
			if (!HeapTupleHeaderIsHotUpdated(htup))
				continue;

			/* Set up to scan the HOT-chain */
			nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
			priorXmax = HeapTupleHeaderGetXmax(htup);
		}
		else
		{
			/* Must be a redirect item. We do not set its root_offsets entry */
			Assert(ItemIdIsRedirected(lp));
			/* Set up to scan the HOT-chain */
			nextoffnum = ItemIdGetRedirect(lp);
			priorXmax = InvalidTransactionId;
		}

		/*
		 * Now follow the HOT-chain and collect other tuples in the chain.
		 *
		 * Note: Even though this is a nested loop, the overall complexity is
		 * O(N) because each tuple on the page is visited at most twice: once
		 * in the outer loop and once while chasing a HOT chain.
		 */
		for (;;)
		{
			lp = PageGetItemId(page, nextoffnum);

			/* Check for broken chains */
			if (!ItemIdIsNormal(lp))
				break;

			htup = (HeapTupleHeader) PageGetItem(page, lp);

			if (TransactionIdIsValid(priorXmax) &&
				!TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
				break;

			/* Remember the root line pointer for this item */
			root_offsets[nextoffnum - 1] = offnum;

			/* Advance to next chain member, if any */
			if (!HeapTupleHeaderIsHotUpdated(htup))
				break;

			nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
			priorXmax = HeapTupleHeaderGetXmax(htup);
		}
	}
}
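/*
 * A hedged usage sketch (helper name invented): a caller such as an index
 * build maps each heap item back to the root line pointer that index
 * entries must reference.  The buffer must stay pinned while the mapping
 * is used.
 */
static OffsetNumber
GetRootOffset(Page page, OffsetNumber offnum)
{
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];

	heap_get_root_tuples(page, root_offsets);

	/* Zero (InvalidOffsetNumber) means no root was recorded for this item */
	return root_offsets[offnum - 1];
}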
Example #20
/*
 * XidInMVCCSnapshot
 *		Is the given XID still-in-progress according to the snapshot?
 *
 * Note: GetSnapshotData never stores either top xid or subxids of our own
 * backend into a snapshot, so these xids will not be reported as "running"
 * by this function.  This is OK for current uses, because we actually only
 * apply this for known-committed XIDs.
 */
static bool
XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
{
	uint32		i;

	/*
	 * Make a quick range check to eliminate most XIDs without looking at the
	 * xip arrays.  Note that this is OK even if we convert a subxact XID to
	 * its parent below, because a subxact with XID < xmin has surely also got
	 * a parent with XID < xmin, while one with XID >= xmax must belong to a
	 * parent that was not yet committed at the time of this snapshot.
	 */

	/* Any xid < xmin is not in-progress */
	if (TransactionIdPrecedes(xid, snapshot->xmin))
		return false;
	/* Any xid >= xmax is in-progress */
	if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
		return true;

	/*
	 * Snapshot information is stored slightly differently in snapshots taken
	 * during recovery.
	 */
	if (!snapshot->takenDuringRecovery)
	{
		/*
		 * If the snapshot contains full subxact data, the fastest way to
		 * check things is just to compare the given XID against both subxact
		 * XIDs and top-level XIDs.  If the snapshot overflowed, we have to
		 * use pg_subtrans to convert a subxact XID to its parent XID, but
		 * then we need only look at top-level XIDs not subxacts.
		 */
		if (!snapshot->suboverflowed)
		{
			/* full data, so search subxip */
			int32		j;

			for (j = 0; j < snapshot->subxcnt; j++)
			{
				if (TransactionIdEquals(xid, snapshot->subxip[j]))
					return true;
			}

			/* not there, fall through to search xip[] */
		}
		else
		{
			/* overflowed, so convert xid to top-level */
			xid = SubTransGetTopmostTransaction(xid);

			/*
			 * If xid was indeed a subxact, we might now have an xid < xmin,
			 * so recheck to avoid an array scan.  No point in rechecking
			 * xmax.
			 */
			if (TransactionIdPrecedes(xid, snapshot->xmin))
				return false;
		}

		for (i = 0; i < snapshot->xcnt; i++)
		{
			if (TransactionIdEquals(xid, snapshot->xip[i]))
				return true;
		}
	}
	else
	{
		int32		j;

		/*
		 * In recovery we store all xids in the subxact array because it is by
		 * far the bigger array, and we mostly don't know which xids are
		 * top-level and which are subxacts. The xip array is empty.
		 *
		 * We start by searching subtrans, if we overflowed.
		 */
		if (snapshot->suboverflowed)
		{
			/* overflowed, so convert xid to top-level */
			xid = SubTransGetTopmostTransaction(xid);

			/*
			 * If xid was indeed a subxact, we might now have an xid < xmin,
			 * so recheck to avoid an array scan.  No point in rechecking
			 * xmax.
			 */
			if (TransactionIdPrecedes(xid, snapshot->xmin))
				return false;
		}

		/*
		 * We now have either a top-level xid higher than xmin or an
		 * indeterminate xid. We don't know whether it's top level or subxact
		 * but it doesn't matter. If it's present, the xid is visible.
		 */
		for (j = 0; j < snapshot->subxcnt; j++)
		{
			if (TransactionIdEquals(xid, snapshot->subxip[j]))
				return true;
		}
	}

	return false;
}
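/*
 * An illustrative caller sketch in the spirit of tqual.c (helper name
 * invented): even a committed xmin is invisible if the inserting
 * transaction was still in progress when the snapshot was taken.
 */
static bool
XminVisibleInSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
{
	TransactionId xmin = HeapTupleHeaderGetXmin(tuple);

	if (XidInMVCCSnapshot(xmin, snapshot))
		return false;			/* still running per the snapshot */
	return TransactionIdDidCommit(xmin);
}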
Example #21
Datum
_Slony_I_createEvent(PG_FUNCTION_ARGS)
{
	TransactionId newXid = GetTopTransactionId();
	Slony_I_ClusterStatus *cs;
	char	   *ev_type_c;
	Datum		argv[9];
	char		nulls[10];
	char	   *buf;
	size_t		buf_size;
	int			rc;
	int			i;
	int64		retval;
	bool		isnull;

#ifdef HAVE_GETACTIVESNAPSHOT
	if (GetActiveSnapshot() == NULL)
		elog(ERROR, "Slony-I: ActiveSnapshot is NULL in createEvent()");
#else
	if (SerializableSnapshot == NULL)
		elog(ERROR, "Slony-I: SerializableSnapshot is NULL in createEvent()");
#endif

	if ((rc = SPI_connect()) < 0)
		elog(ERROR, "Slony-I: SPI_connect() failed in createEvent()");

	/*
	 * Get or create the cluster status information and make sure it has the
	 * SPI plans that we need here.
	 */
	cs = getClusterStatus(PG_GETARG_NAME(0),
						  PLAN_INSERT_EVENT);

	buf_size = 8192;
	buf = palloc(buf_size);

	/*
	 * Do the following only once per transaction.
	 */
	if (!TransactionIdEquals(cs->currentXid, newXid))
	{
		cs->currentXid = newXid;
	}

	/*
	 * Call the saved INSERT plan
	 */
	for (i = 1; i < 10; i++)
	{
		if (i >= PG_NARGS() || PG_ARGISNULL(i))
		{
			argv[i - 1] = (Datum) 0;
			nulls[i - 1] = 'n';
		}
		else
		{
			argv[i - 1] = PG_GETARG_DATUM(i);
			nulls[i - 1] = ' ';
		}
	}
	nulls[9] = '\0';

	if ((rc = SPI_execp(cs->plan_insert_event, argv, nulls, 0)) < 0)
		elog(ERROR, "Slony-I: SPI_execp() failed for \"INSERT INTO sl_event ...\"");

	/*
	 * The INSERT plan also contains a SELECT currval('sl_event_seq'); use
	 * the new sequence number as the return value.
	 */
	if (SPI_processed != 1)
		elog(ERROR, "Slony-I: INSERT plan did not return 1 result row");
	retval = DatumGetInt64(SPI_getbinval(SPI_tuptable->vals[0],
										 SPI_tuptable->tupdesc, 1, &isnull));

	/*
	 * For SYNC and ENABLE_SUBSCRIPTION events, we also remember all current
	 * sequence values.
	 */
	if (PG_NARGS() > 1 && !PG_ARGISNULL(1))
	{
		ev_type_c = DatumGetPointer(DirectFunctionCall1(
											   textout, PG_GETARG_DATUM(1)));
		if (strcmp(ev_type_c, "SYNC") == 0 ||
			strcmp(ev_type_c, "ENABLE_SUBSCRIPTION") == 0)
		{
/*@-nullpass@*/
			if ((rc = SPI_execp(cs->plan_record_sequences, NULL, NULL, 0)) < 0)
				elog(ERROR, "Slony-I: SPI_execp() failed for \"INSERT INTO sl_seqlog ...\"");
/*@+nullpass@*/
		}
	}

	(void) SPI_finish();
/*@-mustfreefresh@*/
	PG_RETURN_INT64(retval);
}
Example #22
Datum
_Slony_I_logTrigger(PG_FUNCTION_ARGS)
{
	TransactionId newXid = GetTopTransactionId();
	Slony_I_ClusterStatus *cs;
	TriggerData *tg;
	Datum		argv[4];
	text	   *cmdtype = NULL;
	int			rc;
	Name		cluster_name;
	int32		tab_id;
	char	   *attkind;
	int			attkind_idx;
	int			cmddata_need;

	/*
	 * Don't do any logging if the current session role isn't Origin.
	 */
	if (SessionReplicationRole != SESSION_REPLICATION_ROLE_ORIGIN)
		return PointerGetDatum(NULL);

	/*
	 * Get the trigger call context
	 */
	if (!CALLED_AS_TRIGGER(fcinfo))
		elog(ERROR, "Slony-I: logTrigger() not called as trigger");
	tg = (TriggerData *) (fcinfo->context);

	/*
	 * Check all logTrigger() calling conventions
	 */
	if (!TRIGGER_FIRED_AFTER(tg->tg_event))
		elog(ERROR, "Slony-I: logTrigger() must be fired AFTER");
	if (!TRIGGER_FIRED_FOR_ROW(tg->tg_event))
		elog(ERROR, "Slony-I: logTrigger() must be fired FOR EACH ROW");
	if (tg->tg_trigger->tgnargs != 3)
		elog(ERROR, "Slony-I: logTrigger() must be defined with 3 args");

	/*
	 * Connect to the SPI manager
	 */
	if ((rc = SPI_connect()) < 0)
		elog(ERROR, "Slony-I: SPI_connect() failed in createEvent()");

	/*
	 * Get all the trigger arguments
	 */
	cluster_name = DatumGetName(DirectFunctionCall1(namein,
								CStringGetDatum(tg->tg_trigger->tgargs[0])));
	tab_id = strtol(tg->tg_trigger->tgargs[1], NULL, 10);
	attkind = tg->tg_trigger->tgargs[2];

	/*
	 * Get or create the cluster status information and make sure it has the
	 * SPI plans that we need here.
	 */
	cs = getClusterStatus(cluster_name, PLAN_INSERT_LOG);

	/*
	 * Do the following only once per transaction.
	 */
	if (!TransactionIdEquals(cs->currentXid, newXid))
	{
		int32		log_status;
		bool isnull;

		/*
		 * Determine the currently active log table
		 */
		if (SPI_execp(cs->plan_get_logstatus, NULL, NULL, 0) < 0)
			elog(ERROR, "Slony-I: cannot determine log status");
		if (SPI_processed != 1)
			elog(ERROR, "Slony-I: cannot determine log status");

		log_status = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
											SPI_tuptable->tupdesc, 1, &isnull));
		SPI_freetuptable(SPI_tuptable);

		switch (log_status)
		{
			case 0:
			case 2:
				cs->plan_active_log = cs->plan_insert_log_1;
				break;

			case 1:
			case 3:
				cs->plan_active_log = cs->plan_insert_log_2;
				break;

			default:
				elog(ERROR, "Slony-I: illegal log status %d", log_status);
				break;
		}

		cs->currentXid = newXid;
	}

	/*
	 * Determine cmdtype and cmddata depending on the command type
	 */
	if (TRIGGER_FIRED_BY_INSERT(tg->tg_event))
	{
		HeapTuple	new_row = tg->tg_trigtuple;
		TupleDesc	tupdesc = tg->tg_relation->rd_att;
		char	   *col_ident;
		char	   *col_value;

		int			len_ident;
		int			len_value;
		int			i;
		int			need_comma = false;
		char	   *OldDateStyle;
		char	   *cp = VARDATA(cs->cmddata_buf);

		/*
		 * INSERT
		 *
		 * cmdtype = 'I' cmddata = ("col" [, ...]) values ('value' [, ...])
		 */
		cmdtype = cs->cmdtype_I;

		/*
		 * Specify all the columns
		 */
		*cp++ = '(';
		for (i = 0; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Skip dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_comma)
				*cp++ = ',';
			else
				need_comma = true;

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
		}

		/*
		 * Append the string ") values ("
		 */
		*cp++ = ')';
		*cp++ = ' ';
		*cp++ = 'v';
		*cp++ = 'a';
		*cp++ = 'l';
		*cp++ = 'u';
		*cp++ = 'e';
		*cp++ = 's';
		*cp++ = ' ';
		*cp++ = '(';

		/*
		 * Append the values
		 */
		need_comma = false;
		OldDateStyle = GetConfigOptionByName("DateStyle", NULL);
		if (!strstr(OldDateStyle, "ISO"))
			set_config_option("DateStyle", "ISO", PGC_USERSET, PGC_S_SESSION, true, true);
		for (i = 0; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Skip dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;


			if ((col_value = SPI_getvalue(new_row, tupdesc, i + 1)) == NULL)
			{
				col_value = "NULL";
			}
			else
			{
				col_value = slon_quote_literal(col_value);
			}

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_comma)
				*cp++ = ',';
			else
				need_comma = true;

			memcpy(cp, col_value, len_value);
			cp += len_value;
		}

		if (!strstr(OldDateStyle, "ISO"))
			set_config_option("DateStyle", OldDateStyle, PGC_USERSET, PGC_S_SESSION, true, true);

		/*
		 * Terminate and done
		 */
		*cp++ = ')';
		*cp = '\0';
		SET_VARSIZE(cs->cmddata_buf,
					VARHDRSZ + (cp - VARDATA(cs->cmddata_buf)));
	}
	else if (TRIGGER_FIRED_BY_UPDATE(tg->tg_event))
	{
		HeapTuple	old_row = tg->tg_trigtuple;
		HeapTuple	new_row = tg->tg_newtuple;
		TupleDesc	tupdesc = tg->tg_relation->rd_att;
		Datum		old_value;
		Datum		new_value;
		bool		old_isnull;
		bool		new_isnull;

		char	   *col_ident;
		char	   *col_value;
		int			len_ident;
		int			len_value;
		int			i;
		int			need_comma = false;
		int			need_and = false;
		char	   *OldDateStyle;

		char	   *cp = VARDATA(cs->cmddata_buf);

		/*
		 * UPDATE
		 *
		 * cmdtype = 'U' cmddata = "col_ident"='value' [, ...] where
		 * "pk_ident" = 'value' [ and ...]
		 */
		cmdtype = cs->cmdtype_U;
		for (i = 0; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Ignore dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			old_value = SPI_getbinval(old_row, tupdesc, i + 1, &old_isnull);
			new_value = SPI_getbinval(new_row, tupdesc, i + 1, &new_isnull);

			/*
			 * If old and new value are NULL, the column is unchanged
			 */
			if (old_isnull && new_isnull)
				continue;

			/*
			 * If both are NOT NULL, we need to compare the values and skip
			 * setting the column if equal
			 */
			if (!old_isnull && !new_isnull)
			{
				Oid			opr_oid;
				FmgrInfo   *opr_finfo_p;

				/*
				 * Lookup the equal operators function call info using the
				 * typecache if available
				 */
#ifdef HAVE_TYPCACHE
				TypeCacheEntry *type_cache;

				type_cache = lookup_type_cache(
											   SPI_gettypeid(tupdesc, i + 1),
								  TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO);
				opr_oid = type_cache->eq_opr;
				if (opr_oid == ARRAY_EQ_OP)
					opr_oid = InvalidOid;
				else
					opr_finfo_p = &(type_cache->eq_opr_finfo);
#else
				FmgrInfo	opr_finfo;

				opr_oid = compatible_oper_funcid(makeList1(makeString("=")),
											   SPI_gettypeid(tupdesc, i + 1),
										SPI_gettypeid(tupdesc, i + 1), true);
				if (OidIsValid(opr_oid))
				{
					fmgr_info(opr_oid, &opr_finfo);
					opr_finfo_p = &opr_finfo;
				}
#endif

				/*
				 * If we have an equal operator, use that to do a binary
				 * comparison.  Otherwise, get the string representation of
				 * both attributes and do a string comparison.
				 */
				if (OidIsValid(opr_oid))
				{
					if (DatumGetBool(FunctionCall2(opr_finfo_p,
												   old_value, new_value)))
						continue;
				}
				else
				{
					char	   *old_strval = SPI_getvalue(old_row, tupdesc, i + 1);
					char	   *new_strval = SPI_getvalue(new_row, tupdesc, i + 1);

					if (strcmp(old_strval, new_strval) == 0)
						continue;
				}
			}

			if (need_comma)
				*cp++ = ',';
			else
				need_comma = true;

			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			if (new_isnull)
				col_value = "NULL";
			else
			{
				OldDateStyle = GetConfigOptionByName("DateStyle", NULL);
				if (!strstr(OldDateStyle, "ISO"))
					set_config_option("DateStyle", "ISO", PGC_USERSET, PGC_S_SESSION, true, true);
				col_value = slon_quote_literal(SPI_getvalue(new_row, tupdesc, i + 1));
				if (!strstr(OldDateStyle, "ISO"))
					set_config_option("DateStyle", OldDateStyle, PGC_USERSET, PGC_S_SESSION, true, true);
			}
			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}

		/*
		 * It can happen that the only UPDATE an application does is to set a
		 * column to the same value again. In that case, we'd end up here with
		 * no columns in the SET clause yet.  We add the first key column
		 * here with its old value to simulate the same for the replication
		 * engine.
		 */
		if (!need_comma)
		{
			for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++)
			{
				if (tupdesc->attrs[i]->attisdropped)
					continue;

				attkind_idx++;
				if (!attkind[attkind_idx])
					elog(ERROR, "Slony-I: no key columns found in logTrigger() attkind parameter");

				if (attkind[attkind_idx] == 'k')
					break;
			}
			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1));

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}

		*cp++ = ' ';
		*cp++ = 'w';
		*cp++ = 'h';
		*cp++ = 'e';
		*cp++ = 'r';
		*cp++ = 'e';
		*cp++ = ' ';

		for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Ignore dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			attkind_idx++;
			if (!attkind[attkind_idx])
				break;
			if (attkind[attkind_idx] != 'k')
				continue;
			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1));
			if (col_value == NULL)
				elog(ERROR, "Slony-I: old key column %s.%s IS NULL on UPDATE",
					 NameStr(tg->tg_relation->rd_rel->relname), col_ident);

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_and)
			{
				*cp++ = ' ';
				*cp++ = 'a';
				*cp++ = 'n';
				*cp++ = 'd';
				*cp++ = ' ';
			}
			else
				need_and = true;

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}
		*cp = '\0';
		SET_VARSIZE(cs->cmddata_buf,
					VARHDRSZ + (cp - VARDATA(cs->cmddata_buf)));
	}
	else if (TRIGGER_FIRED_BY_DELETE(tg->tg_event))
	{
		HeapTuple	old_row = tg->tg_trigtuple;
		TupleDesc	tupdesc = tg->tg_relation->rd_att;
		char	   *col_ident;
		char	   *col_value;
		int			len_ident;
		int			len_value;
		int			i;
		int			need_and = false;
		char	   *cp = VARDATA(cs->cmddata_buf);

		/*
		 * DELETE
		 *
		 * cmdtype = 'D' cmddata = "pk_ident"='value' [and ...]
		 */
		cmdtype = cs->cmdtype_D;

		for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++)
		{
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			attkind_idx++;
			if (!attkind[attkind_idx])
				break;
			if (attkind[attkind_idx] != 'k')
				continue;
			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1));
			if (col_value == NULL)
				elog(ERROR, "Slony-I: old key column %s.%s IS NULL on DELETE",
					 NameStr(tg->tg_relation->rd_rel->relname), col_ident);

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_and)
			{
				*cp++ = ' ';
				*cp++ = 'a';
				*cp++ = 'n';
				*cp++ = 'd';
				*cp++ = ' ';
			}
			else
				need_and = true;

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}
		*cp = '\0';
		SET_VARSIZE(cs->cmddata_buf,
					VARHDRSZ + (cp - VARDATA(cs->cmddata_buf)));
	}
	else
		elog(ERROR, "Slony-I: logTrigger() fired for unhandled event");

	/*
	 * Construct the parameter array and insert the log row.
	 */
	argv[0] = Int32GetDatum(tab_id);
	argv[1] = PointerGetDatum(cmdtype);
	argv[2] = PointerGetDatum(cs->cmddata_buf);
	SPI_execp(cs->plan_active_log, argv, NULL, 0);

	SPI_finish();
	return PointerGetDatum(NULL);
}
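/*
 * The grow-then-append logic repeated throughout the trigger above could be
 * factored into a single helper.  A refactoring sketch (hypothetical, not
 * part of Slony-I): returns the write position, reallocating cmddata_buf as
 * needed, mirroring the inline doubling loops above.
 */
static char *
cmddata_ensure_room(Slony_I_ClusterStatus *cs, char *cp, size_t need_more)
{
	size_t		have = cp - (char *) (cs->cmddata_buf);
	size_t		need = have + need_more + 16;

	if (cs->cmddata_size < need)
	{
		while (cs->cmddata_size < need)
			cs->cmddata_size *= 2;
		cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
		cp = (char *) (cs->cmddata_buf) + have;
	}
	return cp;
}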
Example #23
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * The result is the next heap tuple satisfying the scan keys and the
 * snapshot, or NULL if no more matching tuples exist.	On success,
 * the buffer containing the heap tuple is pinned (the pin will be dropped
 * at the next index_getnext or index_endscan).
 *
 * Note: caller must check scan->xs_recheck, and perform rechecking of the
 * scan keys if required.  We do not do that here because we don't have
 * enough information to do it efficiently in the general case.
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
	HeapTuple	heapTuple = &scan->xs_ctup;
	ItemPointer tid = &heapTuple->t_self;
	FmgrInfo   *procedure;

	SCAN_CHECKS;
	GET_SCAN_PROCEDURE(amgettuple);

	Assert(TransactionIdIsValid(RecentGlobalXmin));

	/*
	 * We always reset xs_hot_dead; if we are here then either we are just
	 * starting the scan, or we previously returned a visible tuple, and in
	 * either case it's inappropriate to kill the prior index entry.
	 */
	scan->xs_hot_dead = false;

	for (;;)
	{
		OffsetNumber offnum;
		bool		at_chain_start;
		Page		dp;

		if (scan->xs_next_hot != InvalidOffsetNumber)
		{
			/*
			 * We are resuming scan of a HOT chain after having returned an
			 * earlier member.	Must still hold pin on current heap page.
			 */
			Assert(BufferIsValid(scan->xs_cbuf));
			Assert(ItemPointerGetBlockNumber(tid) ==
				   BufferGetBlockNumber(scan->xs_cbuf));
			Assert(TransactionIdIsValid(scan->xs_prev_xmax));
			offnum = scan->xs_next_hot;
			at_chain_start = false;
			scan->xs_next_hot = InvalidOffsetNumber;
		}
		else
		{
			bool		found;
			Buffer		prev_buf;

			/*
			 * If we scanned a whole HOT chain and found only dead tuples,
			 * tell the index AM to kill its entry for that TID.  We do not do
			 * this when in recovery because it may violate MVCC to do so; see
			 * comments in RelationGetIndexScan().
			 */
			if (!scan->xactStartedInRecovery)
				scan->kill_prior_tuple = scan->xs_hot_dead;

			/*
			 * The AM's gettuple proc finds the next index entry matching the
			 * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It
			 * should also set scan->xs_recheck, though we pay no attention to
			 * that here.
			 */
			found = DatumGetBool(FunctionCall2(procedure,
											   PointerGetDatum(scan),
											   Int32GetDatum(direction)));

			/* Reset kill flag immediately for safety */
			scan->kill_prior_tuple = false;

			/* If we're out of index entries, break out of outer loop */
			if (!found)
				break;

			pgstat_count_index_tuples(scan->indexRelation, 1);

			/* Switch to correct buffer if we don't have it already */
			prev_buf = scan->xs_cbuf;
			scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
												 scan->heapRelation,
											 ItemPointerGetBlockNumber(tid));

			/*
			 * Prune page, but only if we weren't already on this page
			 */
			if (prev_buf != scan->xs_cbuf)
				heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
									RecentGlobalXmin);

			/* Prepare to scan HOT chain starting at index-referenced offnum */
			offnum = ItemPointerGetOffsetNumber(tid);
			at_chain_start = true;

			/* We don't know what the first tuple's xmin should be */
			scan->xs_prev_xmax = InvalidTransactionId;

			/* Initialize flag to detect if all entries are dead */
			scan->xs_hot_dead = true;
		}

		/* Obtain share-lock on the buffer so we can examine visibility */
		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);

		dp = (Page) BufferGetPage(scan->xs_cbuf);

		/* Scan through possible multiple members of HOT-chain */
		for (;;)
		{
			ItemId		lp;
			ItemPointer ctid;
			bool		valid;

			/* check for bogus TID */
			if (offnum < FirstOffsetNumber ||
				offnum > PageGetMaxOffsetNumber(dp))
				break;

			lp = PageGetItemId(dp, offnum);

			/* check for unused, dead, or redirected items */
			if (!ItemIdIsNormal(lp))
			{
				/* We should only see a redirect at start of chain */
				if (ItemIdIsRedirected(lp) && at_chain_start)
				{
					/* Follow the redirect */
					offnum = ItemIdGetRedirect(lp);
					at_chain_start = false;
					continue;
				}
				/* else must be end of chain */
				break;
			}

			/*
			 * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
			 * it is returned to the executor on success.
			 */
			heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
			heapTuple->t_len = ItemIdGetLength(lp);
			ItemPointerSetOffsetNumber(tid, offnum);
			heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
			ctid = &heapTuple->t_data->t_ctid;

			/*
			 * Shouldn't see a HEAP_ONLY tuple at chain start.  (This test
			 * should be unnecessary, since the chain root can't be removed
			 * while we have pin on the index entry, but let's make it
			 * anyway.)
			 */
			if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
				break;

			/*
			 * The xmin should match the previous xmax value, else chain is
			 * broken.	(Note: this test is not optional because it protects
			 * us against the case where the prior chain member's xmax aborted
			 * since we looked at it.)
			 */
			if (TransactionIdIsValid(scan->xs_prev_xmax) &&
				!TransactionIdEquals(scan->xs_prev_xmax,
								  HeapTupleHeaderGetXmin(heapTuple->t_data)))
				break;

			/* If it's visible per the snapshot, we must return it */
			valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
												 scan->xs_cbuf);

			CheckForSerializableConflictOut(valid, scan->heapRelation,
											heapTuple, scan->xs_cbuf);

			if (valid)
			{
				/*
				 * If the snapshot is MVCC, we know that it could accept at
				 * most one member of the HOT chain, so we can skip examining
				 * any more members.  Otherwise, check for continuation of the
				 * HOT-chain, and set state for next time.
				 */
				if (IsMVCCSnapshot(scan->xs_snapshot)
					&& !IsolationIsSerializable())
					scan->xs_next_hot = InvalidOffsetNumber;
				else if (HeapTupleIsHotUpdated(heapTuple))
				{
					Assert(ItemPointerGetBlockNumber(ctid) ==
						   ItemPointerGetBlockNumber(tid));
					scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
					scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
				}
				else
					scan->xs_next_hot = InvalidOffsetNumber;

				PredicateLockTuple(scan->heapRelation, heapTuple);

				LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

				pgstat_count_heap_fetch(scan->indexRelation);

				return heapTuple;
			}

			/*
			 * If we can't see it, maybe no one else can either.  Check to see
			 * if the tuple is dead to all transactions.  If we find that all
			 * the tuples in the HOT chain are dead, we'll signal the index AM
			 * to not return that TID on future indexscans.
			 */
			if (scan->xs_hot_dead &&
				HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
										 scan->xs_cbuf) != HEAPTUPLE_DEAD)
				scan->xs_hot_dead = false;

			/*
			 * Check to see if HOT chain continues past this tuple; if so
			 * fetch the next offnum (we don't bother storing it into
			 * xs_next_hot, but must store xs_prev_xmax), and loop around.
			 */
			if (HeapTupleIsHotUpdated(heapTuple))
			{
				Assert(ItemPointerGetBlockNumber(ctid) ==
					   ItemPointerGetBlockNumber(tid));
				offnum = ItemPointerGetOffsetNumber(ctid);
				at_chain_start = false;
				scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
			}
			else
				break;			/* end of chain */
		}						/* loop over a single HOT chain */

		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

		/* Loop around to ask index AM for another TID */
		scan->xs_next_hot = InvalidOffsetNumber;
	}

	/* Release any held pin on a heap page */
	if (BufferIsValid(scan->xs_cbuf))
	{
		ReleaseBuffer(scan->xs_cbuf);
		scan->xs_cbuf = InvalidBuffer;
	}

	return NULL;				/* failure exit */
}
Example #24
/*
 * Prune specified item pointer or a HOT chain originating at that item.
 *
 * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
 * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
 * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
 * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
 * DEAD; the OldestXmin test is just too coarse to detect it.
 *
 * The root line pointer is redirected to the tuple immediately after the
 * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
 * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
 * tuple, which we treat as a chain of length 1.)
 *
 * OldestXmin is the cutoff XID used to identify dead tuples.
 *
 * We don't actually change the page here, except perhaps for hint-bit updates
 * caused by HeapTupleSatisfiesVacuum.	We just add entries to the arrays in
 * prstate showing the changes to be made.	Items to be redirected are added
 * to the redirected[] array (two entries per redirection); items to be set to
 * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
 * state are added to nowunused[].
 *
 * If redirect_move is true, we intend to get rid of redirecting line pointers,
 * not just make redirection entries.
 *
 * Returns the number of tuples (to be) deleted from the page.
 */
static int
heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
				 TransactionId OldestXmin,
				 PruneState *prstate,
				 bool redirect_move)
{
	int			ndeleted = 0;
	Page		dp = (Page) BufferGetPage(buffer);
	TransactionId priorXmax = InvalidTransactionId;
	ItemId		rootlp;
	HeapTupleHeader htup;
	OffsetNumber latestdead = InvalidOffsetNumber,
				redirect_target = InvalidOffsetNumber,
				maxoff = PageGetMaxOffsetNumber(dp),
				offnum;
	OffsetNumber chainitems[MaxHeapTuplesPerPage];
	int			nchain = 0,
				i;

	rootlp = PageGetItemId(dp, rootoffnum);

	/*
	 * If it's a heap-only tuple, then it is not the start of a HOT chain.
	 */
	if (ItemIdIsNormal(rootlp))
	{
		htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
		if (HeapTupleHeaderIsHeapOnly(htup))
		{
			/*
			 * If the tuple is DEAD and doesn't chain to anything else, mark
			 * it unused immediately.  (If it does chain, we can only remove
			 * it as part of pruning its chain.)
			 *
			 * We need this primarily to handle aborted HOT updates, that is,
			 * XMIN_INVALID heap-only tuples.  Those might not be linked to by
			 * any chain, since the parent tuple might be re-updated before
			 * any pruning occurs.	So we have to be able to reap them
			 * separately from chain-pruning.  (Note that
			 * HeapTupleHeaderIsHotUpdated will never return true for an
			 * XMIN_INVALID tuple, so this code will work even when there were
			 * sequential updates within the aborted transaction.)
			 *
			 * Note that we might first arrive at a dead heap-only tuple
			 * either here or while following a chain below.  Whichever path
			 * gets there first will mark the tuple unused.
			 */
			if (HeapTupleSatisfiesVacuum(relation, htup, OldestXmin, buffer)
				== HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
			{
				heap_prune_record_unused(prstate, rootoffnum);
				ndeleted++;
			}

			/* Nothing more to do */
			return ndeleted;
		}
	}

	/* Start from the root tuple */
	offnum = rootoffnum;

	/* while not end of the chain */
	for (;;)
	{
		ItemId		lp;
		bool		tupdead,
					recent_dead;

		/* Some sanity checks */
		if (offnum < FirstOffsetNumber || offnum > maxoff)
			break;

		/* If item is already processed, stop --- it must not be the same chain */
		if (prstate->marked[offnum])
			break;

		lp = PageGetItemId(dp, offnum);

		/* Unused item obviously isn't part of the chain */
		if (!ItemIdIsUsed(lp))
			break;

		/*
		 * If we are looking at the redirected root line pointer, jump to the
		 * first normal tuple in the chain.  If we find a redirect somewhere
		 * else, stop --- it must not be the same chain.
		 */
		if (ItemIdIsRedirected(lp))
		{
			if (nchain > 0)
				break;			/* not at start of chain */
			chainitems[nchain++] = offnum;
			offnum = ItemIdGetRedirect(rootlp);
			continue;
		}

		/*
		 * Likewise, a dead item pointer can't be part of the chain. (We
		 * already eliminated the case of a dead root tuple outside this
		 * function.)
		 */
		if (ItemIdIsDead(lp))
			break;

		Assert(ItemIdIsNormal(lp));
		htup = (HeapTupleHeader) PageGetItem(dp, lp);

		/*
		 * Check the tuple XMIN against prior XMAX, if any
		 */
		if (TransactionIdIsValid(priorXmax) &&
			!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
			break;

		/*
		 * OK, this tuple is indeed a member of the chain.
		 */
		chainitems[nchain++] = offnum;

		/*
		 * Check tuple's visibility status.
		 */
		tupdead = recent_dead = false;

		switch (HeapTupleSatisfiesVacuum(relation, htup, OldestXmin, buffer))
		{
			case HEAPTUPLE_DEAD:
				tupdead = true;
				break;

			case HEAPTUPLE_RECENTLY_DEAD:
				recent_dead = true;

				/*
				 * This tuple may soon become DEAD.  Update the hint field so
				 * that the page is reconsidered for pruning in future.
				 */
				heap_prune_record_prunable(prstate,
										   HeapTupleHeaderGetXmax(htup));
				break;

			case HEAPTUPLE_DELETE_IN_PROGRESS:

				/*
				 * This tuple may soon become DEAD.  Update the hint field so
				 * that the page is reconsidered for pruning in future.
				 */
				heap_prune_record_prunable(prstate,
										   HeapTupleHeaderGetXmax(htup));
				break;

			case HEAPTUPLE_LIVE:
			case HEAPTUPLE_INSERT_IN_PROGRESS:

				/*
				 * If we wanted to optimize for aborts, we might consider
				 * marking the page prunable when we see INSERT_IN_PROGRESS.
				 * But we don't.  See related decisions about when to mark the
				 * page prunable in heapam.c.
				 */
				break;

			default:
				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
				break;
		}

		/*
		 * Remember the last DEAD tuple seen.  We will advance past
		 * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
		 * but we can't advance past anything else.  (XXX is it really worth
		 * continuing to scan beyond RECENTLY_DEAD?  The case where we will
		 * find another DEAD tuple is a fairly unusual corner case.)
		 */
		if (tupdead)
			latestdead = offnum;
		else if (!recent_dead)
			break;

		/*
		 * If the tuple is not HOT-updated, then we are at the end of this
		 * HOT-update chain.
		 */
		if (!HeapTupleHeaderIsHotUpdated(htup))
			break;

		/*
		 * Advance to next chain member.
		 */
		Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
			   BufferGetBlockNumber(buffer));
		offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
		priorXmax = HeapTupleHeaderGetXmax(htup);
	}

	/*
	 * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
	 * the DEAD tuples at the start of the chain are removed and the root line
	 * pointer is appropriately redirected.
	 */
	if (OffsetNumberIsValid(latestdead))
	{
		/*
		 * Mark as unused each intermediate item that we are able to remove
		 * from the chain.
		 *
		 * When the previous item is the last dead tuple seen, we are at the
		 * right candidate for redirection.
		 */
		for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
		{
			heap_prune_record_unused(prstate, chainitems[i]);
			ndeleted++;
		}

		/*
		 * If the root entry had been a normal tuple, we are deleting it, so
		 * count it in the result.	But changing a redirect (even to DEAD
		 * state) doesn't count.
		 */
		if (ItemIdIsNormal(rootlp))
			ndeleted++;

		/*
		 * If the DEAD tuple is at the end of the chain, the entire chain is
		 * dead and the root line pointer can be marked dead.  Otherwise just
		 * redirect the root to the correct chain member.
		 */
		if (i >= nchain)
			heap_prune_record_dead(prstate, rootoffnum);
		else
		{
			heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
			/* If the redirection will be a move, need more processing */
			if (redirect_move)
				redirect_target = chainitems[i];
		}
	}
	else if (nchain < 2 && ItemIdIsRedirected(rootlp))
	{
		/*
		 * We found a redirect item that doesn't point to a valid follow-on
		 * item.  This can happen if the loop in heap_page_prune caused us to
		 * visit the dead successor of a redirect item before visiting the
		 * redirect item.  We can clean up by setting the redirect item to
		 * DEAD state.
		 */
		heap_prune_record_dead(prstate, rootoffnum);
	}
	else if (redirect_move && ItemIdIsRedirected(rootlp))
	{
		/*
		 * If we desire to eliminate LP_REDIRECT items by moving tuples, make
		 * a redirection entry for each redirected root item; this will cause
		 * heap_page_prune_execute to actually do the move. (We get here only
		 * when there are no DEAD tuples in the chain; otherwise the
		 * redirection entry was made above.)
		 */
		heap_prune_record_redirect(prstate, rootoffnum, chainitems[1]);
		redirect_target = chainitems[1];
	}

	/*
	 * If we are going to implement a redirect by moving tuples, we have to
	 * issue a cache invalidation against the redirection target tuple,
	 * because its CTID will be effectively changed by the move.  Note that
	 * CacheInvalidateHeapTuple only queues the request; it doesn't send it.
	 * If we fail before reaching EndNonTransactionalInvalidation, nothing
	 * happens and no harm is done.
	 */
	if (OffsetNumberIsValid(redirect_target))
	{
		ItemId		firstlp = PageGetItemId(dp, redirect_target);
		HeapTupleData firsttup;

		Assert(ItemIdIsNormal(firstlp));
		/* Set up firsttup to reference the tuple at its existing CTID */
		firsttup.t_data = (HeapTupleHeader) PageGetItem(dp, firstlp);
		firsttup.t_len = ItemIdGetLength(firstlp);
		ItemPointerSet(&firsttup.t_self,
					   BufferGetBlockNumber(buffer),
					   redirect_target);
		CacheInvalidateHeapTuple(relation, &firsttup);
	}

	return ndeleted;
}
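
heap_prune_chain only records intended changes; the heap_prune_record_* helpers it calls append to the arrays in prstate that the header comment describes. A hedged sketch of two of them, with the counter field names assumed from that description rather than copied from the source:

static void
heap_prune_record_unused_sketch(PruneState *prstate, OffsetNumber offnum)
{
	Assert(prstate->nunused < MaxHeapTuplesPerPage);
	prstate->nowunused[prstate->nunused++] = offnum;
	prstate->marked[offnum] = true;		/* never process this item twice */
}

static void
heap_prune_record_redirect_sketch(PruneState *prstate,
								  OffsetNumber offnum, OffsetNumber rdoffnum)
{
	/* two entries per redirection, as the header comment says */
	prstate->redirected[prstate->nredirected * 2] = offnum;
	prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
	prstate->nredirected++;
	prstate->marked[offnum] = true;
	prstate->marked[rdoffnum] = true;
}
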
Example #25
0
/*
 * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED
 */
void
FinishPreparedTransaction(const char *gid, bool isCommit)
{
	GlobalTransaction gxact;
	TransactionId xid;
	char	   *buf;
	char	   *bufptr;
	TwoPhaseFileHeader *hdr;
	TransactionId latestXid;
	TransactionId *children;
	RelFileNode *commitrels;
	RelFileNode *abortrels;
	RelFileNode *delrels;
	int			ndelrels;
	int			i;

	/*
	 * Validate the GID, and lock the GXACT to ensure that two backends do not
	 * try to commit the same GID at once.
	 */
	gxact = LockGXact(gid, GetUserId());
	xid = gxact->proc.xid;

	/*
	 * Read and validate the state file
	 */
	buf = ReadTwoPhaseFile(xid);
	if (buf == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("two-phase state file for transaction %u is corrupt",
						xid)));

	/*
	 * Disassemble the header area
	 */
	hdr = (TwoPhaseFileHeader *) buf;
	Assert(TransactionIdEquals(hdr->xid, xid));
	bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
	children = (TransactionId *) bufptr;
	bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
	commitrels = (RelFileNode *) bufptr;
	bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
	abortrels = (RelFileNode *) bufptr;
	bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));

	/* compute latestXid among all children */
	latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);

	/*
	 * The order of operations here is critical: make the XLOG entry for
	 * commit or abort, then mark the transaction committed or aborted in
	 * pg_clog, then remove its PGPROC from the global ProcArray (which means
	 * TransactionIdIsInProgress will stop saying the prepared xact is in
	 * progress), then run the post-commit or post-abort callbacks. The
	 * callbacks will release the locks the transaction held.
	 */
	if (isCommit)
		RecordTransactionCommitPrepared(xid,
										hdr->nsubxacts, children,
										hdr->ncommitrels, commitrels);
	else
		RecordTransactionAbortPrepared(xid,
									   hdr->nsubxacts, children,
									   hdr->nabortrels, abortrels);

	ProcArrayRemove(&gxact->proc, latestXid);

	/*
	 * In case we fail while running the callbacks, mark the gxact invalid so
	 * no one else will try to commit/rollback, and so it can be recycled
	 * properly later.	It is still locked by our XID so it won't go away yet.
	 *
	 * (We assume it's safe to do this without taking TwoPhaseStateLock.)
	 */
	gxact->valid = false;

	/*
	 * We have to remove any files that were supposed to be dropped. For
	 * consistency with the regular xact.c code paths, we must do this before
	 * releasing locks, so do it before running the callbacks.
	 *
	 * NB: this code knows that we couldn't be dropping any temp rels ...
	 */
	if (isCommit)
	{
		delrels = commitrels;
		ndelrels = hdr->ncommitrels;
	}
	else
	{
		delrels = abortrels;
		ndelrels = hdr->nabortrels;
	}
	for (i = 0; i < ndelrels; i++)
	{
		SMgrRelation srel = smgropen(delrels[i]);
		ForkNumber	fork;

		for (fork = 0; fork <= MAX_FORKNUM; fork++)
		{
			if (smgrexists(srel, fork))
				smgrdounlink(srel, fork, false, false);
		}
		smgrclose(srel);
	}

	/* And now do the callbacks */
	if (isCommit)
		ProcessRecords(bufptr, xid, twophase_postcommit_callbacks);
	else
		ProcessRecords(bufptr, xid, twophase_postabort_callbacks);

	/* Count the prepared xact as committed or aborted */
	AtEOXact_PgStat(isCommit);

	/*
	 * And now we can clean up our mess.
	 */
	RemoveTwoPhaseFile(xid, true);

	RemoveGXact(gxact);

	pfree(buf);
}
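
The pointer arithmetic in the disassembly step implies this on-disk layout for a two-phase state file (a reading of the parsing code above, not a spec; each section is MAXALIGN'd so the next begins on an aligned boundary):

/*
 *	TwoPhaseFileHeader	hdr
 *	TransactionId		children[hdr.nsubxacts]
 *	RelFileNode			commitrels[hdr.ncommitrels]
 *	RelFileNode			abortrels[hdr.nabortrels]
 *	... resource-manager records, consumed by ProcessRecords ...
 */

/* Hypothetical helper showing how each MAXALIGN'd section is skipped. */
static char *
skip_twophase_section(char *bufptr, int nitems, Size itemsize)
{
	return bufptr + MAXALIGN(nitems * itemsize);
}
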
Example #26
0
/*
 * XidInMVCCSnapshot
 *		Is the given XID still in progress according to the snapshot?
 *
 * Note: GetSnapshotData never stores either top xid or subxids of our own
 * backend into a snapshot, so these xids will not be reported as "running"
 * by this function.  This is OK for current uses, because we actually only
 * apply this for known-committed XIDs.
 */
static bool
XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
{
	uint32		i;

	/*
	 * Make a quick range check to eliminate most XIDs without looking at the
	 * xip arrays.	Note that this is OK even if we convert a subxact XID to
	 * its parent below, because a subxact with XID < xmin has surely also got
	 * a parent with XID < xmin, while one with XID >= xmax must belong to a
	 * parent that was not yet committed at the time of this snapshot.
	 */

	/* Any xid < xmin is not in-progress */
	if (TransactionIdPrecedes(xid, snapshot->xmin))
		return false;
	/* Any xid >= xmax is in-progress */
	if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
		return true;

	/*
	 * If the snapshot contains full subxact data, the fastest way to check
	 * things is just to compare the given XID against both subxact XIDs and
	 * top-level XIDs.	If the snapshot overflowed, we have to use pg_subtrans
	 * to convert a subxact XID to its parent XID, but then we need only look
	 * at top-level XIDs not subxacts.
	 */
	if (snapshot->subxcnt >= 0)
	{
		/* full data, so search subxip */
		int32		j;

		for (j = 0; j < snapshot->subxcnt; j++)
		{
			if (TransactionIdEquals(xid, snapshot->subxip[j]))
				return true;
		}

		/* not there, fall through to search xip[] */
	}
	else
	{
		/* overflowed, so convert xid to top-level */
		xid = SubTransGetTopmostTransaction(xid);

		/*
		 * If xid was indeed a subxact, we might now have an xid < xmin, so
		 * recheck to avoid an array scan.	No point in rechecking xmax.
		 */
		if (TransactionIdPrecedes(xid, snapshot->xmin))
			return false;
	}

	for (i = 0; i < snapshot->xcnt; i++)
	{
		if (TransactionIdEquals(xid, snapshot->xip[i]))
			return true;
	}

	return false;
}
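
The quick range checks above are only sound because XID comparison is wraparound-aware: for normal XIDs, TransactionIdPrecedes effectively compares the signed 32-bit difference, so ordering stays meaningful on the circular XID space. A sketch of that comparison, as an illustration rather than the source definition:

static bool
xid_precedes_sketch(TransactionId id1, TransactionId id2)
{
	int32		diff;

	/* Special (permanent) XIDs compare by plain numeric value. */
	if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
		return id1 < id2;

	/* Signed difference makes the comparison circular-safe. */
	diff = (int32) (id1 - id2);
	return diff < 0;
}
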
Example #27
0
/*
 * HeapTupleSatisfiesVacuum
 *
 *	Determine the status of tuples for VACUUM purposes.  Here, what
 *	we mainly want to know is if a tuple is potentially visible to *any*
 *	running transaction.  If so, it can't be removed yet by VACUUM.
 *
 * OldestXmin is a cutoff XID (obtained from GetOldestXmin()).	Tuples
 * deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
 * still be visible to some open transaction, so we can't remove them,
 * even if we see that the deleting transaction has committed.
 */
HTSV_Result
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin,
						 Buffer buffer)
{
	/*
	 * Has inserting transaction committed?
	 *
	 * If the inserting transaction aborted, then the tuple was never visible
	 * to any other transaction, so we can delete it immediately.
	 */
	if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
	{
		if (tuple->t_infomask & HEAP_XMIN_INVALID)
			return HEAPTUPLE_DEAD;
		else if (tuple->t_infomask & HEAP_MOVED_OFF)
		{
			TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

			if (TransactionIdIsCurrentTransactionId(xvac))
				return HEAPTUPLE_DELETE_IN_PROGRESS;
			if (TransactionIdIsInProgress(xvac))
				return HEAPTUPLE_DELETE_IN_PROGRESS;
			if (TransactionIdDidCommit(xvac))
			{
				SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
							InvalidTransactionId);
				return HEAPTUPLE_DEAD;
			}
			SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
						InvalidTransactionId);
		}
		else if (tuple->t_infomask & HEAP_MOVED_IN)
		{
			TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

			if (TransactionIdIsCurrentTransactionId(xvac))
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			if (TransactionIdIsInProgress(xvac))
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			if (TransactionIdDidCommit(xvac))
				SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
							InvalidTransactionId);
			else
			{
				SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
							InvalidTransactionId);
				return HEAPTUPLE_DEAD;
			}
		}
		else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
		{
			if (tuple->t_infomask & HEAP_XMAX_INVALID)	/* xid invalid */
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			if (tuple->t_infomask & HEAP_IS_LOCKED)
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			/* inserted and then deleted by same xact */
			return HEAPTUPLE_DELETE_IN_PROGRESS;
		}
		else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
			SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
						HeapTupleHeaderGetXmin(tuple));
		else
		{
			/*
			 * Not in Progress, Not Committed, so either Aborted or crashed
			 */
			SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
						InvalidTransactionId);
			return HEAPTUPLE_DEAD;
		}

		/*
		 * At this point the xmin is known committed, but we might not have
		 * been able to set the hint bit yet; so we can no longer Assert that
		 * it's set.
		 */
	}

	/*
	 * Okay, the inserter committed, so it was good at some point.	Now what
	 * about the deleting transaction?
	 */
	if (tuple->t_infomask & HEAP_XMAX_INVALID)
		return HEAPTUPLE_LIVE;

	if (tuple->t_infomask & HEAP_IS_LOCKED)
	{
		/*
		 * "Deleting" xact really only locked it, so the tuple is live in any
		 * case.  However, we should make sure that either XMAX_COMMITTED or
		 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
		 * examining the tuple for future xacts.  Also, marking dead
		 * MultiXacts as invalid here provides defense against MultiXactId
		 * wraparound (see also comments in heap_freeze_tuple()).
		 */
		if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
		{
			if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
			{
				if (MultiXactIdIsRunning(HeapTupleHeaderGetXmax(tuple)))
					return HEAPTUPLE_LIVE;
			}
			else
			{
				if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
					return HEAPTUPLE_LIVE;
			}

			/*
			 * We don't really care whether xmax did commit, abort or crash.
			 * We know that xmax did lock the tuple, but it did not and will
			 * never actually update it.
			 */
			SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
						InvalidTransactionId);
		}
		return HEAPTUPLE_LIVE;
	}

	if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
	{
		/* MultiXacts are currently only allowed to lock tuples */
		Assert(tuple->t_infomask & HEAP_IS_LOCKED);
		return HEAPTUPLE_LIVE;
	}

	if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
	{
		if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
			return HEAPTUPLE_DELETE_IN_PROGRESS;
		else if (TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
			SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
						HeapTupleHeaderGetXmax(tuple));
		else
		{
			/*
			 * Not in Progress, Not Committed, so either Aborted or crashed
			 */
			SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
						InvalidTransactionId);
			return HEAPTUPLE_LIVE;
		}

		/*
		 * At this point the xmax is known committed, but we might not have
		 * been able to set the hint bit yet; so we can no longer Assert that
		 * it's set.
		 */
	}

	/*
	 * Deleter committed, but check special cases.
	 */

	if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple),
							HeapTupleHeaderGetXmax(tuple)))
	{
		/*
		 * Inserter also deleted it, so it was never visible to anyone else.
		 * However, we can only remove it early if it's not an updated tuple;
		 * else its parent tuple is linking to it via t_ctid, and this tuple
		 * mustn't go away before the parent does.
		 */
		if (!(tuple->t_infomask & HEAP_UPDATED))
			return HEAPTUPLE_DEAD;
	}

	if (!TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin))
	{
		/* deleting xact is too recent, tuple could still be visible */
		return HEAPTUPLE_RECENTLY_DEAD;
	}

	/* Otherwise, it's dead and removable */
	return HEAPTUPLE_DEAD;
}
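
SetHintBits, used throughout the function above, is more than a plain bit-set: before hinting that a transaction committed, it must check that the commit WAL record has reached disk, or a crash could leave a hint bit claiming a commit that never happened. A hedged sketch, assuming the era-appropriate SetBufferCommitInfoNeedsSave() buffer call:

static void
SetHintBits_sketch(HeapTupleHeader tuple, Buffer buffer,
				   uint16 infomask, TransactionId xid)
{
	if (TransactionIdIsValid(xid))
	{
		/* The committing xact's WAL record must be flushed before we hint. */
		XLogRecPtr	commitLSN = TransactionIdGetCommitLSN(xid);

		if (XLogNeedsFlush(commitLSN))
			return;				/* not flushed yet, so don't set the bit */
	}

	tuple->t_infomask |= infomask;
	SetBufferCommitInfoNeedsSave(buffer);
}
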
Example #28
0
/*
 * HeapTupleSatisfiesSnapshot
 *		True iff heap tuple is valid for the given snapshot.
 *
 *	Here, we consider the effects of:
 *		all transactions committed as of the time of the given snapshot
 *		previous commands of this transaction
 *
 *	Does _not_ include:
 *		transactions shown as in-progress by the snapshot
 *		transactions started after the snapshot was taken
 *		changes made by the current command
 *
 * This is the same as HeapTupleSatisfiesNow, except that transactions that
 * were in progress or as yet unstarted when the snapshot was taken will
 * be treated as uncommitted, even if they have committed by now.
 *
 * (Notice, however, that the tuple status hint bits will be updated on the
 * basis of the true state of the transaction, even if we then pretend we
 * can't see it.)
 */
bool
HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
{
	if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
	{
		if (tuple->t_infomask & HEAP_XMIN_INVALID)
			return false;

		if (tuple->t_infomask & HEAP_MOVED_OFF)
		{
			TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

			if (TransactionIdIsCurrentTransactionId(xvac))
				return false;
			if (!TransactionIdIsInProgress(xvac))
			{
				if (TransactionIdDidCommit(xvac))
				{
					tuple->t_infomask |= HEAP_XMIN_INVALID;
					return false;
				}
				tuple->t_infomask |= HEAP_XMIN_COMMITTED;
			}
		}
		else if (tuple->t_infomask & HEAP_MOVED_IN)
		{
			TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

			if (!TransactionIdIsCurrentTransactionId(xvac))
			{
				if (TransactionIdIsInProgress(xvac))
					return false;
				if (TransactionIdDidCommit(xvac))
					tuple->t_infomask |= HEAP_XMIN_COMMITTED;
				else
				{
					tuple->t_infomask |= HEAP_XMIN_INVALID;
					return false;
				}
			}
		}
		else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple)))
		{
			if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid)
				return false;	/* inserted after scan started */

			if (tuple->t_infomask & HEAP_XMAX_INVALID)	/* xid invalid */
				return true;

			Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));

			if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
				return true;

			if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
				return true;	/* deleted after scan started */
			else
				return false;	/* deleted before scan started */
		}
		else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
			return false;
		else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
			tuple->t_infomask |= HEAP_XMIN_COMMITTED;
		else
		{
			/* it must have aborted or crashed */
			tuple->t_infomask |= HEAP_XMIN_INVALID;
			return false;
		}
	}

	/*
	 * By here, the inserting transaction has committed --- we have to check
	 * when...
	 */
	if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple),
									 snapshot->xmin))
	{
		uint32		i;

		if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple),
										 snapshot->xmax))
			return false;

		for (i = 0; i < snapshot->xcnt; i++)
		{
			if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple),
									snapshot->xip[i]))
				return false;
		}
	}

	if (tuple->t_infomask & HEAP_XMAX_INVALID)	/* xid invalid or aborted */
		return true;

	if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
		return true;

	if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
	{
		if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
		{
			if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
				return true;	/* deleted after scan started */
			else
				return false;	/* deleted before scan started */
		}

		if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
			return true;

		if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
		{
			/* it must have aborted or crashed */
			tuple->t_infomask |= HEAP_XMAX_INVALID;
			return true;
		}

		/* xmax transaction committed */
		tuple->t_infomask |= HEAP_XMAX_COMMITTED;
	}

	/*
	 * OK, the deleting transaction committed too ... but when?
	 */
	if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xmin))
	{
		uint32		i;

		if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple),
										 snapshot->xmax))
			return true;
		for (i = 0; i < snapshot->xcnt; i++)
		{
			if (TransactionIdEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xip[i]))
				return true;
		}
	}

	return false;
}
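
Both the xmin and xmax tests above follow the same three-way pattern: an XID that precedes snapshot->xmin committed before the snapshot; one at or after snapshot->xmax had not yet started; anything in between is visible only if it is absent from xip[]. A hypothetical helper restating that pattern (assuming, as in the function above, that the XID is already known committed):

static bool
committed_xid_visible_sketch(TransactionId xid, Snapshot snapshot)
{
	uint32		i;

	if (TransactionIdPrecedes(xid, snapshot->xmin))
		return true;			/* committed before the snapshot */
	if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
		return false;			/* started after the snapshot was taken */

	for (i = 0; i < snapshot->xcnt; i++)
	{
		if (TransactionIdEquals(xid, snapshot->xip[i]))
			return false;		/* in progress when the snapshot was taken */
	}
	return true;
}
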
Example #29
0
/*
 * RecoverPreparedTransactions
 *
 * Scan the pg_twophase directory and reload shared-memory state for each
 * prepared transaction (reacquire locks, etc).  This is run during database
 * startup.
 */
void
RecoverPreparedTransactions(void)
{
	char		dir[MAXPGPATH];
	DIR		   *cldir;
	struct dirent *clde;

	snprintf(dir, MAXPGPATH, "%s", TWOPHASE_DIR);

	cldir = AllocateDir(dir);
	while ((clde = ReadDir(cldir, dir)) != NULL)
	{
		if (strlen(clde->d_name) == 8 &&
			strspn(clde->d_name, "0123456789ABCDEF") == 8)
		{
			TransactionId xid;
			char	   *buf;
			char	   *bufptr;
			TwoPhaseFileHeader *hdr;
			TransactionId *subxids;
			GlobalTransaction gxact;
			int			i;

			xid = (TransactionId) strtoul(clde->d_name, NULL, 16);

			/* Already processed? */
			if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
			{
				ereport(WARNING,
						(errmsg("removing stale two-phase state file \"%s\"",
								clde->d_name)));
				RemoveTwoPhaseFile(xid, true);
				continue;
			}

			/* Read and validate file */
			buf = ReadTwoPhaseFile(xid);
			if (buf == NULL)
			{
				ereport(WARNING,
					  (errmsg("removing corrupt two-phase state file \"%s\"",
							  clde->d_name)));
				RemoveTwoPhaseFile(xid, true);
				continue;
			}

			ereport(LOG,
					(errmsg("recovering prepared transaction %u", xid)));

			/* Deconstruct header */
			hdr = (TwoPhaseFileHeader *) buf;
			Assert(TransactionIdEquals(hdr->xid, xid));
			bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
			subxids = (TransactionId *) bufptr;
			bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
			bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
			bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));

			/*
			 * Reconstruct subtrans state for the transaction --- needed
			 * because pg_subtrans is not preserved over a restart.  Note that
			 * we are linking all the subtransactions directly to the
			 * top-level XID; there may originally have been a more complex
			 * hierarchy, but there's no need to restore that exactly.
			 */
			for (i = 0; i < hdr->nsubxacts; i++)
				SubTransSetParent(subxids[i], xid);

			/*
			 * Recreate its GXACT and dummy PGPROC
			 *
			 * Note: since we don't have the PREPARE record's WAL location at
			 * hand, we leave prepare_lsn zeroed.  This means the GXACT will
			 * be fsync'd on every future checkpoint.  We assume this
			 * situation is infrequent enough that the performance cost is
			 * negligible (especially since we know the state file has already
			 * been fsynced).
			 */
			gxact = MarkAsPreparing(xid, hdr->gid,
									hdr->prepared_at,
									hdr->owner, hdr->database);
			GXactLoadSubxactData(gxact, hdr->nsubxacts, subxids);
			MarkAsPrepared(gxact);

			/*
			 * Recover other state (notably locks) using resource managers
			 */
			ProcessRecords(bufptr, xid, twophase_recover_callbacks);

			pfree(buf);
		}
	}
	FreeDir(cldir);
}
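
The strlen/strspn filter in the directory scan matches exactly how the state files are named when written: the XID rendered as eight uppercase hex digits under TWOPHASE_DIR. As a sketch (the helper name here is hypothetical):

static void
TwoPhaseFilePath_sketch(char *path, TransactionId xid)
{
	snprintf(path, MAXPGPATH, "%s/%08X", TWOPHASE_DIR, xid);
}
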
Example #30
0
/*
 * HeapTupleSatisfiesVacuum
 *
 *	Determine the status of tuples for VACUUM purposes.  Here, what
 *	we mainly want to know is if a tuple is potentially visible to *any*
 *	running transaction.  If so, it can't be removed yet by VACUUM.
 *
 * OldestXmin is a cutoff XID (obtained from GetOldestXmin()).	Tuples
 * deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
 * still be visible to some open transaction, so we can't remove them,
 * even if we see that the deleting transaction has committed.
 */
HTSV_Result
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
{
	/*
	 * Has inserting transaction committed?
	 *
	 * If the inserting transaction aborted, then the tuple was never visible
	 * to any other transaction, so we can delete it immediately.
	 */
	if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
	{
		if (tuple->t_infomask & HEAP_XMIN_INVALID)
			return HEAPTUPLE_DEAD;
		else if (tuple->t_infomask & HEAP_MOVED_OFF)
		{
			TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

			if (TransactionIdIsCurrentTransactionId(xvac))
				return HEAPTUPLE_DELETE_IN_PROGRESS;
			if (TransactionIdIsInProgress(xvac))
				return HEAPTUPLE_DELETE_IN_PROGRESS;
			if (TransactionIdDidCommit(xvac))
			{
				tuple->t_infomask |= HEAP_XMIN_INVALID;
				return HEAPTUPLE_DEAD;
			}
			tuple->t_infomask |= HEAP_XMIN_COMMITTED;
		}
		else if (tuple->t_infomask & HEAP_MOVED_IN)
		{
			TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

			if (TransactionIdIsCurrentTransactionId(xvac))
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			if (TransactionIdIsInProgress(xvac))
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			if (TransactionIdDidCommit(xvac))
				tuple->t_infomask |= HEAP_XMIN_COMMITTED;
			else
			{
				tuple->t_infomask |= HEAP_XMIN_INVALID;
				return HEAPTUPLE_DEAD;
			}
		}
		else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
		{
			if (tuple->t_infomask & HEAP_XMAX_INVALID)	/* xid invalid */
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			Assert(HeapTupleHeaderGetXmin(tuple) ==
				   HeapTupleHeaderGetXmax(tuple));
			if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
				return HEAPTUPLE_INSERT_IN_PROGRESS;
			/* inserted and then deleted by same xact */
			return HEAPTUPLE_DELETE_IN_PROGRESS;
		}
		else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
			tuple->t_infomask |= HEAP_XMIN_COMMITTED;
		else
		{
			/*
			 * Not in Progress, Not Committed, so either Aborted or
			 * crashed
			 */
			tuple->t_infomask |= HEAP_XMIN_INVALID;
			return HEAPTUPLE_DEAD;
		}
		/* Should only get here if we set XMIN_COMMITTED */
		Assert(tuple->t_infomask & HEAP_XMIN_COMMITTED);
	}

	/*
	 * Okay, the inserter committed, so it was good at some point.	Now
	 * what about the deleting transaction?
	 */
	if (tuple->t_infomask & HEAP_XMAX_INVALID)
		return HEAPTUPLE_LIVE;

	if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
	{
		/*
		 * "Deleting" xact really only marked it for update, so the tuple
		 * is live in any case.  However, we must make sure that either
		 * XMAX_COMMITTED or XMAX_INVALID gets set once the xact is gone;
		 * otherwise it is unsafe to recycle CLOG status after vacuuming.
		 */
		if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
		{
			if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
				return HEAPTUPLE_LIVE;

			/*
			 * We don't really care whether xmax did commit, abort or
			 * crash. We know that xmax did mark the tuple for update, but
			 * it did not and will never actually update it.
			 */
			tuple->t_infomask |= HEAP_XMAX_INVALID;
		}
		return HEAPTUPLE_LIVE;
	}

	if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
	{
		if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
			return HEAPTUPLE_DELETE_IN_PROGRESS;
		else if (TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
			tuple->t_infomask |= HEAP_XMAX_COMMITTED;
		else
		{
			/*
			 * Not in Progress, Not Committed, so either Aborted or
			 * crashed
			 */
			tuple->t_infomask |= HEAP_XMAX_INVALID;
			return HEAPTUPLE_LIVE;
		}
		/* Should only get here if we set XMAX_COMMITTED */
		Assert(tuple->t_infomask & HEAP_XMAX_COMMITTED);
	}

	/*
	 * Deleter committed, but check special cases.
	 */

	if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple),
							HeapTupleHeaderGetXmax(tuple)))
	{
		/*
		 * Inserter also deleted it, so it was never visible to anyone
		 * else.  However, we can only remove it early if it's not an
		 * updated tuple; else its parent tuple is linking to it via t_ctid,
		 * and this tuple mustn't go away before the parent does.
		 */
		if (!(tuple->t_infomask & HEAP_UPDATED))
			return HEAPTUPLE_DEAD;
	}

	if (!TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin))
	{
		/* deleting xact is too recent, tuple could still be visible */
		return HEAPTUPLE_RECENTLY_DEAD;
	}

	/* Otherwise, it's dead and removable */
	return HEAPTUPLE_DEAD;
}