Exemplo n.º 1
0
/*
 * TransactionIdPrecedesOrEquals --- is id1 logically <= id2?
 */
bool
TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
{
	int32		diff;

	if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
		return (id1 <= id2);

	diff = (int32) (id1 - id2);
	return (diff <= 0);
}
Exemplo n.º 2
0
/*
 * TransactionIdFollowsOrEquals --- is id1 logically >= id2?
 */
bool
TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
{
	int32		diff;

	if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
		return (id1 >= id2);

	diff = (int32) (id1 - id2);
	return (diff >= 0);
}
Exemplo n.º 3
0
/*
 * TransactionIdPrecedes --- is id1 logically < id2?
 */
bool
TransactionIdPrecedes(TransactionId id1, TransactionId id2)
{
	/*
	 * If either ID is a permanent XID then we can just do unsigned
	 * comparison.	If both are normal, do a modulo-2^31 comparison.
	 */
	int32		diff;

	if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
		return (id1 < id2);

	diff = (int32) (id1 - id2);
	return (diff < 0);
}
Exemplo n.º 4
0
/*
 * Interrogate the parent of a transaction in the subtrans log.
 */
TransactionId
SubTransGetParent(TransactionId xid)
{
	int			pageno = TransactionIdToPage(xid);
	int			entryno = TransactionIdToEntry(xid);
	int			slotno;
	TransactionId *ptr;
	TransactionId parent;

	/* Can't ask about stuff that might not be around anymore */
	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));

	/* Bootstrap and frozen XIDs have no parent */
	if (!TransactionIdIsNormal(xid))
		return InvalidTransactionId;

	/* lock is acquired by SimpleLruReadPage_ReadOnly */

	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
	ptr += entryno;

	parent = *ptr;

	LWLockRelease(SubtransControlLock);

	return parent;
}
Exemplo n.º 5
0
/*
 * do a TransactionId -> txid conversion for an XID near the given epoch
 */
static txid
convert_xid(TransactionId xid, const TxidEpoch *state)
{
#ifndef INT64_IS_BUSTED
	uint64		epoch;

	/* return special xid's as-is */
	if (!TransactionIdIsNormal(xid))
		return (txid) xid;

	/* xid can be on either side when near wrap-around */
	epoch = (uint64) state->epoch;
	if (xid > state->last_xid &&
		TransactionIdPrecedes(xid, state->last_xid))
		epoch--;
	else if (xid < state->last_xid &&
			 TransactionIdFollows(xid, state->last_xid))
		epoch++;

	return (epoch << 32) | xid;
#else							/* INT64_IS_BUSTED */
	/* we can't do anything with the epoch, so ignore it */
	return (txid) xid & MAX_TXID;
#endif   /* INT64_IS_BUSTED */
}
Exemplo n.º 6
0
/*
 * TransactionIdGetCommitLSN
 *
 * This function returns an LSN that is late enough to be able
 * to guarantee that if we flush up to the LSN returned then we
 * will have flushed the transaction's commit record to disk.
 *
 * The result is not necessarily the exact LSN of the transaction's
 * commit record!  For example, for long-past transactions (those whose
 * clog pages already migrated to disk), we'll return InvalidXLogRecPtr.
 * Also, because we group transactions on the same clog page to conserve
 * storage, we might return the LSN of a later transaction that falls into
 * the same group.
 */
XLogRecPtr
TransactionIdGetCommitLSN(TransactionId xid)
{
	XLogRecPtr	result;

	/*
	 * Currently, all uses of this function are for xids that were just
	 * reported to be committed by TransactionLogFetch, so we expect that
	 * checking TransactionLogFetch's cache will usually succeed and avoid an
	 * extra trip to shared memory.
	 */
	if (TransactionIdEquals(xid, cachedFetchXid))
		return cachedCommitLSN;

	/* Special XIDs are always known committed */
	if (!TransactionIdIsNormal(xid))
		return InvalidXLogRecPtr;

	/*
	 * Get the transaction status.
	 */
	(void) TransactionIdGetStatus(xid, &result);

	return result;
}
Exemplo n.º 7
0
/*
 * GetOldestXmin -- returns oldest transaction that was running
 *					when any current transaction was started.
 *
 * If allDbs is TRUE then all backends are considered; if allDbs is FALSE
 * then only backends running in my own database are considered.
 *
 * This is used by VACUUM to decide which deleted tuples must be preserved
 * in a table.	allDbs = TRUE is needed for shared relations, but allDbs =
 * FALSE is sufficient for non-shared relations, since only backends in my
 * own database could ever see the tuples in them.
 *
 * Note: we include the currently running xids in the set of considered xids.
 * This ensures that if a just-started xact has not yet set its snapshot,
 * when it does set the snapshot it cannot set xmin less than what we compute.
 */
TransactionId
GetOldestXmin(bool allDbs)
{
	SISeg	   *segP = shmInvalBuffer;
	ProcState  *stateP = segP->procState;
	TransactionId result;
	int			index;

	result = GetCurrentTransactionId();

	LWLockAcquire(SInvalLock, LW_SHARED);

	for (index = 0; index < segP->lastBackend; index++)
	{
		SHMEM_OFFSET pOffset = stateP[index].procStruct;

		if (pOffset != INVALID_OFFSET)
		{
			PGPROC	   *proc = (PGPROC *) MAKE_PTR(pOffset);

			if (allDbs || proc->databaseId == MyDatabaseId)
			{
				/* Fetch xid just once - see GetNewTransactionId */
				TransactionId xid = proc->xid;

				if (TransactionIdIsNormal(xid))
				{
					if (TransactionIdPrecedes(xid, result))
						result = xid;
					xid = proc->xmin;
					if (TransactionIdIsNormal(xid))
						if (TransactionIdPrecedes(xid, result))
							result = xid;
				}
			}
		}
	}

	LWLockRelease(SInvalLock);

	return result;
}
Exemplo n.º 8
0
/* Record lowest soon-prunable XID */
static void
heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
{
	/*
	 * This should exactly match the PageSetPrunable macro.  We can't store
	 * directly into the page header yet, so we update working state.
	 */
	Assert(TransactionIdIsNormal(xid));
	if (!TransactionIdIsValid(prstate->new_prune_xid) ||
		TransactionIdPrecedes(xid, prstate->new_prune_xid))
		prstate->new_prune_xid = xid;
}
/*
 *		xid_age			- compute age of an XID (relative to current xact)
 */
Datum
xid_age(PG_FUNCTION_ARGS)
{
    TransactionId xid = PG_GETARG_TRANSACTIONID(0);
    TransactionId now = GetTopTransactionId();

    /* Permanent XIDs are always infinitely old */
    if (!TransactionIdIsNormal(xid))
        PG_RETURN_INT32(INT_MAX);

    PG_RETURN_INT32((int32) (now - xid));
}
Exemplo n.º 10
0
/*
 * Sets the commit timestamp of a single transaction.
 *
 * Must be called with CommitTsControlLock held
 */
static void
TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
						 RepOriginId nodeid, int slotno)
{
	int			entryno = TransactionIdToCTsEntry(xid);
	CommitTimestampEntry entry;

	Assert(TransactionIdIsNormal(xid));

	entry.time = ts;
	entry.nodeid = nodeid;

	memcpy(CommitTsCtl->shared->page_buffer[slotno] +
		   SizeOfCommitTimestampEntry * entryno,
		   &entry, SizeOfCommitTimestampEntry);
}
Exemplo n.º 11
0
/*
 * Get oldest Xid visible by any active transaction (global or local)
 * Take in account global Xmin received from DTMD
 */
static TransactionId DtmGetOldestXmin(Relation rel, bool ignoreVacuum)
{
	TransactionId localXmin = PgGetOldestXmin(rel, ignoreVacuum);
	TransactionId globalXmin = dtm->minXid;
	XTM_INFO("XTM: DtmGetOldestXmin localXmin=%d, globalXmin=%d\n", localXmin, globalXmin);

	if (TransactionIdIsValid(globalXmin))
	{
		globalXmin -= vacuum_defer_cleanup_age;
		if (!TransactionIdIsNormal(globalXmin))
			globalXmin = FirstNormalTransactionId;
		if (TransactionIdPrecedes(globalXmin, localXmin))
			localXmin = globalXmin;
		XTM_INFO("XTM: DtmGetOldestXmin adjusted localXmin=%d, globalXmin=%d\n", localXmin, globalXmin);
	}
	return localXmin;
}
Exemplo n.º 12
0
/*
 * TransactionLogFetch --- fetch commit status of specified transaction id
 */
static XidStatus
TransactionLogFetch(TransactionId transactionId)
{
	XidStatus	xidstatus;
	XLogRecPtr	xidlsn;

	/*
	 * Before going to the commit log manager, check our single item cache to
	 * see if we didn't just check the transaction status a moment ago.
	 */
	if (TransactionIdEquals(transactionId, cachedFetchXid))
		return cachedFetchXidStatus;

	/*
	 * Also, check to see if the transaction ID is a permanent one.
	 */
	if (!TransactionIdIsNormal(transactionId))
	{
		if (TransactionIdEquals(transactionId, BootstrapTransactionId))
			return TRANSACTION_STATUS_COMMITTED;
		if (TransactionIdEquals(transactionId, FrozenTransactionId))
			return TRANSACTION_STATUS_COMMITTED;
		return TRANSACTION_STATUS_ABORTED;
	}

	/*
	 * Get the transaction status.
	 */
	xidstatus = TransactionIdGetStatus(transactionId, &xidlsn);

	/*
	 * Cache it, but DO NOT cache status for unfinished or sub-committed
	 * transactions!  We only cache status that is guaranteed not to change.
	 */
	if (xidstatus != TRANSACTION_STATUS_IN_PROGRESS &&
		xidstatus != TRANSACTION_STATUS_SUB_COMMITTED)
	{
		cachedFetchXid = transactionId;
		cachedFetchXidStatus = xidstatus;
		cachedCommitLSN = xidlsn;
	}

	return xidstatus;
}
Exemplo n.º 13
0
static void
SubTransGetData(TransactionId xid, SubTransData* subData)
{
	MIRRORED_LOCK_DECLARE;

	int			pageno = TransactionIdToPage(xid);
	int			entryno = TransactionIdToEntry(xid);
	int			slotno;
	SubTransData *ptr;

	/* Can't ask about stuff that might not be around anymore */
	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));

	/* Bootstrap and frozen XIDs have no parent and itself as topMostParent */
	if (!TransactionIdIsNormal(xid))
	{
		subData->parent = InvalidTransactionId;
		subData->topMostParent = xid;
		return;
	}

	MIRRORED_LOCK;

	/* lock is acquired by SimpleLruReadPage_ReadOnly */

	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid, NULL);
	ptr = (SubTransData *) SubTransCtl->shared->page_buffer[slotno];
	ptr += entryno;

	subData->parent = ptr->parent;
	subData->topMostParent = ptr->topMostParent;
	if ( subData->topMostParent == InvalidTransactionId )
	{
		/* Here means parent is Main XID, hence set parent itself as topMostParent */
		subData->topMostParent = xid;
	}

	LWLockRelease(SubtransControlLock);

	MIRRORED_UNLOCK;

	return;
}
Exemplo n.º 14
0
/* adapted from Postgres' txid.c#convert_xid function */
uint64 convert_xid(TransactionId xid) {
    TxidEpoch state;
    uint64    epoch;

    GetNextXidAndEpoch(&state.last_xid, &state.epoch);

    /* return special xid's as-is */
    if (!TransactionIdIsNormal(xid))
        return (uint64) xid;

    /* xid can be on either side when near wrap-around */
    epoch = (uint64) state.epoch;
    if (xid > state.last_xid && TransactionIdPrecedes(xid, state.last_xid))
        epoch--;
    else if (xid < state.last_xid && TransactionIdFollows(xid, state.last_xid))
        epoch++;

    return (epoch << 32) | xid;
}
Exemplo n.º 15
0
/*
 * do a TransactionId -> txid conversion for an XID near the given epoch
 */
static txid
convert_xid(TransactionId xid, const TxidEpoch *state)
{
	uint64		epoch;

	/* return special xid's as-is */
	if (!TransactionIdIsNormal(xid))
		return (txid) xid;

	/* xid can be on either side when near wrap-around */
	epoch = (uint64) state->epoch;
	if (xid > state->last_xid &&
		TransactionIdPrecedes(xid, state->last_xid))
		epoch--;
	else if (xid < state->last_xid &&
			 TransactionIdFollows(xid, state->last_xid))
		epoch++;

	return (epoch << 32) | xid;
}
Exemplo n.º 16
0
Datum
pg_last_committed_xact(PG_FUNCTION_ARGS)
{
	TransactionId xid;
	TimestampTz ts;
	Datum		values[2];
	bool		nulls[2];
	TupleDesc	tupdesc;
	HeapTuple	htup;

	/* and construct a tuple with our data */
	xid = GetLatestCommitTsData(&ts, NULL);

	/*
	 * Construct a tuple descriptor for the result row.  This must match this
	 * function's pg_proc entry!
	 */
	tupdesc = CreateTemplateTupleDesc(2, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "xid",
					   XIDOID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "timestamp",
					   TIMESTAMPTZOID, -1, 0);
	tupdesc = BlessTupleDesc(tupdesc);

	if (!TransactionIdIsNormal(xid))
	{
		memset(nulls, true, sizeof(nulls));
	}
	else
	{
		values[0] = TransactionIdGetDatum(xid);
		nulls[0] = false;

		values[1] = TimestampTzGetDatum(ts);
		nulls[1] = false;
	}

	htup = heap_form_tuple(tupdesc, values, nulls);

	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
}
Exemplo n.º 17
0
/*
 * Update local Recent*Xmin variables taken in account MinXmin received from DTMD
 */
static void DtmUpdateRecentXmin(Snapshot snapshot)
{
	TransactionId xmin = dtm->minXid;
	XTM_INFO("XTM: DtmUpdateRecentXmin global xmin=%d, snapshot xmin %d\n", dtm->minXid, DtmSnapshot.xmin);

	if (TransactionIdIsValid(xmin))
	{
		xmin -= vacuum_defer_cleanup_age;
		if (!TransactionIdIsNormal(xmin))
			xmin = FirstNormalTransactionId;
		if (TransactionIdFollows(RecentGlobalDataXmin, xmin))
			RecentGlobalDataXmin = xmin;
		if (TransactionIdFollows(RecentGlobalXmin, xmin))
			RecentGlobalXmin = xmin;
	}
	if (TransactionIdFollows(RecentXmin, snapshot->xmin))
	{
		ProcArrayInstallImportedXmin(snapshot->xmin, GetCurrentTransactionId());
		RecentXmin = snapshot->xmin;
	}
}
Exemplo n.º 18
0
/*----------
 * GetSnapshotData -- returns information about running transactions.
 *
 * The returned snapshot includes xmin (lowest still-running xact ID),
 * xmax (next xact ID to be assigned), and a list of running xact IDs
 * in the range xmin <= xid < xmax.  It is used as follows:
 *		All xact IDs < xmin are considered finished.
 *		All xact IDs >= xmax are considered still running.
 *		For an xact ID xmin <= xid < xmax, consult list to see whether
 *		it is considered running or not.
 * This ensures that the set of transactions seen as "running" by the
 * current xact will not change after it takes the snapshot.
 *
 * We also compute the current global xmin (oldest xmin across all running
 * transactions) and save it in RecentGlobalXmin.  This is the same
 * computation done by GetOldestXmin(TRUE).  The xmin value is also stored
 * into RecentXmin.
 *----------
 */
Snapshot
GetSnapshotData(Snapshot snapshot, bool serializable)
{
	SISeg	   *segP = shmInvalBuffer;
	ProcState  *stateP = segP->procState;
	TransactionId xmin;
	TransactionId xmax;
	TransactionId globalxmin;
	int			index;
	int			count = 0;

	Assert(snapshot != NULL);

	/*
	 * Allocating space for MaxBackends xids is usually overkill;
	 * lastBackend would be sufficient.  But it seems better to do the
	 * malloc while not holding the lock, so we can't look at lastBackend.
	 *
	 * This does open a possibility for avoiding repeated malloc/free:
	 * since MaxBackends does not change at runtime, we can simply reuse
	 * the previous xip array if any.  (This relies on the fact that all
	 * calls pass static SnapshotData structs.)
	 */
	if (snapshot->xip == NULL)
	{
		/*
		 * First call for this snapshot
		 */
		snapshot->xip = (TransactionId *)
			malloc(MaxBackends * sizeof(TransactionId));
		if (snapshot->xip == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}

	globalxmin = xmin = GetCurrentTransactionId();

	/*
	 * If we are going to set MyProc->xmin then we'd better get exclusive
	 * lock; if not, this is a read-only operation so it can be shared.
	 */
	LWLockAcquire(SInvalLock, serializable ? LW_EXCLUSIVE : LW_SHARED);

	/*--------------------
	 * Unfortunately, we have to call ReadNewTransactionId() after acquiring
	 * SInvalLock above.  It's not good because ReadNewTransactionId() does
	 * LWLockAcquire(XidGenLock), but *necessary*.	We need to be sure that
	 * no transactions exit the set of currently-running transactions
	 * between the time we fetch xmax and the time we finish building our
	 * snapshot.  Otherwise we could have a situation like this:
	 *
	 *		1. Tx Old is running (in Read Committed mode).
	 *		2. Tx S reads new transaction ID into xmax, then
	 *		   is swapped out before acquiring SInvalLock.
	 *		3. Tx New gets new transaction ID (>= S' xmax),
	 *		   makes changes and commits.
	 *		4. Tx Old changes some row R changed by Tx New and commits.
	 *		5. Tx S finishes getting its snapshot data.  It sees Tx Old as
	 *		   done, but sees Tx New as still running (since New >= xmax).
	 *
	 * Now S will see R changed by both Tx Old and Tx New, *but* does not
	 * see other changes made by Tx New.  If S is supposed to be in
	 * Serializable mode, this is wrong.
	 *
	 * By locking SInvalLock before we read xmax, we ensure that TX Old
	 * cannot exit the set of running transactions seen by Tx S.  Therefore
	 * both Old and New will be seen as still running => no inconsistency.
	 *--------------------
	 */

	xmax = ReadNewTransactionId();

	for (index = 0; index < segP->lastBackend; index++)
	{
		SHMEM_OFFSET pOffset = stateP[index].procStruct;

		if (pOffset != INVALID_OFFSET)
		{
			PGPROC	   *proc = (PGPROC *) MAKE_PTR(pOffset);

			/* Fetch xid just once - see GetNewTransactionId */
			TransactionId xid = proc->xid;

			/*
			 * Ignore my own proc (dealt with my xid above), procs not
			 * running a transaction, and xacts started since we read the
			 * next transaction ID.  There's no need to store XIDs above
			 * what we got from ReadNewTransactionId, since we'll treat
			 * them as running anyway.	We also assume that such xacts
			 * can't compute an xmin older than ours, so they needn't be
			 * considered in computing globalxmin.
			 */
			if (proc == MyProc ||
				!TransactionIdIsNormal(xid) ||
				TransactionIdFollowsOrEquals(xid, xmax))
				continue;

			if (TransactionIdPrecedes(xid, xmin))
				xmin = xid;
			snapshot->xip[count] = xid;
			count++;

			/* Update globalxmin to be the smallest valid xmin */
			xid = proc->xmin;
			if (TransactionIdIsNormal(xid))
				if (TransactionIdPrecedes(xid, globalxmin))
					globalxmin = xid;
		}
	}

	if (serializable)
		MyProc->xmin = xmin;

	LWLockRelease(SInvalLock);

	/* Serializable snapshot must be computed before any other... */
	Assert(TransactionIdIsValid(MyProc->xmin));

	/*
	 * Update globalxmin to include actual process xids.  This is a
	 * slightly different way of computing it than GetOldestXmin uses, but
	 * should give the same result.
	 */
	if (TransactionIdPrecedes(xmin, globalxmin))
		globalxmin = xmin;

	/* Update globals for use by VACUUM */
	RecentGlobalXmin = globalxmin;
	RecentXmin = xmin;

	snapshot->xmin = xmin;
	snapshot->xmax = xmax;
	snapshot->xcnt = count;

	snapshot->curcid = GetCurrentCommandId();

	return snapshot;
}
Exemplo n.º 19
0
/*
 * Interrogate the commit timestamp of a transaction.
 *
 * The return value indicates whether a commit timestamp record was found for
 * the given xid.  The timestamp value is returned in *ts (which may not be
 * null), and the origin node for the Xid is returned in *nodeid, if it's not
 * null.
 */
bool
TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
							 RepOriginId *nodeid)
{
	int			pageno = TransactionIdToCTsPage(xid);
	int			entryno = TransactionIdToCTsEntry(xid);
	int			slotno;
	CommitTimestampEntry entry;
	TransactionId oldestCommitTsXid;
	TransactionId newestCommitTsXid;

	if (!TransactionIdIsValid(xid))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
		errmsg("cannot retrieve commit timestamp for transaction %u", xid)));
	else if (!TransactionIdIsNormal(xid))
	{
		/* frozen and bootstrap xids are always committed far in the past */
		*ts = 0;
		if (nodeid)
			*nodeid = 0;
		return false;
	}

	LWLockAcquire(CommitTsLock, LW_SHARED);

	/* Error if module not enabled */
	if (!commitTsShared->commitTsActive)
		error_commit_ts_disabled();

	/*
	 * If we're asked for the cached value, return that.  Otherwise, fall
	 * through to read from SLRU.
	 */
	if (commitTsShared->xidLastCommit == xid)
	{
		*ts = commitTsShared->dataLastCommit.time;
		if (nodeid)
			*nodeid = commitTsShared->dataLastCommit.nodeid;

		LWLockRelease(CommitTsLock);
		return *ts != 0;
	}

	oldestCommitTsXid = ShmemVariableCache->oldestCommitTsXid;
	newestCommitTsXid = ShmemVariableCache->newestCommitTsXid;
	/* neither is invalid, or both are */
	Assert(TransactionIdIsValid(oldestCommitTsXid) == TransactionIdIsValid(newestCommitTsXid));
	LWLockRelease(CommitTsLock);

	/*
	 * Return empty if the requested value is outside our valid range.
	 */
	if (!TransactionIdIsValid(oldestCommitTsXid) ||
		TransactionIdPrecedes(xid, oldestCommitTsXid) ||
		TransactionIdPrecedes(newestCommitTsXid, xid))
	{
		*ts = 0;
		if (nodeid)
			*nodeid = InvalidRepOriginId;
		return false;
	}

	/* lock is acquired by SimpleLruReadPage_ReadOnly */
	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid);
	memcpy(&entry,
		   CommitTsCtl->shared->page_buffer[slotno] +
		   SizeOfCommitTimestampEntry * entryno,
		   SizeOfCommitTimestampEntry);

	*ts = entry.time;
	if (nodeid)
		*nodeid = entry.nodeid;

	LWLockRelease(CommitTsControlLock);
	return *ts != 0;
}
Exemplo n.º 20
0
/*
 * write_database_file: update the flat database file
 *
 * A side effect is to determine the oldest database's datfrozenxid
 * so we can set or update the XID wrap limit.
 *
 * Also, if "startup" is true, we tell relcache.c to clear out the relcache
 * init file in each database.  That's a bit nonmodular, but scanning
 * pg_database twice during system startup seems too high a price for keeping
 * things better separated.
 */
static void
write_database_file(Relation drel, bool startup)
{
	StringInfoData buffer;
	HeapScanDesc scan;
	HeapTuple	tuple;
	NameData	oldest_datname;
	TransactionId oldest_datfrozenxid = InvalidTransactionId;
	MirroredFlatFileOpen mirroredOpen;

	initStringInfo(&buffer);

	MirroredFlatFile_Open(
					&mirroredOpen,
					"global",
					"pg_database",
					O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
					S_IRUSR | S_IWUSR,
					/* suppressError */ false,
					/* atomic operation */ true,
					/*isMirrorRecovery */ false);
	/*
	 * Read pg_database and write the file.
	 */
	scan = heap_beginscan(drel, SnapshotNow, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
		char	   *datname;
		Oid			datoid;
		Oid			dattablespace;
		TransactionId datfrozenxid;

		datname = NameStr(dbform->datname);
		datoid = HeapTupleGetOid(tuple);
		dattablespace = dbform->dattablespace;
		datfrozenxid = dbform->datfrozenxid;

		/*
		 * Identify the oldest datfrozenxid.  This must match
		 * the logic in vac_truncate_clog() in vacuum.c.
		 *
		 * MPP-20053: Skip databases that cannot be connected to in computing
		 * the oldest database.
		 */
		if (dbform->datallowconn && TransactionIdIsNormal(datfrozenxid))
		{
			if (oldest_datfrozenxid == InvalidTransactionId ||
				TransactionIdPrecedes(datfrozenxid, oldest_datfrozenxid))
			{
				oldest_datfrozenxid = datfrozenxid;
				namestrcpy(&oldest_datname, datname);
			}
		}

		/*
		 * Check for illegal characters in the database name.
		 */
		if (!name_okay(datname))
		{
			ereport(LOG,
					(errmsg("invalid database name \"%s\"", datname)));
			continue;
		}

		/*
		 * The file format is: "dbname" oid tablespace frozenxid
		 *
		 * The xids are not needed for backend startup, but are of use to
		 * autovacuum, and might also be helpful for forensic purposes.
		 */
		sputs_quote(&buffer, datname);
		appendStringInfo(&buffer, " %u %u %u\n",
						 datoid, dattablespace, datfrozenxid);

		/*
		 * MPP-10111 - During database expansion we need to be able to bring a
		 * database up in order to correct the filespace locations in the
		 * catalog.  At this point we will not be able to resolve database paths
		 * for databases not stored in "pg_default" or "pg_global".
		 *
		 * This is solved by passing a special guc to the startup during this
		 * phase of expand to bypass logic involving non-system tablespaces.
		 * Since we are bypassing the clearing of the relation cache on these
		 * databases we need to ensure that we don't try to use them at all
		 * elsewhere.  This is done with a similar check in
		 * PersistentTablespace_GetPrimaryAndMirrorFilespaces().
		 */
		if (gp_before_filespace_setup && !IsBuiltinTablespace(dattablespace))
			continue;
	}
	heap_endscan(scan);

	MirroredFlatFile_Append(&mirroredOpen, buffer.data, buffer.len,
							/* suppressError */ false);
	MirroredFlatFile_Flush(&mirroredOpen, /* suppressError */ false);
	MirroredFlatFile_Close(&mirroredOpen);

	if (buffer.maxlen > 0)
		pfree(buffer.data);

	/*
	 * Set the transaction ID wrap limit using the oldest datfrozenxid
	 */
	if (oldest_datfrozenxid != InvalidTransactionId)
		SetTransactionIdLimit(oldest_datfrozenxid, &oldest_datname);
}
Exemplo n.º 21
0
/*
 * Hot Standby feedback
 */
static void
ProcessStandbyHSFeedbackMessage(void)
{
	StandbyHSFeedbackMessage reply;
	TransactionId nextXid;
	uint32		nextEpoch;

	/* Decipher the reply message */
	pq_copymsgbytes(&reply_message, (char *) &reply,
					sizeof(StandbyHSFeedbackMessage));

	elog(DEBUG2, "hot standby feedback xmin %u epoch %u",
		 reply.xmin,
		 reply.epoch);

	/* Ignore invalid xmin (can't actually happen with current walreceiver) */
	if (!TransactionIdIsNormal(reply.xmin))
		return;

	/*
	 * Check that the provided xmin/epoch are sane, that is, not in the future
	 * and not so far back as to be already wrapped around.  Ignore if not.
	 *
	 * Epoch of nextXid should be same as standby, or if the counter has
	 * wrapped, then one greater than standby.
	 */
	GetNextXidAndEpoch(&nextXid, &nextEpoch);

	if (reply.xmin <= nextXid)
	{
		if (reply.epoch != nextEpoch)
			return;
	}
	else
	{
		if (reply.epoch + 1 != nextEpoch)
			return;
	}

	if (!TransactionIdPrecedesOrEquals(reply.xmin, nextXid))
		return;					/* epoch OK, but it's wrapped around */

	/*
	 * Set the WalSender's xmin equal to the standby's requested xmin, so that
	 * the xmin will be taken into account by GetOldestXmin.  This will hold
	 * back the removal of dead rows and thereby prevent the generation of
	 * cleanup conflicts on the standby server.
	 *
	 * There is a small window for a race condition here: although we just
	 * checked that reply.xmin precedes nextXid, the nextXid could have gotten
	 * advanced between our fetching it and applying the xmin below, perhaps
	 * far enough to make reply.xmin wrap around.  In that case the xmin we
	 * set here would be "in the future" and have no effect.  No point in
	 * worrying about this since it's too late to save the desired data
	 * anyway.  Assuming that the standby sends us an increasing sequence of
	 * xmins, this could only happen during the first reply cycle, else our
	 * own xmin would prevent nextXid from advancing so far.
	 *
	 * We don't bother taking the ProcArrayLock here.  Setting the xmin field
	 * is assumed atomic, and there's no real need to prevent a concurrent
	 * GetOldestXmin.  (If we're moving our xmin forward, this is obviously
	 * safe, and if we're moving it backwards, well, the data is at risk
	 * already since a VACUUM could have just finished calling GetOldestXmin.)
	 */
	MyPgXact->xmin = reply.xmin;
}
Exemplo n.º 22
0
/*
 * This is a copy of swap_relation_files in cluster.c, but it also swaps
 * relfrozenxid.
 */
static void
swap_heap_or_index_files(Oid r1, Oid r2)
{
    Relation	relRelation;
    HeapTuple	reltup1,
                reltup2;
    Form_pg_class relform1,
                  relform2;
    Oid			swaptemp;
    CatalogIndexState indstate;

    /* We need writable copies of both pg_class tuples. */
    relRelation = heap_open(RelationRelationId, RowExclusiveLock);

    reltup1 = SearchSysCacheCopy(RELOID,
                                 ObjectIdGetDatum(r1),
                                 0, 0, 0);
    if (!HeapTupleIsValid(reltup1))
        elog(ERROR, "cache lookup failed for relation %u", r1);
    relform1 = (Form_pg_class) GETSTRUCT(reltup1);

    reltup2 = SearchSysCacheCopy(RELOID,
                                 ObjectIdGetDatum(r2),
                                 0, 0, 0);
    if (!HeapTupleIsValid(reltup2))
        elog(ERROR, "cache lookup failed for relation %u", r2);
    relform2 = (Form_pg_class) GETSTRUCT(reltup2);

    Assert(relform1->relkind == relform2->relkind);

    /*
     * Actually swap the fields in the two tuples
     */
    swaptemp = relform1->relfilenode;
    relform1->relfilenode = relform2->relfilenode;
    relform2->relfilenode = swaptemp;

    swaptemp = relform1->reltablespace;
    relform1->reltablespace = relform2->reltablespace;
    relform2->reltablespace = swaptemp;

    swaptemp = relform1->reltoastrelid;
    relform1->reltoastrelid = relform2->reltoastrelid;
    relform2->reltoastrelid = swaptemp;

    /* set rel1's frozen Xid to larger one */
    if (TransactionIdIsNormal(relform1->relfrozenxid))
    {
        if (TransactionIdFollows(relform1->relfrozenxid,
                                 relform2->relfrozenxid))
            relform1->relfrozenxid = relform2->relfrozenxid;
        else
            relform2->relfrozenxid = relform1->relfrozenxid;
    }

    /* swap size statistics too, since new rel has freshly-updated stats */
    {
#if PG_VERSION_NUM >= 90300
        int32		swap_pages;
#else
        int4		swap_pages;
#endif
        float4		swap_tuples;

        swap_pages = relform1->relpages;
        relform1->relpages = relform2->relpages;
        relform2->relpages = swap_pages;

        swap_tuples = relform1->reltuples;
        relform1->reltuples = relform2->reltuples;
        relform2->reltuples = swap_tuples;
    }

    /* Update the tuples in pg_class */
    simple_heap_update(relRelation, &reltup1->t_self, reltup1);
    simple_heap_update(relRelation, &reltup2->t_self, reltup2);

    /* Keep system catalogs current */
    indstate = CatalogOpenIndexes(relRelation);
    CatalogIndexInsert(indstate, reltup1);
    CatalogIndexInsert(indstate, reltup2);
    CatalogCloseIndexes(indstate);

    /*
     * If we have toast tables associated with the relations being swapped,
     * change their dependency links to re-associate them with their new
     * owning relations.  Otherwise the wrong one will get dropped ...
     *
     * NOTE: it is possible that only one table has a toast table; this can
     * happen in CLUSTER if there were dropped columns in the old table, and
     * in ALTER TABLE when adding or changing type of columns.
     *
     * NOTE: at present, a TOAST table's only dependency is the one on its
     * owning table.  If more are ever created, we'd need to use something
     * more selective than deleteDependencyRecordsFor() to get rid of only the
     * link we want.
     */
    if (relform1->reltoastrelid || relform2->reltoastrelid)
    {
        ObjectAddress baseobject,
                      toastobject;
        long		count;

        /* Delete old dependencies */
        if (relform1->reltoastrelid)
        {
            count = deleteDependencyRecordsFor(RelationRelationId,
                                               relform1->reltoastrelid,
                                               false);
            if (count != 1)
                elog(ERROR, "expected one dependency record for TOAST table, found %ld",
                     count);
        }
        if (relform2->reltoastrelid)
        {
            count = deleteDependencyRecordsFor(RelationRelationId,
                                               relform2->reltoastrelid,
                                               false);
            if (count != 1)
                elog(ERROR, "expected one dependency record for TOAST table, found %ld",
                     count);
        }

        /* Register new dependencies */
        baseobject.classId = RelationRelationId;
        baseobject.objectSubId = 0;
        toastobject.classId = RelationRelationId;
        toastobject.objectSubId = 0;

        if (relform1->reltoastrelid)
        {
            baseobject.objectId = r1;
            toastobject.objectId = relform1->reltoastrelid;
            recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
        }

        if (relform2->reltoastrelid)
        {
            baseobject.objectId = r2;
            toastobject.objectId = relform2->reltoastrelid;
            recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
        }
    }

    /*
     * Blow away the old relcache entries now.	We need this kluge because
     * relcache.c keeps a link to the smgr relation for the physical file, and
     * that will be out of date as soon as we do CommandCounterIncrement.
     * Whichever of the rels is the second to be cleared during cache
     * invalidation will have a dangling reference to an already-deleted smgr
     * relation.  Rather than trying to avoid this by ordering operations just
     * so, it's easiest to not have the relcache entries there at all.
     * (Fortunately, since one of the entries is local in our transaction,
     * it's sufficient to clear out our own relcache this way; the problem
     * cannot arise for other backends when they see our update on the
     * non-local relation.)
     */
    RelationForgetRelation(r1);
    RelationForgetRelation(r2);

    /* Clean up. */
    heap_freetuple(reltup1);
    heap_freetuple(reltup2);

    heap_close(relRelation, RowExclusiveLock);
}
Exemplo n.º 23
0
/*
 * Interrogate the commit timestamp of a transaction.
 *
 * The return value indicates whether a commit timestamp record was found for
 * the given xid.  The timestamp value is returned in *ts (which may not be
 * null), and the origin node for the Xid is returned in *nodeid, if it's not
 * null.
 */
bool
TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
							 RepOriginId *nodeid)
{
	int			pageno = TransactionIdToCTsPage(xid);
	int			entryno = TransactionIdToCTsEntry(xid);
	int			slotno;
	CommitTimestampEntry entry;
	TransactionId oldestCommitTs;
	TransactionId newestCommitTs;

	/* error if the given Xid doesn't normally commit */
	if (!TransactionIdIsNormal(xid))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
		errmsg("cannot retrieve commit timestamp for transaction %u", xid)));

	LWLockAcquire(CommitTsLock, LW_SHARED);

	/* Error if module not enabled */
	if (!commitTsShared->commitTsActive)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("could not get commit timestamp data"),
			  errhint("Make sure the configuration parameter \"%s\" is set.",
					  "track_commit_timestamp")));

	/*
	 * If we're asked for the cached value, return that.  Otherwise, fall
	 * through to read from SLRU.
	 */
	if (commitTsShared->xidLastCommit == xid)
	{
		*ts = commitTsShared->dataLastCommit.time;
		if (nodeid)
			*nodeid = commitTsShared->dataLastCommit.nodeid;

		LWLockRelease(CommitTsLock);
		return *ts != 0;
	}

	oldestCommitTs = ShmemVariableCache->oldestCommitTs;
	newestCommitTs = ShmemVariableCache->newestCommitTs;
	/* neither is invalid, or both are */
	Assert(TransactionIdIsValid(oldestCommitTs) == TransactionIdIsValid(newestCommitTs));
	LWLockRelease(CommitTsLock);

	/*
	 * Return empty if the requested value is outside our valid range.
	 */
	if (!TransactionIdIsValid(oldestCommitTs) ||
		TransactionIdPrecedes(xid, oldestCommitTs) ||
		TransactionIdPrecedes(newestCommitTs, xid))
	{
		*ts = 0;
		if (nodeid)
			*nodeid = InvalidRepOriginId;
		return false;
	}

	/* lock is acquired by SimpleLruReadPage_ReadOnly */
	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid);
	memcpy(&entry,
		   CommitTsCtl->shared->page_buffer[slotno] +
		   SizeOfCommitTimestampEntry * entryno,
		   SizeOfCommitTimestampEntry);

	*ts = entry.time;
	if (nodeid)
		*nodeid = entry.nodeid;

	LWLockRelease(CommitTsControlLock);
	return *ts != 0;
}
Exemplo n.º 24
0
/*
 * ImportSnapshot
 *      Import a previously exported snapshot.  The argument should be a
 *      filename in SNAPSHOT_EXPORT_DIR.  Load the snapshot from that file.
 *      This is called by "SET TRANSACTION SNAPSHOT 'foo'".
 */
void
ImportSnapshot(const char *idstr)
{
	char		path[MAXPGPATH];
	FILE	   *f;
	struct stat	stat_buf;
	char	   *filebuf;
	int			xcnt;
	int			i;
	TransactionId src_xid;
	Oid			src_dbid;
	int			src_isolevel;
	bool		src_readonly;
	SnapshotData snapshot;

	/*
	 * Must be at top level of a fresh transaction.  Note in particular that
	 * we check we haven't acquired an XID --- if we have, it's conceivable
	 * that the snapshot would show it as not running, making for very
	 * screwy behavior.
	 */
	if (FirstSnapshotSet ||
		GetTopTransactionIdIfAny() != InvalidTransactionId ||
		IsSubTransaction())
		ereport(ERROR,
				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
				 errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));

	/*
	 * If we are in read committed mode then the next query would execute
	 * with a new snapshot thus making this function call quite useless.
	 */
	if (!IsolationUsesXactSnapshot())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));

	/*
	 * Verify the identifier: only 0-9, A-F and hyphens are allowed.  We do
	 * this mainly to prevent reading arbitrary files.
	 */
	if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid snapshot identifier \"%s\"", idstr)));

	/* OK, read the file */
	snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);

	f = AllocateFile(path, PG_BINARY_R);
	if (!f)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid snapshot identifier \"%s\"", idstr)));

	/* get the size of the file so that we know how much memory we need */
	if (fstat(fileno(f), &stat_buf))
		elog(ERROR, "could not stat file \"%s\": %m", path);

	/* and read the file into a palloc'd string */
	filebuf = (char *) palloc(stat_buf.st_size + 1);
	if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
		elog(ERROR, "could not read file \"%s\": %m", path);

	filebuf[stat_buf.st_size] = '\0';

	FreeFile(f);

	/*
	 * Construct a snapshot struct by parsing the file content.
	 */
	memset(&snapshot, 0, sizeof(snapshot));

	src_xid = parseXidFromText("xid:", &filebuf, path);
	/* we abuse parseXidFromText a bit here ... */
	src_dbid = parseXidFromText("dbid:", &filebuf, path);
	src_isolevel = parseIntFromText("iso:", &filebuf, path);
	src_readonly = parseIntFromText("ro:", &filebuf, path);

	snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
	snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);

	snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);

	/* sanity-check the xid count before palloc */
	if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("invalid snapshot data in file \"%s\"", path)));

	snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
	for (i = 0; i < xcnt; i++)
		snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);

	snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);

	if (!snapshot.suboverflowed)
	{
		snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);

		/* sanity-check the xid count before palloc */
		if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("invalid snapshot data in file \"%s\"", path)));

		snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
		for (i = 0; i < xcnt; i++)
			snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
	}
	else
	{
		snapshot.subxcnt = 0;
		snapshot.subxip = NULL;
	}

	snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);

	/*
	 * Do some additional sanity checking, just to protect ourselves.  We
	 * don't trouble to check the array elements, just the most critical
	 * fields.
	 */
	if (!TransactionIdIsNormal(src_xid) ||
		!OidIsValid(src_dbid) ||
		!TransactionIdIsNormal(snapshot.xmin) ||
		!TransactionIdIsNormal(snapshot.xmax))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("invalid snapshot data in file \"%s\"", path)));

	/*
	 * If we're serializable, the source transaction must be too, otherwise
	 * predicate.c has problems (SxactGlobalXmin could go backwards).  Also,
	 * a non-read-only transaction can't adopt a snapshot from a read-only
	 * transaction, as predicate.c handles the cases very differently.
	 */
	if (IsolationIsSerializable())
	{
		if (src_isolevel != XACT_SERIALIZABLE)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
		if (src_readonly && !XactReadOnly)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
	}

	/*
	 * We cannot import a snapshot that was taken in a different database,
	 * because vacuum calculates OldestXmin on a per-database basis; so the
	 * source transaction's xmin doesn't protect us from data loss.  This
	 * restriction could be removed if the source transaction were to mark
	 * its xmin as being globally applicable.  But that would require some
	 * additional syntax, since that has to be known when the snapshot is
	 * initially taken.  (See pgsql-hackers discussion of 2011-10-21.)
	 */
	if (src_dbid != MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot import a snapshot from a different database")));

	/* OK, install the snapshot */
	SetTransactionSnapshot(&snapshot, src_xid);
}