Example #1
/*
 * Individual logging of AccessExclusiveLocks for use during LockAcquire()
 */
void
LogAccessExclusiveLock(Oid dbOid, Oid relOid)
{
	xl_standby_lock xlrec;

	/*
	 * Ensure that a TransactionId has been assigned to this transaction. We
	 * don't actually need the xid yet but if we don't do this then
	 * RecordTransactionCommit() and RecordTransactionAbort() will optimise
	 * away the transaction completion record which recovery relies upon to
	 * release locks. It's a hack, but for a corner case not worth adding code
	 * for into the main commit path.
	 */
	xlrec.xid = GetTopTransactionId();

	/*
	 * Decode the locktag back to the original values, to avoid sending lots
	 * of empty bytes with every message.  See lock.h to check how a locktag
	 * is defined for LOCKTAG_RELATION
	 */
	xlrec.dbOid = dbOid;
	xlrec.relOid = relOid;

	LogAccessExclusiveLocks(1, &xlrec);
}
Example #2
File: txid.c Project: PengJi/gpdb-comments
/*
 * txid_current() returns int8
 *
 *		Return the current toplevel transaction ID as TXID
 */
Datum
txid_current(PG_FUNCTION_ARGS)
{
	txid		val;
	TxidEpoch	state;

	load_xid_epoch(&state);

	val = convert_xid(GetTopTransactionId(), &state);

	PG_RETURN_INT64(val);
}
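For context, a rough sketch of what load_xid_epoch() and convert_xid() above are doing: the current epoch is combined with the 32-bit xid into a 64-bit txid, and the epoch is nudged when the xid lies on the far side of a wraparound boundary. The TxidEpoch field names (epoch, last_xid) are assumptions here; this is a sketch, not a verbatim excerpt of txid.c.

static txid
convert_xid(TransactionId xid, const TxidEpoch *state)
{
	uint64		epoch;

	/* special (permanent) XIDs are passed through unchanged */
	if (!TransactionIdIsNormal(xid))
		return (txid) xid;

	/* near a wraparound the xid may belong to the previous or next epoch */
	epoch = (uint64) state->epoch;
	if (xid > state->last_xid && TransactionIdPrecedes(xid, state->last_xid))
		epoch--;
	else if (xid < state->last_xid && TransactionIdFollows(xid, state->last_xid))
		epoch++;

	return (epoch << 32) | xid;
}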
Example #3
/*
 * AtEOXact_Snapshot
 *		Snapshot manager's cleanup function for end of transaction
 */
void
AtEOXact_Snapshot(bool isCommit)
{
	/*
	 * In transaction-snapshot mode we must release our privately-managed
	 * reference to the transaction snapshot.  We must decrement
	 * RegisteredSnapshots to keep the check below happy.  But we don't bother
	 * to do FreeSnapshot, for two reasons: the memory will go away with
	 * TopTransactionContext anyway, and if someone has left the snapshot
	 * stacked as active, we don't want the code below to be chasing through a
	 * dangling pointer.
	 */
	if (FirstXactSnapshot != NULL)
	{
		Assert(FirstXactSnapshot->regd_count > 0);
		Assert(!pairingheap_is_empty(&RegisteredSnapshots));
		pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
	}
	FirstXactSnapshot = NULL;

	/*
	 * If we exported any snapshots, clean them up.
	 */
	if (exportedSnapshots != NIL)
	{
		TransactionId myxid = GetTopTransactionId();
		int			i;
		char		buf[MAXPGPATH];
		ListCell   *lc;

		/*
		 * Get rid of the files.  Unlink failure is only a WARNING because (1)
		 * it's too late to abort the transaction, and (2) leaving a leaked
		 * file around has little real consequence anyway.
		 */
		for (i = 1; i <= list_length(exportedSnapshots); i++)
		{
			XactExportFilePath(buf, myxid, i, "");
			if (unlink(buf))
				elog(WARNING, "could not unlink file \"%s\": %m", buf);
		}

		/*
		 * As with the FirstXactSnapshot, we needn't spend any effort on
		 * cleaning up the per-snapshot data structures, but we do need to
		 * unlink them from RegisteredSnapshots to prevent a warning below.
		 */
		foreach(lc, exportedSnapshots)
		{
			Snapshot	snap = (Snapshot) lfirst(lc);

			pairingheap_remove(&RegisteredSnapshots, &snap->ph_node);
		}
Example #4
/*
 *		xid_age			- compute age of an XID (relative to current xact)
 */
Datum
xid_age(PG_FUNCTION_ARGS)
{
    TransactionId xid = PG_GETARG_TRANSACTIONID(0);
    TransactionId now = GetTopTransactionId();

    /* Permanent XIDs are always infinitely old */
    if (!TransactionIdIsNormal(xid))
        PG_RETURN_INT32(INT_MAX);

    PG_RETURN_INT32((int32) (now - xid));
}
Example #5
/*
 * LockGXact
 *		Locate the prepared transaction and mark it busy for COMMIT or PREPARE.
 */
static GlobalTransaction
LockGXact(const char *gid, Oid user)
{
	int			i;

	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);

	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
	{
		GlobalTransaction gxact = TwoPhaseState->prepXacts[i];

		/* Ignore not-yet-valid GIDs */
		if (!gxact->valid)
			continue;
		if (strcmp(gxact->gid, gid) != 0)
			continue;

		/* Found it, but has someone else got it locked? */
		if (TransactionIdIsValid(gxact->locking_xid))
		{
			if (TransactionIdIsActive(gxact->locking_xid))
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg("prepared transaction with identifier \"%s\" is busy",
					   gid)));
			gxact->locking_xid = InvalidTransactionId;
		}

		if (user != gxact->owner && !superuser_arg(user))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				  errmsg("permission denied to finish prepared transaction"),
					 errhint("Must be superuser or the user that prepared the transaction.")));

		/* OK for me to lock it */
		gxact->locking_xid = GetTopTransactionId();

		LWLockRelease(TwoPhaseStateLock);

		return gxact;
	}

	LWLockRelease(TwoPhaseStateLock);

	ereport(ERROR,
			(errcode(ERRCODE_UNDEFINED_OBJECT),
		 errmsg("prepared transaction with identifier \"%s\" does not exist",
				gid)));

	/* NOTREACHED */
	return NULL;
}
Example #6
File: standby.c Project: hl0103/pgxc
/*
 * Individual logging of AccessExclusiveLocks for use during LockAcquire()
 */
void
LogAccessExclusiveLock(Oid dbOid, Oid relOid)
{
	xl_standby_lock xlrec;

	xlrec.xid = GetTopTransactionId();

	/*
	 * Decode the locktag back to the original values, to avoid sending lots
	 * of empty bytes with every message.  See lock.h to check how a locktag
	 * is defined for LOCKTAG_RELATION
	 */
	xlrec.dbOid = dbOid;
	xlrec.relOid = relOid;

	LogAccessExclusiveLocks(1, &xlrec);
}
Example #7
File: standby.c Project: hl0103/pgxc
/*
 * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
 */
void
LogAccessExclusiveLockPrepare(void)
{
	/*
	 * Ensure that a TransactionId has been assigned to this transaction, for
	 * two reasons, both related to lock release on the standby. First, we
	 * must assign an xid so that RecordTransactionCommit() and
	 * RecordTransactionAbort() do not optimise away the transaction
	 * completion record which recovery relies upon to release locks. It's a
	 * hack, but for a corner case not worth adding code for into the main
	 * commit path. Second, we must assign an xid before the lock is
	 * recorded in shared memory, otherwise a concurrently executing
	 * GetRunningTransactionLocks() might see a lock associated with an
	 * InvalidTransactionId which we later assert cannot happen.
	 */
	(void) GetTopTransactionId();
}
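A hedged sketch of how the two routines above are meant to be ordered inside LockAcquire() when an AccessExclusiveLock on a relation is taken with standby WAL logging active. The surrounding conditions and the log_lock flag are assumptions for illustration, not code shown in these examples:

	bool		log_lock = false;

	if (lockmode >= AccessExclusiveLock &&
		locktag->locktag_type == LOCKTAG_RELATION &&
		!RecoveryInProgress() &&
		XLogStandbyInfoActive())
	{
		/* assign the xid before the lock becomes visible in shared memory */
		LogAccessExclusiveLockPrepare();
		log_lock = true;
	}

	/* ... set up / wait for the lock in the shared lock table ... */

	if (log_lock)
	{
		/* now WAL-log the lock so the standby can re-acquire it */
		LogAccessExclusiveLock(locktag->locktag_field1,
							   locktag->locktag_field2);
	}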
Example #8
/*
 * txid_current() returns int8
 *
 *	Return the current toplevel transaction ID as TXID
 *	If the current transaction does not have one, one is assigned.
 */
Datum
txid_current(PG_FUNCTION_ARGS)
{
	txid		val;
	TxidEpoch	state;

	/*
	 * Must prevent during recovery because if an xid is not assigned we try
	 * to assign one, which would fail. Programs already rely on this function
	 * to always return a valid current xid, so we should not change this to
	 * return NULL or similar invalid xid.
	 */
	PreventCommandDuringRecovery("txid_current()");

	load_xid_epoch(&state);

	val = convert_xid(GetTopTransactionId(), &state);

	PG_RETURN_INT64(val);
}
Example #9
File: lmgr.c Project: AnLingm/gpdb
/*
 *		XactLockTableWait
 *
 * Wait for the specified transaction to commit or abort.
 *
 * Note that this does the right thing for subtransactions: if we wait on a
 * subtransaction, we will exit as soon as it aborts or its top parent commits.
 * It takes some extra work to ensure this, because to save on shared memory
 * the XID lock of a subtransaction is released when it ends, whether
 * successfully or unsuccessfully.	So we have to check if it's "still running"
 * and if so wait for its parent.
 */
void
XactLockTableWait(TransactionId xid)
{
	LOCKTAG		tag;

	for (;;)
	{
		Assert(TransactionIdIsValid(xid));
		Assert(!TransactionIdEquals(xid, GetTopTransactionId()));

		SET_LOCKTAG_TRANSACTION(tag, xid);

		(void) LockAcquire(&tag, ShareLock, false, false);

		LockRelease(&tag, ShareLock, false);

		if (!TransactionIdIsInProgress(xid))
			break;
		xid = SubTransGetParent(xid);
	}
}
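The wait above works because every transaction first takes an ExclusiveLock on its own XID and holds it until commit or abort, so a ShareLock request from another backend blocks for exactly that long. A minimal sketch of that counterpart, XactLockTableInsert(), is shown below (a sketch of lmgr.c, not a verbatim excerpt):

void
XactLockTableInsert(TransactionId xid)
{
	LOCKTAG		tag;

	SET_LOCKTAG_TRANSACTION(tag, xid);

	/* held until transaction end; never released explicitly */
	(void) LockAcquire(&tag, ExclusiveLock, false, false);
}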
Example #10
File: extractGridData.c Project: metno/wdb
struct GridPointDataListIterator * getExtractGridDataReturnValues(FunctionCallInfo fcinfo)
{
    struct PlaceSpecification ps;
    Datum placeSpec = PG_GETARG_DATUM(0);
    extractPlaceSpecification( & ps, & placeSpec );

    GEOSGeom location = NULL;
    if ( ! PG_ARGISNULL(1) )
    {
		bytea * locationRaw = PG_GETARG_BYTEA_P(1);
		location = GEOSGeomFromWKB_buf((unsigned char *) VARDATA(locationRaw), VARSIZE(locationRaw) - VARHDRSZ);
    }

    enum InterpolationType interpolation = (enum InterpolationType) PG_GETARG_INT32(2);
    FileId dataId = PG_GETARG_INT64(3);

    TransactionId xid = GetTopTransactionId();
    CommandId cid = GetCurrentCommandId(true); // Incremented for each function call in the same transaction

    // function takes ownership of location parameter
    struct GridPointDataListIterator * ret = readPoints(& ps, location, interpolation, dataId, xid, cid);
    return ret;
}
Example #11
/*
 * fetch insert plan from cache.
 */
static void *load_insert_plan(Datum qname, struct QueueState *state)
{
	struct InsertCacheEntry *entry;
	Oid queue_id = state->queue_id;
	bool did_exist = false;

	entry = hash_search(insert_cache, &queue_id, HASH_ENTER, &did_exist);
	if (did_exist) {
		if (entry->plan && state->cur_table == entry->cur_table)
			goto valid_table;
		if (entry->plan)
			SPI_freeplan(entry->plan);
	}

	entry->cur_table = state->cur_table;
	entry->last_xid = 0;
	entry->plan = NULL;

	/* this can fail, struct must be valid before */
	entry->plan = make_plan(state);
valid_table:

	if (state->per_tx_limit >= 0) {
		TransactionId xid = GetTopTransactionId();
		if (entry->last_xid != xid) {
			entry->last_xid = xid;
			entry->last_count = 0;
		}
		entry->last_count++;
		if (entry->last_count > state->per_tx_limit)
			elog(ERROR, "Queue '%s' allows max %d events from one TX",
			     TextDatumGetCString(qname), state->per_tx_limit);
	}

	return entry->plan;
}
Example #12
File: lmgr.c Project: AnLingm/gpdb
/*
 *		ConditionalXactLockTableWait
 *
 * As above, but only lock if we can get the lock without blocking.
 * Returns TRUE if the lock was acquired.
 */
bool
ConditionalXactLockTableWait(TransactionId xid)
{
	LOCKTAG		tag;

	for (;;)
	{
		Assert(TransactionIdIsValid(xid));
		Assert(!TransactionIdEquals(xid, GetTopTransactionId()));

		SET_LOCKTAG_TRANSACTION(tag, xid);

		if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL)
			return false;

		LockRelease(&tag, ShareLock, false);

		if (!TransactionIdIsInProgress(xid))
			break;
		xid = SubTransGetParent(xid);
	}

	return true;
}
Example #13
/*
 * AtEOXact_Snapshot
 *		Snapshot manager's cleanup function for end of transaction
 */
void
AtEOXact_Snapshot(bool isCommit)
{
	/*
	 * In transaction-snapshot mode we must release our privately-managed
	 * reference to the transaction snapshot.  We must decrement
	 * RegisteredSnapshots to keep the check below happy.  But we don't bother
	 * to do FreeSnapshot, for two reasons: the memory will go away with
	 * TopTransactionContext anyway, and if someone has left the snapshot
	 * stacked as active, we don't want the code below to be chasing through
	 * a dangling pointer.
	 */
	if (FirstXactSnapshot != NULL)
	{
		Assert(FirstXactSnapshot->regd_count > 0);
		Assert(RegisteredSnapshots > 0);
		RegisteredSnapshots--;
	}
	FirstXactSnapshot = NULL;

	/*
	 * If we exported any snapshots, clean them up.
	 */
	if (exportedSnapshots != NIL)
	{
		TransactionId myxid = GetTopTransactionId();
		int			i;
		char		buf[MAXPGPATH];

		/*
		 * Get rid of the files.  Unlink failure is only a WARNING because
		 * (1) it's too late to abort the transaction, and (2) leaving a
		 * leaked file around has little real consequence anyway.
		 */
		for (i = 1; i <= list_length(exportedSnapshots); i++)
		{
			XactExportFilePath(buf, myxid, i, "");
			if (unlink(buf))
				elog(WARNING, "could not unlink file \"%s\": %m", buf);
		}

		/*
		 * As with the FirstXactSnapshot, we needn't spend any effort on
		 * cleaning up the per-snapshot data structures, but we do need to
		 * adjust the RegisteredSnapshots count to prevent a warning below.
		 *
		 * Note: you might be thinking "why do we have the exportedSnapshots
		 * list at all?  All we need is a counter!".  You're right, but we do
		 * it this way in case we ever feel like improving xmin management.
		 */
		Assert(RegisteredSnapshots >= list_length(exportedSnapshots));
		RegisteredSnapshots -= list_length(exportedSnapshots);

		exportedSnapshots = NIL;
	}

	/* On commit, complain about leftover snapshots */
	if (isCommit)
	{
		ActiveSnapshotElt *active;

		if (RegisteredSnapshots != 0)
			elog(WARNING, "%d registered snapshots seem to remain after cleanup",
				 RegisteredSnapshots);

		/* complain about unpopped active snapshots */
		for (active = ActiveSnapshot; active != NULL; active = active->as_next)
			elog(WARNING, "snapshot %p still active", active);
	}

	/*
	 * And reset our state.  We don't need to free the memory explicitly --
	 * it'll go away with TopTransactionContext.
	 */
	ActiveSnapshot = NULL;
	RegisteredSnapshots = 0;

	CurrentSnapshot = NULL;
	SecondarySnapshot = NULL;

	FirstSnapshotSet = false;

	SnapshotResetXmin();
}
Example #14
/*
 * Indicate we intend to create a filespace file as part of the current transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After 1 or more calls to this routine to mark intention about filespace files that are going
 * to be created, call ~_DoPendingCreates to do the actual file-system creates.  (See its
 * note on XLOG flushing).
 */
void PersistentFilespace_MarkCreatePending(
	Oid 		filespaceOid,
	char 		*filespaceLocation,
	ItemPointer		persistentTid,
	int64			*persistentSerialNum,
	bool			flushToXLog)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;

	FilespaceDirEntry filespaceDirEntry;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent filespace %u because we are before persistence work",
				 filespaceOid);

	return;	// The initdb process will load the persistent table once we are out of bootstrap mode.
	}

	PersistentFilespace_VerifyInitScan();

	PersistentFileSysObjName_SetFilespaceDir(&fsObjName,filespaceOid,is_filespace_shared);

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	filespaceDirEntry =
				PersistentFilespace_CreateDirUnderLock(filespaceOid);
	if (filespaceDirEntry == NULL)
	{
		/* If out of shared memory, no need to promote to PANIC. */
		WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Out of shared-memory for persistent filespaces"),
				 errhint("You may need to increase the gp_max_filespaces value"),
				 errOmitLocation(true)));
	}

	PersistentFilespace_BlankPadCopyLocation(
										filespaceDirEntry->locationBlankPadded1,
										filespaceLocation);

	filespaceDirEntry->state = PersistentFileSysState_CreatePending;

	PersistentFilespace_AddTuple(
							filespaceDirEntry,
							/* createMirrorDataLossTrackingSessionNum */ 0,
							/* reserved */ 0,
							/* parentXid */ GetTopTransactionId(),
							flushToXLog);

	*persistentTid = filespaceDirEntry->persistentTid;
	*persistentSerialNum = filespaceDirEntry->persistentSerialNum;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_filespace(filespaceOid);
#endif


	#ifdef FAULT_INJECTOR
			FaultInjector_InjectFaultIfSet(
										   FaultBeforePendingDeleteFilespaceEntry,
										   DDLNotSpecified,
										   "",  // databaseName
										   ""); // tableName
	#endif

	/*
	 * MPP-18228
	 * To make adding 'Create Pending' entry to persistent table and adding
	 * to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
								&fsObjName,
								persistentTid,
								*persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
		     "Persistent filespace directory: Add '%s' in state 'Created', serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 *persistentSerialNum,
			 ItemPointerToString(persistentTid));
}
Example #15
/*
 * StrategyBufferLookup
 *
 *	Lookup a page request in the cache directory. A buffer is only
 *	returned for a T1 or T2 cache hit. B1 hits are just
 *	remembered here, to possibly affect the behaviour later.
 *
 *	recheck indicates we are rechecking after I/O wait; do not change
 *	internal status in this case.
 *
 *	*cdb_found_index is set to the index of the found CDB, or -1 if none.
 *	This is not intended to be used by the caller, except to pass to
 *	StrategyReplaceBuffer().
 */
BufferDesc *
StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
					 int *cdb_found_index)
{
	BufferStrategyCDB *cdb;

	/* Optional stats printout */
	if (DebugSharedBuffers > 0)
		StrategyStatsDump();

	/*
	 * Count lookups
	 */
	StrategyControl->num_lookup++;

	/*
	 * Lookup the block in the shared hash table
	 */
	*cdb_found_index = BufTableLookup(tagPtr);

	/*
	 * Done if complete CDB lookup miss
	 */
	if (*cdb_found_index < 0)
		return NULL;

	/*
	 * We found a CDB
	 */
	cdb = &StrategyCDB[*cdb_found_index];

	/*
	 * Count hits
	 */
	StrategyControl->num_hit[cdb->list]++;

	/*
	 * If this is a T2 hit, we simply move the CDB to the T2 MRU position
	 * and return the found buffer.
	 *
	 * A CDB in T2 cannot have t1_vacuum set, so we needn't check.  However,
	 * if the current process is VACUUM then it doesn't promote to MRU.
	 */
	if (cdb->list == STRAT_LIST_T2)
	{
		if (!strategy_hint_vacuum)
		{
			STRAT_LIST_REMOVE(cdb);
			STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
		}

		return &BufferDescriptors[cdb->buf_id];
	}

	/*
	 * If this is a T1 hit, we move the buffer to the T2 MRU only if
	 * another transaction had read it into T1, *and* neither transaction
	 * is a VACUUM. This is required because any UPDATE or DELETE in
	 * PostgreSQL does multiple ReadBuffer(), first during the scan, later
	 * during the heap_update() or heap_delete().  Otherwise move to T1
	 * MRU.  VACUUM doesn't even get to make that happen.
	 */
	if (cdb->list == STRAT_LIST_T1)
	{
		if (!strategy_hint_vacuum)
		{
			if (!cdb->t1_vacuum &&
				!TransactionIdEquals(cdb->t1_xid, GetTopTransactionId()))
			{
				STRAT_LIST_REMOVE(cdb);
				STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
			}
			else
			{
				STRAT_LIST_REMOVE(cdb);
				STRAT_MRU_INSERT(cdb, STRAT_LIST_T1);

				/*
				 * If a non-VACUUM process references a page recently
				 * loaded by VACUUM, clear the stigma; the state will now
				 * be the same as if this process loaded it originally.
				 */
				if (cdb->t1_vacuum)
				{
					cdb->t1_xid = GetTopTransactionId();
					cdb->t1_vacuum = false;
				}
			}
		}

		return &BufferDescriptors[cdb->buf_id];
	}

	/*
	 * Even though we had seen the block in the past, its data is not
	 * currently in memory ... cache miss to the bufmgr.
	 */
	Assert(cdb->list == STRAT_LIST_B1);
	return NULL;
}
Example #16
/*
 * StrategyReplaceBuffer
 *
 *	Called by the buffer manager to inform us that he flushed a buffer
 *	and is now about to replace the content. Prior to this call,
 *	the cache algorithm still reports the buffer as in the cache. After
 *	this call we report the new block, even if IO might still need to
 *	be done to bring in the new content.
 *
 *	cdb_found_index and cdb_replace_index must be the auxiliary values
 *	returned by previous calls to StrategyBufferLookup and StrategyGetBuffer.
 */
void
StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
					  int cdb_found_index, int cdb_replace_index)
{
	BufferStrategyCDB *cdb_found;
	BufferStrategyCDB *cdb_replace;

	if (cdb_found_index >= 0)
	{
		/* This must have been a ghost buffer cache hit (B1 list) */
		cdb_found = &StrategyCDB[cdb_found_index];

		/* Assert that the buffer remembered in cdb_found is the one */
		/* the buffer manager is currently faulting in */
		Assert(BUFFERTAGS_EQUAL(cdb_found->buf_tag, *newTag));

		if (cdb_replace_index >= 0)
		{
			/* We are satisfying it with an evicted T buffer */
			cdb_replace = &StrategyCDB[cdb_replace_index];

			/* Assert that the buffer remembered in cdb_replace is */
			/* the one the buffer manager has just evicted */
			Assert(cdb_replace->list == STRAT_LIST_T1 ||
				   cdb_replace->list == STRAT_LIST_T2);
			Assert(cdb_replace->buf_id == buf->buf_id);
			Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));

			/*
			 * Under normal circumstances we move evicted T1 list entries
			 * to the B1 list.  However, T1 entries that exist only because
			 * of VACUUM are just thrown into the unused list instead,
			 * since it's unlikely they'll be touched again soon.  Similarly,
			 * evicted T2 entries are thrown away; the LRU T2 entry cannot
			 * have been touched recently.
			 */
			if (cdb_replace->t1_vacuum || cdb_replace->list == STRAT_LIST_T2)
			{
				BufTableDelete(&(cdb_replace->buf_tag));
				STRAT_LIST_REMOVE(cdb_replace);
				cdb_replace->next = StrategyControl->listUnusedCDB;
				StrategyControl->listUnusedCDB = cdb_replace_index;
			}
			else
			{
				STRAT_LIST_REMOVE(cdb_replace);
				STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
			}
			/* And clear its block reference */
			cdb_replace->buf_id = -1;
		}
		else
		{
			/* We are satisfying it with an unused buffer */
		}

		/* Now the found B1 CDB gets the buffer and is moved to T2 */
		cdb_found->buf_id = buf->buf_id;
		STRAT_LIST_REMOVE(cdb_found);
		STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
	}
	else
	{
		/*
		 * This was a complete cache miss, so we need to create a new CDB.
		 * We use a free one if available, else reclaim the tail end of B1.
		 */
		if (StrategyControl->listUnusedCDB >= 0)
		{
			cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB];
			StrategyControl->listUnusedCDB = cdb_found->next;
		}
		else
		{
			/* Can't fail because we have more CDBs than buffers... */
			if (B1_LENGTH == 0)
				elog(PANIC, "StrategyReplaceBuffer: out of CDBs");
			cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];

			BufTableDelete(&(cdb_found->buf_tag));
			STRAT_LIST_REMOVE(cdb_found);
		}

		/* Set the CDB's buf_tag and insert it into the hash table */
		cdb_found->buf_tag = *newTag;
		BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));

		if (cdb_replace_index >= 0)
		{
			/*
			 * The buffer was formerly in a T list, move its CDB to the
			 * appropriate list: B1 if T1, else discard it, as above
			 */
			cdb_replace = &StrategyCDB[cdb_replace_index];

			Assert(cdb_replace->list == STRAT_LIST_T1 ||
				   cdb_replace->list == STRAT_LIST_T2);
			Assert(cdb_replace->buf_id == buf->buf_id);
			Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));

			if (cdb_replace->list == STRAT_LIST_T1)
			{
				STRAT_LIST_REMOVE(cdb_replace);
				STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
			}
			else
			{
				BufTableDelete(&(cdb_replace->buf_tag));
				STRAT_LIST_REMOVE(cdb_replace);
				cdb_replace->next = StrategyControl->listUnusedCDB;
				StrategyControl->listUnusedCDB = cdb_replace_index;
			}
			/* And clear its block reference */
			cdb_replace->buf_id = -1;
		}
		else
		{
			/* We are satisfying it with an unused buffer */
		}

		/* Assign the buffer id to the new CDB */
		cdb_found->buf_id = buf->buf_id;

		/*
		 * Specialized VACUUM optimization. If this complete cache miss
		 * happened because vacuum needed the page, we place it at the LRU
		 * position of T1; normally it goes at the MRU position.
		 */
		if (strategy_hint_vacuum)
		{
			if (TransactionIdEquals(strategy_vacuum_xid,
									GetTopTransactionId()))
				STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
			else
			{
				/* VACUUM must have been aborted by error, reset flag */
				strategy_hint_vacuum = false;
				STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
			}
		}
		else
			STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);

		/*
		 * Remember the Xid when this buffer went onto T1 to avoid a
		 * single UPDATE promoting a newcomer straight into T2. Also
		 * remember if it was loaded for VACUUM.
		 */
		cdb_found->t1_xid = GetTopTransactionId();
		cdb_found->t1_vacuum = strategy_hint_vacuum;
	}
}
Example #17
/*
 * Indicate we intend to create a relation file as part of the current transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After 1 or more calls to this routine to mark intention about relation files that are going
 * to be created, call ~_DoPendingCreates to do the actual file-system creates.  (See its
 * note on XLOG flushing).
 */
void PersistentDatabase_MarkCreatePending(
	DbDirNode 		*dbDirNode,
	ItemPointer		persistentTid,
	int64			*persistentSerialNum,
	bool			flushToXLog)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	DatabaseDirEntry databaseDirEntry;
	SharedOidSearchAddResult addResult;

	PersistentFileSysObjName fsObjName;

	if (Persistent_BeforePersistenceWork())
	{	
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(), 
				 "Skipping persistent database '%s' because we are before persistence work",
				 GetDatabasePath(
					  dbDirNode->database, 
					  dbDirNode->tablespace));
		/*
		 * The initdb process will load the persistent table once we are
		 * out of bootstrap mode.
		 */
		return;
	}

	PersistentDatabase_VerifyInitScan();

	PersistentFileSysObjName_SetDatabaseDir(
									&fsObjName,
									dbDirNode->tablespace,
									dbDirNode->database,
									is_tablespace_shared);

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	databaseDirEntry =
			(DatabaseDirEntry)
				    SharedOidSearch_Find(
				    		&persistentDatabaseSharedData->databaseDirSearchTable,
				    		dbDirNode->database,
				    		dbDirNode->tablespace);
	if (databaseDirEntry != NULL)
		elog(ERROR, "Persistent database entry '%s' already exists in state '%s'", 
			 GetDatabasePath(
				   dbDirNode->database, 
				   dbDirNode->tablespace),
		     PersistentFileSysObjState_Name(databaseDirEntry->state));

	addResult =
		    SharedOidSearch_Add(
		    		&persistentDatabaseSharedData->databaseDirSearchTable,
					dbDirNode->database,
					dbDirNode->tablespace,
		    		(SharedOidSearchObjHeader**)&databaseDirEntry);
	if (addResult == SharedOidSearchAddResult_NoMemory)
	{
		/* If out of shared memory, no need to promote to PANIC. */
		WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Out of shared-memory for persistent databases"),
				 errhint("You may need to increase the gp_max_databases and "
				 		 "gp_max_tablespaces value"),
				 errOmitLocation(true)));
	}
	else if (addResult == SharedOidSearchAddResult_Exists)
		elog(PANIC, "Persistent database entry '%s' already exists in state '%s'", 
		     GetDatabasePath(
		     		dbDirNode->database, 
		     		dbDirNode->tablespace),
		     PersistentFileSysObjState_Name(databaseDirEntry->state));
	else
		Assert(addResult == SharedOidSearchAddResult_Ok);

	databaseDirEntry->state = PersistentFileSysState_CreatePending;

	databaseDirEntry->iteratorRefCount = 0;

	PersistentDatabase_AddTuple(
							databaseDirEntry,
							/* reserved */ 0,
							/* parentXid */ GetTopTransactionId(),
							flushToXLog);

	*persistentTid = databaseDirEntry->persistentTid;
	*persistentSerialNum = databaseDirEntry->persistentSerialNum;
	
	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_database(dbDirNode->tablespace, dbDirNode->database); 
#endif


	#ifdef FAULT_INJECTOR
			FaultInjector_InjectFaultIfSet(
										   FaultBeforePendingDeleteDatabaseEntry,
										   DDLNotSpecified,
										   "",  // databaseName
										   ""); // tableName
	#endif

	/*
	 * MPP-18228
	 * To make adding 'Create Pending' entry to persistent table and adding
	 * to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
					&fsObjName,
					persistentTid,
					*persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
}
Example #18
/*
 * AlterSequence
 *
 * Modify the definition of a sequence relation
 */
ObjectAddress
AlterSequence(AlterSeqStmt *stmt)
{
	Oid			relid;
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;
	FormData_pg_sequence new___;
	List	   *owned_by;
	ObjectAddress address;

	/* Open and lock sequence. */
	relid = RangeVarGetRelid(stmt->sequence, AccessShareLock, stmt->missing_ok);
	if (relid == InvalidOid)
	{
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->sequence->relname)));
		return InvalidObjectAddress;
	}

	init_sequence(relid, &elm, &seqrel);

	/* allow ALTER to sequence owner only */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
					   stmt->sequence->relname);

	/* lock page' buffer and read tuple into new___ sequence structure */
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);

	/* Copy old values of options into workspace */
	memcpy(&new___, seq, sizeof(FormData_pg_sequence));

	/* Check and set new___ values */
	init_params(stmt->options, false, &new___, &owned_by);

	/* Clear local cache so that we don't think we have cached numbers */
	/* Note that we do not change the currval() state */
	elm->cached = elm->last;

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* Now okay to update the on-disk tuple */
	START_CRIT_SECTION();

	memcpy(seq, &new___, sizeof(FormData_pg_sequence));

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		Page		page = BufferGetPage(buf);

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = seqrel->rd_node;
		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));

		XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(seqrel, owned_by);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	ObjectAddressSet(address, RelationRelationId, relid);

	relation_close(seqrel, NoLock);

	return address;
}
Example #19
/*
 * Indicate we intend to create a tablespace file as part of the current transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After 1 or more calls to this routine to mark intention about tablespace files that are going
 * to be created, call ~_DoPendingCreates to do the actual file-system creates.  (See its
 * note on XLOG flushing).
 */
void
PersistentTablespace_MarkCreatePending(
									   Oid filespaceOid,
 /* The filespace where the tablespace lives. */

									   Oid tablespaceOid,
 /* The tablespace OID for the create. */

									   MirroredObjectExistenceState mirrorExistenceState,

									   ItemPointer persistentTid,
 /* TID of the gp_persistent_rel_files tuple for the rel file */

									   int64 *persistentSerialNum,


									   bool flushToXLog)
 /* When true, the XLOG record for this change will be flushed to disk. */

{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;

	TablespaceDirEntry tablespaceDirEntry;
	TransactionId topXid;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent tablespace %u because we are before persistence work",
				 tablespaceOid);

		return;

		/*
		 * The initdb process will load the persistent table once we are out of
		 * bootstrap mode.
		 */
	}

	PersistentTablespace_VerifyInitScan();

	PersistentFileSysObjName_SetTablespaceDir(&fsObjName, tablespaceOid);

	topXid = GetTopTransactionId();

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	PersistentTablespace_AddTuple(
								  filespaceOid,
								  tablespaceOid,
								  PersistentFileSysState_CreatePending,
								   /* createMirrorDataLossTrackingSessionNum */ 0,
								  mirrorExistenceState,
								   /* reserved */ 0,
								   /* parentXid */ topXid,
								  flushToXLog,
								  persistentTid,
								  persistentSerialNum);

	WRITE_TABLESPACE_HASH_LOCK;
	tablespaceDirEntry =
		PersistentTablespace_CreateEntryUnderLock(filespaceOid, tablespaceOid);
	Assert(tablespaceDirEntry != NULL);
	tablespaceDirEntry->state = PersistentFileSysState_CreatePending;
	ItemPointerCopy(persistentTid, &tablespaceDirEntry->persistentTid);
	tablespaceDirEntry->persistentSerialNum = *persistentSerialNum;
	WRITE_TABLESPACE_HASH_UNLOCK;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_tablespace(
								 filespaceOid,
								 tablespaceOid);
#endif

	SIMPLE_FAULT_INJECTOR(FaultBeforePendingDeleteTablespaceEntry);

	/*
	 * MPP-18228 To make adding 'Create Pending' entry to persistent table and
	 * adding to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
											   &fsObjName,
											   persistentTid,
											   *persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "Persistent tablespace directory: Add '%s' in state 'Created', mirror existence state '%s', serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 MirroredObjectExistenceState_Name(mirrorExistenceState),
			 *persistentSerialNum,
			 ItemPointerToString(persistentTid));
}
Example #20
/*
 * StrategyHintVacuum -- tell us whether VACUUM is active
 */
void
StrategyHintVacuum(bool vacuum_active)
{
	strategy_hint_vacuum = vacuum_active;
	strategy_vacuum_xid = GetTopTransactionId();
}
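For context on how this hint feeds the T1/T2 handling in the strategy routines above, a hypothetical VACUUM call pattern might look like the following (the call site is invented for illustration):

	/* hypothetical VACUUM code path */
	StrategyHintVacuum(true);		/* buffers it faults in go to T1's LRU end */

	/* ... scan the relation with ReadBuffer()/ReleaseBuffer() ... */

	StrategyHintVacuum(false);		/* restore normal MRU insertion */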
Example #21
/*
 * Indicate we intend to create a relation file as part of the current transaction.
 *
 * This function adds an entry in 'gp_persistent_relation_node' for either a new table (segment file
 * # 0) or a new segment file under AO table (segment file # > 0 for row/column-oriented AO) with a state
 * 'Create Pending'. An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * Parameters
 * -----------
 * relFileNode = The tablespace, database, and relation OIDs for the create
 * segmentFileNum = As the name implies (   0 for heap
 *                                       >= 0 for RO/CO AO as applicable)
 * relStorageMgr = Persistent Relation storage Manager
 * relBufpoolKind = Buffer pool type beneath corresponding relation
 * TODO bufferPoolBulkLoad = ???
 * TODO mirrorExistenceState = ???
 * TODO relDataSynchronizationState = ???
 * flushToXLog = If true, the XLOG record for this change will be flushed to disk.
 * TODO isLocalBuf = ???
 *
 * Return
 * ------
 * relationName = Name of the relation used for either debugging or to store in PendingDelete LL.
 * persistentTid = Resulting TID of the gp_persistent_rel_files tuple for the relation
 * serialNum = Resulting serial number for the relation.  Distinguishes the uses of the tuple
 */
void PersistentRelation_AddCreatePending(
    RelFileNode 		*relFileNode,
    int32				segmentFileNum,
    PersistentFileSysRelStorageMgr relStorageMgr,
    PersistentFileSysRelBufpoolKind relBufpoolKind,
    bool				bufferPoolBulkLoad,
    MirroredObjectExistenceState mirrorExistenceState,
    MirroredRelDataSynchronizationState relDataSynchronizationState,
    char				*relationName,
    ItemPointer			persistentTid,
    int64				*serialNum,
    bool 				flushToXLog,
    bool				isLocalBuf)
{
    WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

    PersistentFileSysObjName fsObjName;

    XLogRecPtr mirrorBufpoolResyncCkptLoc;
    ItemPointerData previousFreeTid;

    Datum values[Natts_gp_persistent_relation_node];

    if(RelFileNode_IsEmpty(relFileNode))
        elog(ERROR, "Invalid RelFileNode (0,0,0)");

    MemSet(&previousFreeTid, 0, sizeof(ItemPointerData));
    MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr));

    if (Persistent_BeforePersistenceWork())
    {
        if (Debug_persistent_print)
            elog(Persistent_DebugPrintLevel(),
                 "Skipping persistent relation '%s' because we are before persistence work",
                 relpath(*relFileNode));

        MemSet(persistentTid, 0, sizeof(ItemPointerData));
        *serialNum = 0;

        return;	// The initdb process will load the persistent table once we are out of bootstrap mode.
    }

    /* Verify if the needed shared mem data structures for persistent tables are setup and inited */
    PersistentRelation_VerifyInitScan();

    /* Setup the file system object name */
    PersistentFileSysObjName_SetRelationFile(
        &fsObjName,
        relFileNode,
        segmentFileNum);

    WRITE_PERSISTENT_STATE_ORDERED_LOCK;

    /* Create a values array which will be used to create a 'gp_persistent_relation_node' tuple */
    GpPersistentRelationNode_SetDatumValues(
        values,
        relFileNode->spcNode,
        relFileNode->dbNode,
        relFileNode->relNode,
        segmentFileNum,
        relStorageMgr,
        (bufferPoolBulkLoad ?
         PersistentFileSysState_BulkLoadCreatePending :
         PersistentFileSysState_CreatePending),
        /* createMirrorDataLossTrackingSessionNum */ 0,
        mirrorExistenceState,
        relDataSynchronizationState,
        /* mirrorBufpoolMarkedForScanIncrementalResync */ false,
        /* mirrorBufpoolResyncChangedPageCount */ 0,
        &mirrorBufpoolResyncCkptLoc,
        /* mirrorBufpoolResyncCkptBlockNum */ 0,
        /* mirrorAppendOnlyLossEof */ 0,
        /* mirrorAppendOnlyNewEof */ 0,
        relBufpoolKind,
        GetTopTransactionId(),
        /* persistentSerialNum */ 0,	// This will be set by PersistentFileSysObj_AddTuple.
        &previousFreeTid);

    /* Add a new tuple to 'gp_persistent_relation_node' table for the new relation/segment file
     * we intend to create. This will also create and apply a new persistent serial number. */
    PersistentFileSysObj_AddTuple(
        PersistentFsObjType_RelationFile,
        values,
        flushToXLog,
        persistentTid,
        serialNum);

    /*
     * This XLOG must be generated under the persistent write-lock.
     */
#ifdef MASTER_MIRROR_SYNC
    mmxlog_log_create_relfilenode(
        relFileNode->spcNode,
        relFileNode->dbNode,
        relFileNode->relNode,
        segmentFileNum);
#endif

#ifdef FAULT_INJECTOR
    FaultInjector_InjectFaultIfSet(
        FaultBeforePendingDeleteRelationEntry,
        DDLNotSpecified,
        "",  // databaseName
        ""); // tableName
#endif

    /* We'll add an entry to the PendingDelete LinkedList (LL) to remember what we
     * created in this transaction (or sub-transaction). If the transaction
     * aborts then we can search for all such entries in this LL and get rid of (delete)
     * such relations or segment files on the disk.
    *
    * MPP-18228
    * To make adding 'Create Pending' entry to persistent table and adding
    * to the PendingDelete list atomic
    */
    PendingDelete_AddCreatePendingRelationEntry(
        &fsObjName,
        persistentTid,
        serialNum,
        relStorageMgr,
        relationName,
        isLocalBuf,
        bufferPoolBulkLoad);


    WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

    if (Debug_persistent_print)
        elog(Persistent_DebugPrintLevel(),
             "Persistent relation: Add '%s', relation name '%s' in state 'Create Pending', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s",
             PersistentFileSysObjName_ObjectName(&fsObjName),
             relationName,
             PersistentFileSysRelStorageMgr_Name(relStorageMgr),
             MirroredObjectExistenceState_Name(mirrorExistenceState),
             MirroredRelDataSynchronizationState_Name(relDataSynchronizationState),
             *serialNum,
             ItemPointerToString(persistentTid));
}
Example #22
Datum
_Slony_I_createEvent(PG_FUNCTION_ARGS)
{
	TransactionId newXid = GetTopTransactionId();
	Slony_I_ClusterStatus *cs;
	char	   *ev_type_c;
	Datum		argv[9];
	char		nulls[10];
	char	   *buf;
	size_t		buf_size;
	int			rc;
	int			i;
	int64		retval;
	bool		isnull;

#ifdef HAVE_GETACTIVESNAPSHOT
	if (GetActiveSnapshot() == NULL)
		elog(ERROR, "Slony-I: ActiveSnapshot is NULL in createEvent()");
#else
	if (SerializableSnapshot == NULL)
		elog(ERROR, "Slony-I: SerializableSnapshot is NULL in createEvent()");
#endif

	if ((rc = SPI_connect()) < 0)
		elog(ERROR, "Slony-I: SPI_connect() failed in createEvent()");

	/*
	 * Get or create the cluster status information and make sure it has the
	 * SPI plans that we need here.
	 */
	cs = getClusterStatus(PG_GETARG_NAME(0),
						  PLAN_INSERT_EVENT);

	buf_size = 8192;
	buf = palloc(buf_size);

	/*
	 * Do the following only once per transaction.
	 */
	if (!TransactionIdEquals(cs->currentXid, newXid))
	{
		cs->currentXid = newXid;
	}

	/*
	 * Call the saved INSERT plan
	 */
	for (i = 1; i < 10; i++)
	{
		if (i >= PG_NARGS() || PG_ARGISNULL(i))
		{
			argv[i - 1] = (Datum) 0;
			nulls[i - 1] = 'n';
		}
		else
		{
			argv[i - 1] = PG_GETARG_DATUM(i);
			nulls[i - 1] = ' ';
		}
	}
	nulls[9] = '\0';

	if ((rc = SPI_execp(cs->plan_insert_event, argv, nulls, 0)) < 0)
		elog(ERROR, "Slony-I: SPI_execp() failed for \"INSERT INTO sl_event ...\"");

	/*
	 * The INSERT plan also contains a SELECT currval('sl_event_seq'), use the
	 * new sequence number as return value.
	 */
	if (SPI_processed != 1)
		elog(ERROR, "Slony-I: INSERT plan did not return 1 result row");
	retval = DatumGetInt64(SPI_getbinval(SPI_tuptable->vals[0],
										 SPI_tuptable->tupdesc, 1, &isnull));

	/*
	 * For SYNC and ENABLE_SUBSCRIPTION events, we also remember all current
	 * sequence values.
	 */
	if (PG_NARGS() > 1 && !PG_ARGISNULL(1))
	{
		ev_type_c = DatumGetPointer(DirectFunctionCall1(
											   textout, PG_GETARG_DATUM(1)));
		if (strcmp(ev_type_c, "SYNC") == 0 ||
			strcmp(ev_type_c, "ENABLE_SUBSCRIPTION") == 0)
		{
/*@-nullpass@*/
			if ((rc = SPI_execp(cs->plan_record_sequences, NULL, NULL, 0)) < 0)
				elog(ERROR, "Slony-I: SPI_execp() failed for \"INSERT INTO sl_seqlog ...\"");
/*@+nullpass@*/
		}
	}

	(void) SPI_finish();
/*@-mustfreefresh@*/
	PG_RETURN_INT64(retval);
}
Example #23
/*
 * Main internal procedure that handles 2 & 3 arg forms of SETVAL.
 *
 * Note that the 3 arg version (which sets the is_called flag) is
 * only for use in pg_dump, and setting the is_called flag may not
 * work if multiple users are attached to the database and referencing
 * the sequence (unlikely if pg_dump is restoring it).
 *
 * It is necessary to have the 3 arg version so that pg_dump can
 * restore the state of a sequence exactly during data-only restores -
 * it is the only way to clear the is_called flag in an existing
 * sequence.
 */
static void
do_setval(Oid relid, int64 next, bool iscalled)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;

	/* open and AccessShareLock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
		PreventCommandIfReadOnly("setval()");

	/*
	 * Forbid this during parallel operation because, to make it work,
	 * the cooperating backends would need to share the backend-local cached
	 * sequence information.  Currently, we don't support that.
	 */
	PreventCommandIfParallelMode("setval()");

	/* lock page' buffer and read tuple */
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);

	if ((next < seq->min_value) || (next > seq->max_value))
	{
		char		bufv[100],
					bufm[100],
					bufx[100];

		snprintf(bufv, sizeof(bufv), INT64_FORMAT, next);
		snprintf(bufm, sizeof(bufm), INT64_FORMAT, seq->min_value);
		snprintf(bufx, sizeof(bufx), INT64_FORMAT, seq->max_value);
		ereport(ERROR,
				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
				 errmsg("setval: value %s is out of bounds for sequence \"%s\" (%s..%s)",
						bufv, RelationGetRelationName(seqrel),
						bufm, bufx)));
	}

	/* Set the currval() state only if iscalled = true */
	if (iscalled)
	{
		elm->last = next;		/* last returned number */
		elm->last_valid = true;
	}

	/* In any case, forget any future cached numbers */
	elm->cached = elm->last;

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	seq->last_value = next;		/* last fetched number */
	seq->is_called = iscalled;
	seq->log_cnt = 0;

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		Page		page = BufferGetPage(buf);

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = seqrel->rd_node;
		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);
}
Example #24
static int64
nextval_internal(Oid relid)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	Page		page;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;
	int64		incby,
				maxv,
				minv,
				cache,
				log,
				fetch,
				last;
	int64		result,
				next,
				rescnt = 0;
	bool		logit = false;

	/* open and AccessShareLock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(),
						  ACL_USAGE | ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
		PreventCommandIfReadOnly("nextval()");

	/*
	 * Forbid this during parallel operation because, to make it work,
	 * the cooperating backends would need to share the backend-local cached
	 * sequence information.  Currently, we don't support that.
	 */
	PreventCommandIfParallelMode("nextval()");

	if (elm->last != elm->cached)		/* some numbers were cached */
	{
		Assert(elm->last_valid);
		Assert(elm->increment != 0);
		elm->last += elm->increment;
		relation_close(seqrel, NoLock);
		last_used_seq = elm;
		return elm->last;
	}

	/* lock page' buffer and read tuple */
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
	page = BufferGetPage(buf);

	last = next = result = seq->last_value;
	incby = seq->increment_by;
	maxv = seq->max_value;
	minv = seq->min_value;
	fetch = cache = seq->cache_value;
	log = seq->log_cnt;

	if (!seq->is_called)
	{
		rescnt++;				/* return last_value if not is_called */
		fetch--;
	}

	/*
	 * Decide whether we should emit a WAL log record.  If so, force up the
	 * fetch count to grab SEQ_LOG_VALS more values than we actually need to
	 * cache.  (These will then be usable without logging.)
	 *
	 * If this is the first nextval after a checkpoint, we must force a new
	 * WAL record to be written anyway, else replay starting from the
	 * checkpoint would fail to advance the sequence past the logged values.
	 * In this case we may as well fetch extra values.
	 */
	if (log < fetch || !seq->is_called)
	{
		/* forced log to satisfy local demand for values */
		fetch = log = fetch + SEQ_LOG_VALS;
		logit = true;
	}
	else
	{
		XLogRecPtr	redoptr = GetRedoRecPtr();

		if (PageGetLSN(page) <= redoptr)
		{
			/* last update of seq was before checkpoint */
			fetch = log = fetch + SEQ_LOG_VALS;
			logit = true;
		}
	}

	while (fetch)				/* try to fetch cache [+ log ] numbers */
	{
		/*
		 * Check MAXVALUE for ascending sequences and MINVALUE for descending
		 * sequences
		 */
		if (incby > 0)
		{
			/* ascending sequence */
			if ((maxv >= 0 && next > maxv - incby) ||
				(maxv < 0 && next + incby > maxv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!seq->is_cycled)
				{
					char		buf[100];

					snprintf(buf, sizeof(buf), INT64_FORMAT, maxv);
					ereport(ERROR,
						  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						   errmsg("nextval: reached maximum value of sequence \"%s\" (%s)",
								  RelationGetRelationName(seqrel), buf)));
				}
				next = minv;
			}
			else
				next += incby;
		}
		else
		{
			/* descending sequence */
			if ((minv < 0 && next < minv - incby) ||
				(minv >= 0 && next + incby < minv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!seq->is_cycled)
				{
					char		buf[100];

					snprintf(buf, sizeof(buf), INT64_FORMAT, minv);
					ereport(ERROR,
						  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						   errmsg("nextval: reached minimum value of sequence \"%s\" (%s)",
								  RelationGetRelationName(seqrel), buf)));
				}
				next = maxv;
			}
			else
				next += incby;
		}
		fetch--;
		if (rescnt < cache)
		{
			log--;
			rescnt++;
			last = next;
			if (rescnt == 1)	/* if it's first result - */
				result = next;	/* it's what to return */
		}
	}

	log -= fetch;				/* adjust for any unfetched numbers */
	Assert(log >= 0);

	/* save info in local cache */
	elm->last = result;			/* last returned number */
	elm->cached = last;			/* last fetched number */
	elm->last_valid = true;

	last_used_seq = elm;

	/*
	 * If something needs to be WAL logged, acquire an xid, so this
	 * transaction's commit will trigger a WAL flush and wait for
	 * syncrep. It's sufficient to ensure the toplevel transaction has an xid,
	 * no need to assign xids to subxacts; that'll already trigger an appropriate
	 * wait.  (Have to do that here, so we're outside the critical section)
	 */
	if (logit && RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	/*
	 * We must mark the buffer dirty before doing XLogInsert(); see notes in
	 * SyncOneBuffer().  However, we don't apply the desired changes just yet.
	 * This looks like a violation of the buffer update protocol, but it is in
	 * fact safe because we hold exclusive lock on the buffer.  Any other
	 * process, including a checkpoint, that tries to examine the buffer
	 * contents will block until we release the lock, and then will see the
	 * final state that we install below.
	 */
	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (logit && RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;

		/*
		 * We don't log the current state of the tuple, but rather the state
		 * as it would appear after "log" more fetches.  This lets us skip
		 * that many future WAL records, at the cost that we lose those
		 * sequence values if we crash.
		 */
		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		/* set values that will be saved in xlog */
		seq->last_value = next;
		seq->is_called = true;
		seq->log_cnt = 0;

		xlrec.node = seqrel->rd_node;

		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	/* Now update sequence tuple to the intended final state */
	seq->last_value = last;		/* last fetched number */
	seq->is_called = true;
	seq->log_cnt = log;			/* how much is logged */

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);

	return result;
}
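To make the prefetch-logging above concrete, assume SEQ_LOG_VALS is 32 (its usual definition in sequence.c), a sequence with cache_value = 1, increment_by = 1, is_called = true, last_value = 10 and log_cnt = 0. A rough trace of the bookkeeping, written as comments:

	/*
	 * nextval #1:  log (0) < fetch (1), so fetch = log = 33 and logit = true.
	 *              The caller gets 11, but the WAL record describes the state
	 *              after all 33 fetches (last_value = 43, log_cnt = 0), while
	 *              the buffer keeps last_value = 11, log_cnt = 32.
	 * nextval #2..#33:  return 12..43 with no WAL at all; log_cnt counts
	 *              down 31, 30, ... 0.
	 * nextval #34: log_cnt is 0 again, so another record is written, again
	 *              describing a state 32 values past the one just returned.
	 *
	 * After a crash, replay restores last_value = 43 even if only 11 was ever
	 * handed out, which is the sense in which those values are "lost".
	 */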
Example #25
/*
 * ExportSnapshot
 *		Export the snapshot to a file so that other backends can import it.
 *		Returns the token (the file name) that can be used to import this
 *		snapshot.
 */
static char *
ExportSnapshot(Snapshot snapshot)
{
	TransactionId topXid;
	TransactionId *children;
	int			nchildren;
	int			addTopXid;
	StringInfoData buf;
	FILE	   *f;
	int			i;
	MemoryContext oldcxt;
	char		path[MAXPGPATH];
	char		pathtmp[MAXPGPATH];

	/*
	 * It's tempting to call RequireTransactionChain here, since it's not
	 * very useful to export a snapshot that will disappear immediately
	 * afterwards.  However, we haven't got enough information to do that,
	 * since we don't know if we're at top level or not.  For example, we
	 * could be inside a plpgsql function that is going to fire off other
	 * transactions via dblink.  Rather than disallow perfectly legitimate
	 * usages, don't make a check.
	 *
	 * Also note that we don't make any restriction on the transaction's
	 * isolation level; however, importers must check the level if they
	 * are serializable.
	 */

	/*
	 * This will assign a transaction ID if we do not yet have one.
	 */
	topXid = GetTopTransactionId();

	/*
	 * We cannot export a snapshot from a subtransaction because there's no
	 * easy way for importers to verify that the same subtransaction is still
	 * running.
	 */
	if (IsSubTransaction())
		ereport(ERROR,
				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
				 errmsg("cannot export a snapshot from a subtransaction")));

	/*
	 * We do however allow previous committed subtransactions to exist.
	 * Importers of the snapshot must see them as still running, so get their
	 * XIDs to add them to the snapshot.
	 */
	nchildren = xactGetCommittedChildren(&children);

	/*
	 * Copy the snapshot into TopTransactionContext, add it to the
	 * exportedSnapshots list, and mark it pseudo-registered.  We do this to
	 * ensure that the snapshot's xmin is honored for the rest of the
	 * transaction.  (Right now, because SnapshotResetXmin is so stupid, this
	 * is overkill; but later we might make that routine smarter.)
	 */
	snapshot = CopySnapshot(snapshot);

	oldcxt = MemoryContextSwitchTo(TopTransactionContext);
	exportedSnapshots = lappend(exportedSnapshots, snapshot);
	MemoryContextSwitchTo(oldcxt);

	snapshot->regd_count++;
	RegisteredSnapshots++;

	/*
	 * Fill buf with a text serialization of the snapshot, plus identification
	 * data about this transaction.  The format expected by ImportSnapshot
	 * is pretty rigid: each line must be fieldname:value.
	 */
	initStringInfo(&buf);

	appendStringInfo(&buf, "xid:%u\n", topXid);
	appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
	appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
	appendStringInfo(&buf, "ro:%d\n", XactReadOnly);

	appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
	appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);

	/*
	 * We must include our own top transaction ID in the top-xid data, since
	 * by definition we will still be running when the importing transaction
	 * adopts the snapshot, but GetSnapshotData never includes our own XID in
	 * the snapshot.  (There must, therefore, be enough room to add it.)
	 *
	 * However, it could be that our topXid is after the xmax, in which case
	 * we shouldn't include it because xip[] members are expected to be before
	 * xmax.  (We need not make the same check for subxip[] members, see
	 * snapshot.h.)
	 */
	addTopXid = TransactionIdPrecedes(topXid, snapshot->xmax) ? 1 : 0;
	appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
	for (i = 0; i < snapshot->xcnt; i++)
		appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
	if (addTopXid)
		appendStringInfo(&buf, "xip:%u\n", topXid);

	/*
	 * Similarly, we add our subcommitted child XIDs to the subxid data.
	 * Here, we have to cope with possible overflow.
	 */
	if (snapshot->suboverflowed ||
		snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
		appendStringInfoString(&buf, "sof:1\n");
	else
	{
		appendStringInfoString(&buf, "sof:0\n");
		appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
		for (i = 0; i < snapshot->subxcnt; i++)
			appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
		for (i = 0; i < nchildren; i++)
			appendStringInfo(&buf, "sxp:%u\n", children[i]);
	}
	appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);

	/*
	 * Now write the text representation into a file.  We first write to a
	 * ".tmp" filename, and rename to final filename if no error.  This
	 * ensures that no other backend can read an incomplete file
	 * (ImportSnapshot won't allow it because of its valid-characters check).
	 */
	XactExportFilePath(pathtmp, topXid, list_length(exportedSnapshots), ".tmp");
	if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", pathtmp)));

	if (fwrite(buf.data, buf.len, 1, f) != 1)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", pathtmp)));

	/* no fsync() since file need not survive a system crash */

	if (FreeFile(f))
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", pathtmp)));

	/*
	 * Now that we have written everything into a .tmp file, rename the file
	 * to remove the .tmp suffix.
	 */
	XactExportFilePath(path, topXid, list_length(exportedSnapshots), "");

	if (rename(pathtmp, path) < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not rename file \"%s\" to \"%s\": %m",
						pathtmp, path)));

	/*
	 * The basename of the file is what we return from pg_export_snapshot().
	 * It's already in path in a textual format and we know that the path
	 * starts with SNAPSHOT_EXPORT_DIR.  Skip over the prefix and the slash
	 * and pstrdup it so as not to return the address of a local variable.
	 */
	return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
}
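A note on the serialized format above: the importer reads the file back one "fieldname:value" line at a time, in the same order the exporter wrote them (xid, dbid, iso, ro, xmin, xmax, ...). The following is only a minimal, standalone sketch of such a parser; parseUintField is a hypothetical helper, not the backend's actual ImportSnapshot code, and a real backend would report errors with ereport() rather than exit().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Hypothetical sketch: read one "fieldname:value" line, as written by the
 * export code above, and return the unsigned integer value.
 */
static unsigned int
parseUintField(FILE *f, const char *fieldname)
{
	char		line[64];
	size_t		len = strlen(fieldname);

	if (fgets(line, sizeof(line), f) == NULL ||
		strncmp(line, fieldname, len) != 0 ||
		line[len] != ':')
	{
		fprintf(stderr, "invalid snapshot data: expected \"%s\"\n", fieldname);
		exit(1);
	}
	return (unsigned int) strtoul(line + len + 1, NULL, 10);
}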
Example #26
Datum
_Slony_I_logTrigger(PG_FUNCTION_ARGS)
{
	TransactionId newXid = GetTopTransactionId();
	Slony_I_ClusterStatus *cs;
	TriggerData *tg;
	Datum		argv[4];
	text	   *cmdtype = NULL;
	int			rc;
	Name		cluster_name;
	int32		tab_id;
	char	   *attkind;
	int			attkind_idx;
	int			cmddata_need;

	/*
	 * Don't do any logging if the current session role isn't Origin.
	 */
	if (SessionReplicationRole != SESSION_REPLICATION_ROLE_ORIGIN)
		return PointerGetDatum(NULL);

	/*
	 * Get the trigger call context
	 */
	if (!CALLED_AS_TRIGGER(fcinfo))
		elog(ERROR, "Slony-I: logTrigger() not called as trigger");
	tg = (TriggerData *) (fcinfo->context);

	/*
	 * Check all logTrigger() calling conventions
	 */
	if (!TRIGGER_FIRED_AFTER(tg->tg_event))
		elog(ERROR, "Slony-I: logTrigger() must be fired AFTER");
	if (!TRIGGER_FIRED_FOR_ROW(tg->tg_event))
		elog(ERROR, "Slony-I: logTrigger() must be fired FOR EACH ROW");
	if (tg->tg_trigger->tgnargs != 3)
		elog(ERROR, "Slony-I: logTrigger() must be defined with 3 args");

	/*
	 * Connect to the SPI manager
	 */
	if ((rc = SPI_connect()) < 0)
		elog(ERROR, "Slony-I: SPI_connect() failed in createEvent()");

	/*
	 * Get all the trigger arguments
	 */
	cluster_name = DatumGetName(DirectFunctionCall1(namein,
								CStringGetDatum(tg->tg_trigger->tgargs[0])));
	tab_id = strtol(tg->tg_trigger->tgargs[1], NULL, 10);
	attkind = tg->tg_trigger->tgargs[2];

	/*
	 * Get or create the cluster status information and make sure it has the
	 * SPI plans that we need here.
	 */
	cs = getClusterStatus(cluster_name, PLAN_INSERT_LOG);

	/*
	 * Do the following only once per transaction.
	 */
	if (!TransactionIdEquals(cs->currentXid, newXid))
	{
		int32		log_status;
		bool		isnull;

		/*
		 * Determine the currently active log table
		 */
		if (SPI_execp(cs->plan_get_logstatus, NULL, NULL, 0) < 0)
			elog(ERROR, "Slony-I: cannot determine log status");
		if (SPI_processed != 1)
			elog(ERROR, "Slony-I: cannot determine log status");

		log_status = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
											SPI_tuptable->tupdesc, 1, &isnull));
		SPI_freetuptable(SPI_tuptable);

		switch (log_status)
		{
			case 0:
			case 2:
				cs->plan_active_log = cs->plan_insert_log_1;
				break;

			case 1:
			case 3:
				cs->plan_active_log = cs->plan_insert_log_2;
				break;

			default:
				elog(ERROR, "Slony-I: illegal log status %d", log_status);
				break;
		}

		cs->currentXid = newXid;
	}

	/*
	 * Determine cmdtype and cmddata depending on the command type
	 */
	if (TRIGGER_FIRED_BY_INSERT(tg->tg_event))
	{
		HeapTuple	new_row = tg->tg_trigtuple;
		TupleDesc	tupdesc = tg->tg_relation->rd_att;
		char	   *col_ident;
		char	   *col_value;

		int			len_ident;
		int			len_value;
		int			i;
		int			need_comma = false;
		char	   *OldDateStyle;
		char	   *cp = VARDATA(cs->cmddata_buf);

		/*
		 * INSERT
		 *
		 * cmdtype = 'I' cmddata = ("col" [, ...]) values ('value' [, ...])
		 */
		cmdtype = cs->cmdtype_I;

		/*
		 * Specify all the columns
		 */
		*cp++ = '(';
		for (i = 0; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Skip dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_comma)
				*cp++ = ',';
			else
				need_comma = true;

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
		}

		/*
		 * Append the string ") values ("
		 */
		*cp++ = ')';
		*cp++ = ' ';
		*cp++ = 'v';
		*cp++ = 'a';
		*cp++ = 'l';
		*cp++ = 'u';
		*cp++ = 'e';
		*cp++ = 's';
		*cp++ = ' ';
		*cp++ = '(';

		/*
		 * Append the values
		 */
		need_comma = false;
		OldDateStyle = GetConfigOptionByName("DateStyle", NULL);
		if (!strstr(OldDateStyle, "ISO"))
			set_config_option("DateStyle", "ISO", PGC_USERSET, PGC_S_SESSION, true, true);
		for (i = 0; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Skip dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;


			if ((col_value = SPI_getvalue(new_row, tupdesc, i + 1)) == NULL)
			{
				col_value = "NULL";
			}
			else
			{
				col_value = slon_quote_literal(col_value);
			}

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_comma)
				*cp++ = ',';
			else
				need_comma = true;

			memcpy(cp, col_value, len_value);
			cp += len_value;
		}

		if (!strstr(OldDateStyle, "ISO"))
			set_config_option("DateStyle", OldDateStyle, PGC_USERSET, PGC_S_SESSION, true, true);

		/*
		 * Terminate and done
		 */
		*cp++ = ')';
		*cp = '\0';
		SET_VARSIZE(cs->cmddata_buf,
					VARHDRSZ + (cp - VARDATA(cs->cmddata_buf)));
	}
	else if (TRIGGER_FIRED_BY_UPDATE(tg->tg_event))
	{
		HeapTuple	old_row = tg->tg_trigtuple;
		HeapTuple	new_row = tg->tg_newtuple;
		TupleDesc	tupdesc = tg->tg_relation->rd_att;
		Datum		old_value;
		Datum		new_value;
		bool		old_isnull;
		bool		new_isnull;

		char	   *col_ident;
		char	   *col_value;
		int			len_ident;
		int			len_value;
		int			i;
		int			need_comma = false;
		int			need_and = false;
		char	   *OldDateStyle;

		char	   *cp = VARDATA(cs->cmddata_buf);

		/*
		 * UPDATE
		 *
		 * cmdtype = 'U' cmddata = "col_ident"='value' [, ...] where
		 * "pk_ident" = 'value' [ and ...]
		 */
		cmdtype = cs->cmdtype_U;
		for (i = 0; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Ignore dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			old_value = SPI_getbinval(old_row, tupdesc, i + 1, &old_isnull);
			new_value = SPI_getbinval(new_row, tupdesc, i + 1, &new_isnull);

			/*
			 * If old and new value are NULL, the column is unchanged
			 */
			if (old_isnull && new_isnull)
				continue;

			/*
			 * If both are NOT NULL, we need to compare the values and skip
			 * setting the column if equal
			 */
			if (!old_isnull && !new_isnull)
			{
				Oid			opr_oid;
				FmgrInfo   *opr_finfo_p;

				/*
				 * Look up the equal operator's function call info using the
				 * type cache if available
				 */
#ifdef HAVE_TYPCACHE
				TypeCacheEntry *type_cache;

				type_cache = lookup_type_cache(
											   SPI_gettypeid(tupdesc, i + 1),
								  TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO);
				opr_oid = type_cache->eq_opr;
				if (opr_oid == ARRAY_EQ_OP)
					opr_oid = InvalidOid;
				else
					opr_finfo_p = &(type_cache->eq_opr_finfo);
#else
				FmgrInfo	opr_finfo;

				opr_oid = compatible_oper_funcid(makeList1(makeString("=")),
											   SPI_gettypeid(tupdesc, i + 1),
										SPI_gettypeid(tupdesc, i + 1), true);
				if (OidIsValid(opr_oid))
				{
					fmgr_info(opr_oid, &opr_finfo);
					opr_finfo_p = &opr_finfo;
				}
#endif

				/*
				 * If we have an equal operator, use it to do a binary
				 * comparison.  Otherwise, get the string representation of
				 * both attributes and do a string comparison.
				 */
				if (OidIsValid(opr_oid))
				{
					if (DatumGetBool(FunctionCall2(opr_finfo_p,
												   old_value, new_value)))
						continue;
				}
				else
				{
					char	   *old_strval = SPI_getvalue(old_row, tupdesc, i + 1);
					char	   *new_strval = SPI_getvalue(new_row, tupdesc, i + 1);

					if (strcmp(old_strval, new_strval) == 0)
						continue;
				}
			}

			if (need_comma)
				*cp++ = ',';
			else
				need_comma = true;

			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			if (new_isnull)
				col_value = "NULL";
			else
			{
				OldDateStyle = GetConfigOptionByName("DateStyle", NULL);
				if (!strstr(OldDateStyle, "ISO"))
					set_config_option("DateStyle", "ISO", PGC_USERSET, PGC_S_SESSION, true, true);
				col_value = slon_quote_literal(SPI_getvalue(new_row, tupdesc, i + 1));
				if (!strstr(OldDateStyle, "ISO"))
					set_config_option("DateStyle", OldDateStyle, PGC_USERSET, PGC_S_SESSION, true, true);
			}
			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}

		/*
		 * It can happen that the only UPDATE an application does is to set a
		 * column to the same value again.  In that case, we'd end up here
		 * with no columns in the SET clause yet.  We add the first key column
		 * here with its old value so that the replication engine applies the
		 * same no-op update.
		 */
		if (!need_comma)
		{
			for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++)
			{
				if (tupdesc->attrs[i]->attisdropped)
					continue;

				attkind_idx++;
				if (!attkind[attkind_idx])
					elog(ERROR, "Slony-I: no key columns found in logTrigger() attkind parameter");

				if (attkind[attkind_idx] == 'k')
					break;
			}
			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1));

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}

		*cp++ = ' ';
		*cp++ = 'w';
		*cp++ = 'h';
		*cp++ = 'e';
		*cp++ = 'r';
		*cp++ = 'e';
		*cp++ = ' ';

		for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++)
		{
			/*
			 * Ignore dropped columns
			 */
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			attkind_idx++;
			if (!attkind[attkind_idx])
				break;
			if (attkind[attkind_idx] != 'k')
				continue;
			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1));
			if (col_value == NULL)
				elog(ERROR, "Slony-I: old key column %s.%s IS NULL on UPDATE",
					 NameStr(tg->tg_relation->rd_rel->relname), col_ident);

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_and)
			{
				*cp++ = ' ';
				*cp++ = 'a';
				*cp++ = 'n';
				*cp++ = 'd';
				*cp++ = ' ';
			}
			else
				need_and = true;

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}
		*cp = '\0';
		SET_VARSIZE(cs->cmddata_buf,
					VARHDRSZ + (cp - VARDATA(cs->cmddata_buf)));
	}
	else if (TRIGGER_FIRED_BY_DELETE(tg->tg_event))
	{
		HeapTuple	old_row = tg->tg_trigtuple;
		TupleDesc	tupdesc = tg->tg_relation->rd_att;
		char	   *col_ident;
		char	   *col_value;
		int			len_ident;
		int			len_value;
		int			i;
		int			need_and = false;
		char	   *cp = VARDATA(cs->cmddata_buf);

		/*
		 * DELETE
		 *
		 * cmdtype = 'D' cmddata = "pk_ident"='value' [and ...]
		 */
		cmdtype = cs->cmdtype_D;

		for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++)
		{
			if (tupdesc->attrs[i]->attisdropped)
				continue;

			attkind_idx++;
			if (!attkind[attkind_idx])
				break;
			if (attkind[attkind_idx] != 'k')
				continue;
			col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1));
			col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1));
			if (col_value == NULL)
				elog(ERROR, "Slony-I: old key column %s.%s IS NULL on DELETE",
					 NameStr(tg->tg_relation->rd_rel->relname), col_ident);

			cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 +
				(len_ident = strlen(col_ident)) +
				(len_value = strlen(col_value));
			if (cs->cmddata_size < cmddata_need)
			{
				int			have = (cp - (char *) (cs->cmddata_buf));

				while (cs->cmddata_size < cmddata_need)
					cs->cmddata_size *= 2;
				cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
				cp = (char *) (cs->cmddata_buf) + have;
			}

			if (need_and)
			{
				*cp++ = ' ';
				*cp++ = 'a';
				*cp++ = 'n';
				*cp++ = 'd';
				*cp++ = ' ';
			}
			else
				need_and = true;

			memcpy(cp, col_ident, len_ident);
			cp += len_ident;
			*cp++ = '=';
			memcpy(cp, col_value, len_value);
			cp += len_value;
		}
		*cp = '\0';
		SET_VARSIZE(cs->cmddata_buf,
					VARHDRSZ + (cp - VARDATA(cs->cmddata_buf)));
	}
	else
		elog(ERROR, "Slony-I: logTrigger() fired for unhandled event");

	/*
	 * Construct the parameter array and insert the log row.
	 */
	argv[0] = Int32GetDatum(tab_id);
	argv[1] = PointerGetDatum(cmdtype);
	argv[2] = PointerGetDatum(cs->cmddata_buf);
	SPI_execp(cs->plan_active_log, argv, NULL, 0);

	SPI_finish();
	return PointerGetDatum(NULL);
}
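The INSERT, UPDATE, and DELETE branches above all repeat the same growth dance on cs->cmddata_buf. A hypothetical helper (not part of Slony-I) that factors the pattern out might look like the sketch below, assuming the Slony_I_ClusterStatus fields used in the example; each call site would then become cp = ensure_cmddata_room(cs, cp, len_ident + len_value); before appending.

/*
 * Hypothetical helper: make sure cs->cmddata_buf has room for addlen more
 * bytes (plus slack), doubling the allocation as the code above does, and
 * return the possibly relocated write position.
 */
static char *
ensure_cmddata_room(Slony_I_ClusterStatus *cs, char *cp, int addlen)
{
	int			need = (cp - (char *) (cs->cmddata_buf)) + 16 + addlen;

	if (cs->cmddata_size < need)
	{
		int			have = (cp - (char *) (cs->cmddata_buf));

		while (cs->cmddata_size < need)
			cs->cmddata_size *= 2;
		cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size);
		if (cs->cmddata_buf == NULL)
			elog(ERROR, "Slony-I: out of memory in logTrigger()");
		cp = (char *) (cs->cmddata_buf) + have;
	}
	return cp;
}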
Example #27
/*
 * LockGXact
 *		Locate the prepared transaction and mark it busy for COMMIT or ROLLBACK.
 */
static GlobalTransaction
LockGXact(const char *gid, Oid user)
{
	int			i;

	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);

	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
	{
		GlobalTransaction gxact = TwoPhaseState->prepXacts[i];

		/* Ignore not-yet-valid GIDs */
		if (!gxact->valid)
			continue;
		if (strcmp(gxact->gid, gid) != 0)
			continue;

		/* Found it, but has someone else got it locked? */
		if (TransactionIdIsValid(gxact->locking_xid))
		{
			if (TransactionIdIsActive(gxact->locking_xid))
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg("prepared transaction with identifier \"%s\" is busy",
					   gid)));
			gxact->locking_xid = InvalidTransactionId;
		}

		if (user != gxact->owner && !superuser_arg(user))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				  errmsg("permission denied to finish prepared transaction"),
					 errhint("Must be superuser or the user that prepared the transaction.")));

		/*
		 * Note: it probably would be possible to allow committing from
		 * another database; but at the moment NOTIFY is known not to work and
		 * there may be some other issues as well.	Hence disallow until
		 * someone gets motivated to make it work.
		 */
		if (MyDatabaseId != gxact->proc.databaseId)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				  errmsg("prepared transaction belongs to another database"),
					 errhint("Connect to the database where the transaction was prepared to finish it.")));

		/* OK for me to lock it */
		gxact->locking_xid = GetTopTransactionId();

		LWLockRelease(TwoPhaseStateLock);

		return gxact;
	}

	LWLockRelease(TwoPhaseStateLock);

	ereport(ERROR,
			(errcode(ERRCODE_UNDEFINED_OBJECT),
		 errmsg("prepared transaction with identifier \"%s\" does not exist",
				gid)));

	/* NOTREACHED */
	return NULL;
}
Example #28
void PersistentRelation_MarkCreatePending(
		RelFileNode *relFileNode,
		ItemPointer persistentTid,
		int64 *persistentSerialNum,
		bool flushToXLog)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;

	RelationDirEntry relationDirEntry;

	ItemPointerData previousFreeTid;
	Datum values[Natts_gp_persistent_relation_node];

	if (RelFileNode_IsEmpty(relFileNode))
	{
		elog(ERROR, "Invalid RelFileNode (0,0,0)");
	}

	MemSet(&previousFreeTid, 0, sizeof(ItemPointerData));

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
		{
			elog(Persistent_DebugPrintLevel(),
				"Skipping persistent relation '%s' because we are before persistence work",
				relpath(*relFileNode));
		}

		*persistentSerialNum = 0;
		/*
		 * The initdb process will load the persistent table once we are
		 * out of bootstrap mode.
		 */
		return;
	}

	PersistentRelation_VerifyInitScan();

	PersistentFileSysObjName_SetRelationDir(
							&fsObjName,
							relFileNode,
							is_tablespace_shared);

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	relationDirEntry = PersistentRelation_CreateEntryUnderLock(relFileNode);

	if (relationDirEntry == NULL)
	{
		/* If out of shared memory, no need to promote to PANIC. */
		WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				errmsg("Out of shared-memory for persistent relations"),
				errhint("You may need to increase the gp_max_relations value"),
				errOmitLocation(true)));
	}

	relationDirEntry->state = PersistentFileSysState_CreatePending;

	GpPersistentRelationNode_SetDatumValues(
									values,
									relFileNode->spcNode,
									relFileNode->dbNode,
									relFileNode->relNode,
									PersistentFileSysState_CreatePending,
									/* reserved */ 0,
									/* parentXid */ GetTopTransactionId(),
									/* persistentSerialNum */ 0, // This will be set by PersistentFileSysObj_AddTuple.
									&previousFreeTid,
									is_tablespace_shared(relFileNode->spcNode));

	PersistentFileSysObj_AddTuple(
							PersistentFsObjType_RelationDir,
							values,
							flushToXLog,
							&relationDirEntry->persistentTid,
							&relationDirEntry->persistentSerialNum);

	*persistentTid = relationDirEntry->persistentTid;
	*persistentSerialNum = relationDirEntry->persistentSerialNum;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_relation(
						relFileNode->spcNode,
						relFileNode->dbNode,
						relFileNode->relNode,
						persistentTid, persistentSerialNum);
#endif

	/*
	 * MPP-18228
	 * To make adding 'Create Pending' entry to persistent table and
	 * adding to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
					&fsObjName,
					persistentTid,
					*persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
}
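For context, the 'Create Pending' entry added above is resolved at end of transaction: commit promotes the object to 'Created', while abort marks it for removal, which is what the pending-delete entry added by PendingDelete_AddCreatePendingEntryWrapper() arranges. The enum and helper below are only an illustrative sketch of that idea, with invented names, not Greenplum code.

/*
 * Illustrative sketch only: how a 'Create Pending' persistent entry is
 * resolved once the creating transaction finishes.
 */
typedef enum
{
	SKETCH_CREATE_PENDING,
	SKETCH_CREATED,				/* transaction committed */
	SKETCH_ABORTING_CREATE		/* transaction aborted; file will be dropped */
} SketchPersistentState;

static SketchPersistentState
sketch_resolve_create_pending(bool xactCommitted)
{
	return xactCommitted ? SKETCH_CREATED : SKETCH_ABORTING_CREATE;
}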
Example #29
/*
 * Indicate we intend to create a filespace file as part of the current transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After one or more calls to this routine to record the intention to create filespace
 * files, call ~_DoPendingCreates to do the actual file-system creates.  (See its note
 * on XLOG flushing.)
 */
void PersistentFilespace_MarkCreatePending(
	Oid 		filespaceOid,
				/* The OID of the filespace being created. */

	int16		primaryDbId,

	char 		*primaryFilespaceLocation,
				/*
				 * The primary filespace directory path.  NOT Blank padded.
				 * Just a NULL terminated string.
				 */

	int16		mirrorDbId,

	char 		*mirrorFilespaceLocation,

	MirroredObjectExistenceState mirrorExistenceState,

	ItemPointer		persistentTid,
				/* TID of the gp_persistent_filespace_node tuple for the filespace */

	int64			*persistentSerialNum,


	bool			flushToXLog)
				/* When true, the XLOG record for this change will be flushed to disk. */

{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;

	FilespaceDirEntry filespaceDirEntry;
	TransactionId topXid;
	Datum values[Natts_gp_persistent_filespace_node];
	char mirrorFilespaceLocationBlankPadded[FilespaceLocationBlankPaddedWithNullTermLen];
	char primaryFilespaceLocationBlankPadded[FilespaceLocationBlankPaddedWithNullTermLen];

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent filespace %u because we are before persistence work",
				 filespaceOid);

		return;	// The initdb process will load the persistent table once we are out of bootstrap mode.
	}

	PersistentFilespace_VerifyInitScan();

	PersistentFileSysObjName_SetFilespaceDir(&fsObjName,filespaceOid);

	topXid = GetTopTransactionId();

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	PersistentFilespace_BlankPadCopyLocation(
										primaryFilespaceLocationBlankPadded,
										primaryFilespaceLocation);
	
	PersistentFilespace_BlankPadCopyLocation(
										mirrorFilespaceLocationBlankPadded,
										mirrorFilespaceLocation);

	GpPersistentFilespaceNode_SetDatumValues(
										values,
										filespaceOid,
										primaryDbId,
										primaryFilespaceLocationBlankPadded,
										mirrorDbId,
										mirrorFilespaceLocationBlankPadded,
										PersistentFileSysState_CreatePending,
										/* createMirrorDataLossTrackingSessionNum */ 0,
										mirrorExistenceState,
										/* reserved */ 0,
										/* parentXid */ topXid,
										/* persistentSerialNum */ 0);	// This will be set by PersistentFileSysObj_AddTuple.

	PersistentFileSysObj_AddTuple(
							PersistentFsObjType_FilespaceDir,
							values,
							flushToXLog,
							persistentTid,
							persistentSerialNum);


	WRITE_FILESPACE_HASH_LOCK;

	filespaceDirEntry =	PersistentFilespace_CreateDirUnderLock(filespaceOid);

	Assert(filespaceDirEntry != NULL);

	filespaceDirEntry->dbId1 = primaryDbId;
	memcpy(filespaceDirEntry->locationBlankPadded1, primaryFilespaceLocationBlankPadded,
		   FilespaceLocationBlankPaddedWithNullTermLen);
	
	filespaceDirEntry->dbId2 = mirrorDbId;
	memcpy(filespaceDirEntry->locationBlankPadded2, mirrorFilespaceLocationBlankPadded,
		   FilespaceLocationBlankPaddedWithNullTermLen);

	filespaceDirEntry->state = PersistentFileSysState_CreatePending;
	ItemPointerCopy(persistentTid, &filespaceDirEntry->persistentTid);
	filespaceDirEntry->persistentSerialNum = *persistentSerialNum;

	WRITE_FILESPACE_HASH_UNLOCK;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_filespace(filespaceOid);
#endif

	SIMPLE_FAULT_INJECTOR(FaultBeforePendingDeleteFilespaceEntry);

	/*
	 * MPP-18228
	 * To make adding 'Create Pending' entry to persistent table and adding
	 * to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
								&fsObjName,
								persistentTid,
								*persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
		     "Persistent filespace directory: Add '%s' in state 'Created', mirror existence state '%s', serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 MirroredObjectExistenceState_Name(mirrorExistenceState),
			 *persistentSerialNum,
			 ItemPointerToString(persistentTid));
}
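PersistentFilespace_BlankPadCopyLocation() is not shown in this example. A plausible sketch of what such a blank-pad copy does (fixed-width, space-padded, NUL-terminated) is given below; the name BlankPadCopySketch and the explicit length parameter are assumptions for illustration, with paddedLen corresponding to the blank-padded width (the ...BlankPaddedWithNullTermLen constant minus the terminator).

#include <string.h>

/*
 * Illustrative sketch only: copy source into a fixed-width field of
 * paddedLen characters, padding with blanks and NUL-terminating.  The
 * target buffer must hold paddedLen + 1 bytes.
 */
static void
BlankPadCopySketch(char *target, const char *source, size_t paddedLen)
{
	size_t		len = (source != NULL) ? strlen(source) : 0;

	if (len > paddedLen)
		len = paddedLen;
	memcpy(target, source, len);
	memset(target + len, ' ', paddedLen - len);
	target[paddedLen] = '\0';
}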
Example #30
/*
 * Initialize a sequence's relation with the specified tuple as content
 */
static void
fill_seq_with_data(Relation rel, HeapTuple tuple)
{
	Buffer		buf;
	Page		page;
	sequence_magic *sm;
	OffsetNumber offnum;

	/* Initialize first page of relation with special magic number */

	buf = ReadBuffer(rel, P_NEW);
	Assert(BufferGetBlockNumber(buf) == 0);

	page = BufferGetPage(buf);

	PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
	sm = (sequence_magic *) PageGetSpecialPointer(page);
	sm->magic = SEQ_MAGIC;

	/* Now insert sequence tuple */

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/*
	 * Since VACUUM does not process sequences, we have to force the tuple to
	 * have xmin = FrozenTransactionId now.  Otherwise it would become
	 * invisible to SELECTs after 2G transactions.  It is okay to do this
	 * because if the current transaction aborts, no other xact will ever
	 * examine the sequence tuple anyway.
	 */
	HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
	HeapTupleHeaderSetXminFrozen(tuple->t_data);
	HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
	HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
	tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
	ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(rel))
		GetTopTransactionId();

	START_CRIT_SECTION();

	MarkBufferDirty(buf);

	offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
						 InvalidOffsetNumber, false, false);
	if (offnum != FirstOffsetNumber)
		elog(ERROR, "failed to add sequence tuple to page");

	/* XLOG stuff */
	if (RelationNeedsWAL(rel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = rel->rd_node;

		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) tuple->t_data, tuple->t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);
}
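The bare GetTopTransactionId() call before START_CRIT_SECTION() above is an idiom: XID assignment can fail or perform I/O, so it is forced to happen before the critical section, where an ERROR is still recoverable instead of escalating to PANIC. A minimal sketch of the pattern, assuming an already-open Relation, follows; it is only an outline of the idiom, not code from the sequence module.

/*
 * Sketch of the idiom used in fill_seq_with_data(): make sure an XID has
 * been assigned before entering the critical section, because failures
 * inside a critical section escalate to PANIC.
 */
static void
wal_logged_update_sketch(Relation rel)
{
	if (RelationNeedsWAL(rel))
		(void) GetTopTransactionId();	/* may assign an XID; safe to ERROR here */

	START_CRIT_SECTION();
	/* ... modify the page, MarkBufferDirty(), XLogInsert() ... */
	END_CRIT_SECTION();
}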