/*
 * LogAccessExclusiveLock
 *		Emit a log record for one AccessExclusiveLock; intended to be called
 *		from LockAcquire().
 *
 * dbOid/relOid identify the locked relation.  Only these two values are
 * placed in the record rather than a whole locktag, so that each message
 * does not carry a lot of empty bytes (see lock.h for how a locktag is laid
 * out for LOCKTAG_RELATION).
 */
void
LogAccessExclusiveLock(Oid dbOid, Oid relOid)
{
	xl_standby_lock lockrec;

	lockrec.relOid = relOid;
	lockrec.dbOid = dbOid;

	/*
	 * Asking for the top-level xid makes sure one has been assigned to this
	 * transaction.  The value itself is not really needed yet; however,
	 * without an assigned xid RecordTransactionCommit() and
	 * RecordTransactionAbort() would optimise away the transaction
	 * completion record, and recovery depends on that record to release
	 * locks.  This is a hack, but the corner case does not justify extra
	 * code in the main commit path.
	 */
	lockrec.xid = GetTopTransactionId();

	LogAccessExclusiveLocks(1, &lockrec);
}
/* * txid_current() returns int8 * * Return the current toplevel transaction ID as TXID */ Datum txid_current(PG_FUNCTION_ARGS) { txid val; TxidEpoch state; load_xid_epoch(&state); val = convert_xid(GetTopTransactionId(), &state); PG_RETURN_INT64(val); }
/* * AtEOXact_Snapshot * Snapshot manager's cleanup function for end of transaction */ void AtEOXact_Snapshot(bool isCommit) { /* * In transaction-snapshot mode we must release our privately-managed * reference to the transaction snapshot. We must decrement * RegisteredSnapshots to keep the check below happy. But we don't bother * to do FreeSnapshot, for two reasons: the memory will go away with * TopTransactionContext anyway, and if someone has left the snapshot * stacked as active, we don't want the code below to be chasing through a * dangling pointer. */ if (FirstXactSnapshot != NULL) { Assert(FirstXactSnapshot->regd_count > 0); Assert(!pairingheap_is_empty(&RegisteredSnapshots)); pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); } FirstXactSnapshot = NULL; /* * If we exported any snapshots, clean them up. */ if (exportedSnapshots != NIL) { TransactionId myxid = GetTopTransactionId(); int i; char buf[MAXPGPATH]; ListCell *lc; /* * Get rid of the files. Unlink failure is only a WARNING because (1) * it's too late to abort the transaction, and (2) leaving a leaked * file around has little real consequence anyway. */ for (i = 1; i <= list_length(exportedSnapshots); i++) { XactExportFilePath(buf, myxid, i, ""); if (unlink(buf)) elog(WARNING, "could not unlink file \"%s\": %m", buf); } /* * As with the FirstXactSnapshot, we needn't spend any effort on * cleaning up the per-snapshot data structures, but we do need to * unlink them from RegisteredSnapshots to prevent a warning below. */ foreach(lc, exportedSnapshots) { Snapshot snap = (Snapshot) lfirst(lc); pairingheap_remove(&RegisteredSnapshots, &snap->ph_node); }
/* * xid_age - compute age of an XID (relative to current xact) */ Datum xid_age(PG_FUNCTION_ARGS) { TransactionId xid = PG_GETARG_TRANSACTIONID(0); TransactionId now = GetTopTransactionId(); /* Permanent XIDs are always infinitely old */ if (!TransactionIdIsNormal(xid)) PG_RETURN_INT32(INT_MAX); PG_RETURN_INT32((int32) (now - xid)); }
/*
 * LockGXact
 *		Find the prepared transaction named by "gid" and mark it as locked by
 *		the calling transaction, for use by COMMIT or PREPARE.
 *
 * The first valid entry whose GID matches is used.  An error is raised if no
 * such entry exists, if the entry is locked by a transaction that is still
 * active, or if "user" is neither the owner of the prepared transaction nor
 * a superuser.  On success the entry's locking_xid is set to our top-level
 * XID and the entry is returned.
 */
static GlobalTransaction
LockGXact(const char *gid, Oid user)
{
	GlobalTransaction found = NULL;
	int			idx;

	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);

	/* Search for the first valid entry carrying the requested GID */
	for (idx = 0; idx < TwoPhaseState->numPrepXacts; idx++)
	{
		GlobalTransaction candidate = TwoPhaseState->prepXacts[idx];

		/* entries that are not yet valid are skipped */
		if (candidate->valid && strcmp(candidate->gid, gid) == 0)
		{
			found = candidate;
			break;
		}
	}

	if (found == NULL)
	{
		LWLockRelease(TwoPhaseStateLock);

		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("prepared transaction with identifier \"%s\" does not exist",
						gid)));

		/* NOTREACHED */
		return NULL;
	}

	/* We have it; check whether somebody else already holds it */
	if (TransactionIdIsValid(found->locking_xid))
	{
		if (TransactionIdIsActive(found->locking_xid))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("prepared transaction with identifier \"%s\" is busy",
							gid)));

		/* the previous locker is no longer active, so forget it */
		found->locking_xid = InvalidTransactionId;
	}

	if (user != found->owner && !superuser_arg(user))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to finish prepared transaction"),
				 errhint("Must be superuser or the user that prepared the transaction.")));

	/* All checks passed: take the lock on behalf of our transaction */
	found->locking_xid = GetTopTransactionId();

	LWLockRelease(TwoPhaseStateLock);

	return found;
}
/*
 * LogAccessExclusiveLock
 *		Log a single AccessExclusiveLock; meant for use during LockAcquire().
 *
 * The record is filled with the caller's database and relation OIDs plus our
 * top-level transaction id, and handed to LogAccessExclusiveLocks() as a
 * one-element array.
 */
void
LogAccessExclusiveLock(Oid dbOid, Oid relOid)
{
	xl_standby_lock lockrec;

	/*
	 * The locktag is decoded back into its original values so that we do not
	 * send lots of empty bytes with every message.  lock.h shows how a
	 * locktag is defined for LOCKTAG_RELATION.
	 */
	lockrec.relOid = relOid;
	lockrec.dbOid = dbOid;

	lockrec.xid = GetTopTransactionId();

	LogAccessExclusiveLocks(1, &lockrec);
}
/*
 * LogAccessExclusiveLockPrepare
 *		Get ready to log an AccessExclusiveLock; for use during LockAcquire().
 *
 * The only thing done here is to force assignment of a TransactionId to the
 * current transaction.  Both reasons concern lock release on the standby:
 *
 * 1. With an xid assigned, RecordTransactionCommit() and
 *	  RecordTransactionAbort() will not optimise away the transaction
 *	  completion record, which recovery relies upon to release locks.  This
 *	  is a hack, but the corner case is not worth extra code in the main
 *	  commit path.
 *
 * 2. The xid must be assigned before the lock is recorded in shared memory.
 *	  Otherwise a concurrently running GetRunningTransactionLocks() could
 *	  see a lock associated with InvalidTransactionId, a state that is later
 *	  asserted to be impossible.
 */
void
LogAccessExclusiveLockPrepare(void)
{
	/* result deliberately ignored; only the assignment side effect matters */
	(void) GetTopTransactionId();
}
/* * txid_current() returns int8 * * Return the current toplevel transaction ID as TXID * If the current transaction does not have one, one is assigned. */ Datum txid_current(PG_FUNCTION_ARGS) { txid val; TxidEpoch state; /* * Must prevent during recovery because if an xid is not assigned we try * to assign one, which would fail. Programs already rely on this function * to always return a valid current xid, so we should not change this to * return NULL or similar invalid xid. */ PreventCommandDuringRecovery("txid_current()"); load_xid_epoch(&state); val = convert_xid(GetTopTransactionId(), &state); PG_RETURN_INT64(val); }
/*
 *		XactLockTableWait
 *
 * Wait for the specified transaction to commit or abort.
 *
 * Note that this does the right thing for subtransactions: if we wait on a
 * subtransaction, we will exit as soon as it aborts or its top parent commits.
 * It takes some extra work to ensure this, because to save on shared memory
 * the XID lock of a subtransaction is released when it ends, whether
 * successfully or unsuccessfully.  So we have to check if it's "still running"
 * and if so wait for its parent.
 *
 * xid must be valid and must not be our own top-level XID; both conditions
 * are checked by assertions only.
 */
void
XactLockTableWait(TransactionId xid)
{
	LOCKTAG		tag;

	for (;;)
	{
		Assert(TransactionIdIsValid(xid));

		/*
		 * Use the "IfAny" variant here: GetTopTransactionId() assigns an XID
		 * when the transaction has none, and an assertion must not have
		 * side effects that make assert-enabled builds behave differently
		 * from production builds.  If we have no XID, the comparison is
		 * against InvalidTransactionId, which cannot equal the (valid) xid.
		 */
		Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));

		SET_LOCKTAG_TRANSACTION(tag, xid);

		/* Acquire and immediately release: we only want to wait, not hold */
		(void) LockAcquire(&tag, ShareLock, false, false);

		LockRelease(&tag, ShareLock, false);

		if (!TransactionIdIsInProgress(xid))
			break;

		/* Still running, so xid was a subtransaction: wait for its parent */
		xid = SubTransGetParent(xid);
	}
}
/*
 * Decode the SQL-level arguments of the calling function and run the point
 * read for them.
 *
 * Arguments read from fcinfo:
 *   0 - place specification datum, decoded by extractPlaceSpecification()
 *   1 - optional WKB geometry (bytea); when NULL no geometry is built
 *   2 - interpolation type, passed as int32
 *   3 - data (file) id, passed as int64
 *
 * The current top-level transaction id and command id are passed along to
 * readPoints() as well.  Returns whatever readPoints() returns.
 */
struct GridPointDataListIterator *
getExtractGridDataReturnValues(FunctionCallInfo fcinfo)
{
	struct PlaceSpecification placeSpecification;
	Datum rawPlaceSpec = PG_GETARG_DATUM(0);

	extractPlaceSpecification(&placeSpecification, &rawPlaceSpec);

	GEOSGeom geometry = NULL;

	if (!PG_ARGISNULL(1))
	{
		bytea *wkb = PG_GETARG_BYTEA_P(1);

		geometry = GEOSGeomFromWKB_buf((unsigned char *) VARDATA(wkb),
									   VARSIZE(wkb) - VARHDRSZ);
	}

	enum InterpolationType interpolationType =
		(enum InterpolationType) PG_GETARG_INT32(2);
	FileId fileId = PG_GETARG_INT64(3);

	TransactionId transactionId = GetTopTransactionId();

	/* Incremented for each function call in the same transaction */
	CommandId commandId = GetCurrentCommandId(true);

	/* readPoints() takes ownership of the geometry parameter */
	return readPoints(&placeSpecification, geometry, interpolationType,
					  fileId, transactionId, commandId);
}
/*
 * Look up (or build) the cached insert plan for the queue described by
 * "state", and enforce the queue's per-transaction event limit.
 *
 * The cache entry is keyed by queue id.  An existing plan is reused only if
 * it was built for the queue's current table; otherwise any old plan is
 * freed and a new one is made.  When state->per_tx_limit is non-negative,
 * the number of calls made under the current top-level transaction is
 * counted and an ERROR is raised once it exceeds the limit; qname is used
 * only for that error message.
 *
 * Returns the entry's plan.
 */
static void *
load_insert_plan(Datum qname, struct QueueState *state)
{
	Oid			key = state->queue_id;
	bool		found = false;
	bool		reusable;
	struct InsertCacheEntry *slot;

	slot = hash_search(insert_cache, &key, HASH_ENTER, &found);

	/* a freshly entered slot has no meaningful contents to reuse */
	reusable = found && slot->plan && state->cur_table == slot->cur_table;

	if (!reusable)
	{
		if (found && slot->plan)
			SPI_freeplan(slot->plan);

		slot->cur_table = state->cur_table;
		slot->last_xid = 0;

		/* make_plan() can fail, so the struct must be valid beforehand */
		slot->plan = NULL;
		slot->plan = make_plan(state);
	}

	if (state->per_tx_limit >= 0)
	{
		TransactionId current = GetTopTransactionId();

		/* a new transaction restarts the counter */
		if (slot->last_xid != current)
		{
			slot->last_xid = current;
			slot->last_count = 0;
		}

		slot->last_count++;
		if (slot->last_count > state->per_tx_limit)
			elog(ERROR, "Queue '%s' allows max %d events from one TX",
				 TextDatumGetCString(qname), state->per_tx_limit);
	}

	return slot->plan;
}
/*
 *		ConditionalXactLockTableWait
 *
 * Like XactLockTableWait, but never blocks: each lock in the chain is taken
 * only if it is available immediately.
 *
 * Returns true if the wait completed (the transaction is no longer in
 * progress), false as soon as a lock could not be acquired without blocking.
 *
 * xid must be valid and must not be our own top-level XID; both conditions
 * are checked by assertions only.
 */
bool
ConditionalXactLockTableWait(TransactionId xid)
{
	LOCKTAG		tag;

	for (;;)
	{
		Assert(TransactionIdIsValid(xid));

		/*
		 * Use the "IfAny" variant here: GetTopTransactionId() assigns an XID
		 * when the transaction has none, and an assertion must not have
		 * side effects that make assert-enabled builds behave differently
		 * from production builds.  If we have no XID, the comparison is
		 * against InvalidTransactionId, which cannot equal the (valid) xid.
		 */
		Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));

		SET_LOCKTAG_TRANSACTION(tag, xid);

		/* dontWait = true: give up instead of sleeping on the lock */
		if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL)
			return false;

		LockRelease(&tag, ShareLock, false);

		if (!TransactionIdIsInProgress(xid))
			break;

		/* Still running, so xid was a subtransaction: try its parent */
		xid = SubTransGetParent(xid);
	}

	return true;
}
/*
 * AtEOXact_Snapshot
 *		End-of-transaction cleanup for the snapshot manager.
 *
 * isCommit tells whether the transaction is committing; only in that case
 * are leftover registered or active snapshots reported (as WARNINGs).  In
 * all cases the module's state is reset at the end.
 */
void
AtEOXact_Snapshot(bool isCommit)
{
	/*
	 * In transaction-snapshot mode, drop our privately-managed reference to
	 * the transaction snapshot.  RegisteredSnapshots has to be decremented
	 * so that the leftover check further down stays quiet.  FreeSnapshot is
	 * not called, for two reasons: the memory disappears together with
	 * TopTransactionContext anyway, and if the snapshot was left stacked as
	 * active we do not want the code below to follow a dangling pointer.
	 */
	if (FirstXactSnapshot != NULL)
	{
		Assert(FirstXactSnapshot->regd_count > 0);
		Assert(RegisteredSnapshots > 0);
		RegisteredSnapshots--;
		FirstXactSnapshot = NULL;
	}

	/* Clean up after any snapshots we exported */
	if (exportedSnapshots != NIL)
	{
		const int	nexported = list_length(exportedSnapshots);
		TransactionId topxid = GetTopTransactionId();
		char		path[MAXPGPATH];
		int			n;

		/*
		 * Remove the export files.  A failed unlink is reported only as a
		 * WARNING, because (1) the transaction can no longer be aborted at
		 * this point, and (2) a leaked file has little real consequence.
		 */
		for (n = 1; n <= nexported; n++)
		{
			XactExportFilePath(path, topxid, n, "");
			if (unlink(path) != 0)
				elog(WARNING, "could not unlink file \"%s\": %m", path);
		}

		/*
		 * Just as for FirstXactSnapshot, the per-snapshot data structures
		 * are not cleaned up individually, but the RegisteredSnapshots
		 * count must be adjusted to avoid a warning below.
		 *
		 * (A plain counter would be enough for this; the list is kept in
		 * case xmin management is ever improved.)
		 */
		Assert(RegisteredSnapshots >= nexported);
		RegisteredSnapshots -= nexported;

		exportedSnapshots = NIL;
	}

	/* When committing, report anything that was left behind */
	if (isCommit)
	{
		ActiveSnapshotElt *elt;

		if (RegisteredSnapshots != 0)
			elog(WARNING, "%d registered snapshots seem to remain after cleanup",
				 RegisteredSnapshots);

		/* active snapshots that were never popped */
		elt = ActiveSnapshot;
		while (elt != NULL)
		{
			elog(WARNING, "snapshot %p still active", elt);
			elt = elt->as_next;
		}
	}

	/*
	 * Finally reset the module state.  No explicit freeing is required; the
	 * memory goes away with TopTransactionContext.
	 */
	ActiveSnapshot = NULL;
	RegisteredSnapshots = 0;

	CurrentSnapshot = NULL;
	SecondarySnapshot = NULL;

	FirstSnapshotSet = false;

	SnapshotResetXmin();
}
/*
 * Indicate we intend to create a filespace file as part of the current transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After 1 or more calls to this routine to mark intention about filespace files that are going
 * to be created, call ~_DoPendingCreates to do the actual file-system creates.  (See its
 * note on XLOG flushing).
 *
 * Parameters:
 *   filespaceOid        - OID of the filespace being created.
 *   filespaceLocation   - location string; copied blank-padded into the
 *                         shared directory entry.
 *   persistentTid       - output: set to the entry's persistentTid.
 *   persistentSerialNum - output: set to the entry's persistentSerialNum.
 *   flushToXLog         - passed through to PersistentFilespace_AddTuple().
 *
 * Returns early, leaving the outputs untouched, when
 * Persistent_BeforePersistenceWork() reports true.  Raises ERROR (after
 * releasing the persistent write-lock) when no directory entry could be
 * created.
 */
void PersistentFilespace_MarkCreatePending(
	Oid 		filespaceOid,
	char		*filespaceLocation,
	ItemPointer		persistentTid,
	int64			*persistentSerialNum,
	bool			flushToXLog)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;

	FilespaceDirEntry filespaceDirEntry;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent filespace %u because we are before persistence work",
				 filespaceOid);

		/*
		 * The initdb process will load the persistent table once we are out
		 * of bootstrap mode.
		 */
		return;
	}

	PersistentFilespace_VerifyInitScan();

	/* Build the object name used later for the PendingDelete entry and logging */
	PersistentFileSysObjName_SetFilespaceDir(&fsObjName,filespaceOid,is_filespace_shared);

	/* Everything from here to the matching UNLOCK runs under the persistent write-lock */
	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	filespaceDirEntry =
				PersistentFilespace_CreateDirUnderLock(filespaceOid);
	if (filespaceDirEntry == NULL)
	{
		/* If out of shared memory, no need to promote to PANIC. */
		WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Out of shared-memory for persistent filespaces"),
				 errhint("You may need to increase the gp_max_filespaces value"),
				 errOmitLocation(true)));
	}

	PersistentFilespace_BlankPadCopyLocation(
										filespaceDirEntry->locationBlankPadded1,
										filespaceLocation);

	filespaceDirEntry->state = PersistentFileSysState_CreatePending;

	/* The new tuple records our top-level XID as the parent XID */
	PersistentFilespace_AddTuple(
							filespaceDirEntry,
							/* createMirrorDataLossTrackingSessionNum */ 0,
							/* reserved */ 0,
							/* parentXid */ GetTopTransactionId(),
							flushToXLog);

	/* Hand the tuple's identity back to the caller */
	*persistentTid = filespaceDirEntry->persistentTid;
	*persistentSerialNum = filespaceDirEntry->persistentSerialNum;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_filespace(filespaceOid);
#endif

#ifdef FAULT_INJECTOR
	FaultInjector_InjectFaultIfSet(
								   FaultBeforePendingDeleteFilespaceEntry,
								   DDLNotSpecified,
								   "",	/* databaseName */
								   ""); /* tableName */
#endif

	/*
	 * MPP-18228
	 * To make adding 'Create Pending' entry to persistent table and adding
	 * to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
					&fsObjName,
					persistentTid,
					*persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	/*
	 * NOTE(review): the message below says state 'Created' although the
	 * entry was just put into PersistentFileSysState_CreatePending --
	 * confirm whether the wording is intentional.
	 */
	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
		     "Persistent filespace directory: Add '%s' in state 'Created', serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 *persistentSerialNum,
			 ItemPointerToString(persistentTid));
}
/*
 * StrategyBufferLookup
 *
 *	Search the cache directory for a page.  A buffer is returned only when
 *	the page is found on the T1 or T2 list; a B1 hit is merely counted and
 *	reported through *cdb_found_index, to possibly affect later behaviour.
 *
 *	recheck is documented as meaning "rechecking after I/O wait; do not
 *	change internal status".  NOTE(review): this body never consults it.
 *
 *	*cdb_found_index receives the index of the CDB that was found, or the
 *	negative value returned by BufTableLookup() if there is none.  Callers
 *	are only expected to pass it on to StrategyReplaceBuffer().
 */
BufferDesc *
StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
					 int *cdb_found_index)
{
	BufferStrategyCDB *entry;
	int			slot;

	/* Optional statistics printout */
	if (DebugSharedBuffers > 0)
		StrategyStatsDump();

	/* Every call counts as a lookup */
	StrategyControl->num_lookup++;

	/* Probe the shared hash table; a negative result is a complete miss */
	slot = BufTableLookup(tagPtr);
	*cdb_found_index = slot;
	if (slot < 0)
		return NULL;

	/* Some CDB exists for this page; count the hit against its list */
	entry = &StrategyCDB[slot];
	StrategyControl->num_hit[entry->list]++;

	if (entry->list == STRAT_LIST_T1)
	{
		/*
		 * T1 hit.  The entry is promoted to the T2 MRU position only when
		 * a different transaction read it into T1 *and* neither that
		 * transaction nor this one is a VACUUM.  The transaction test is
		 * needed because any UPDATE or DELETE in PostgreSQL performs
		 * several ReadBuffer() calls: first during the scan, later during
		 * heap_update() or heap_delete().  In the remaining non-VACUUM
		 * cases the entry moves to the T1 MRU position; a VACUUM process
		 * leaves the lists untouched.
		 */
		if (!strategy_hint_vacuum)
		{
			if (entry->t1_vacuum ||
				TransactionIdEquals(entry->t1_xid, GetTopTransactionId()))
			{
				STRAT_LIST_REMOVE(entry);
				STRAT_MRU_INSERT(entry, STRAT_LIST_T1);

				/*
				 * A page recently loaded by VACUUM has now been referenced
				 * by a non-VACUUM process: clear the stigma, so the state
				 * is the same as if this process had loaded the page.
				 */
				if (entry->t1_vacuum)
				{
					entry->t1_xid = GetTopTransactionId();
					entry->t1_vacuum = false;
				}
			}
			else
			{
				STRAT_LIST_REMOVE(entry);
				STRAT_MRU_INSERT(entry, STRAT_LIST_T2);
			}
		}

		return &BufferDescriptors[entry->buf_id];
	}

	if (entry->list == STRAT_LIST_T2)
	{
		/*
		 * T2 hit: move the entry to the T2 MRU position and return the
		 * buffer.  t1_vacuum cannot be set for a T2 entry, so it is not
		 * checked; but a VACUUM process does not promote to MRU.
		 */
		if (!strategy_hint_vacuum)
		{
			STRAT_LIST_REMOVE(entry);
			STRAT_MRU_INSERT(entry, STRAT_LIST_T2);
		}

		return &BufferDescriptors[entry->buf_id];
	}

	/*
	 * The block has been seen before, but its data is not in memory right
	 * now: for the buffer manager this is a cache miss.
	 */
	Assert(entry->list == STRAT_LIST_B1);

	return NULL;
}
/*
 * StrategyReplaceBuffer
 *
 *	Called by the buffer manager to inform us that he flushed a buffer
 *	and is now about to replace the content. Prior to this call,
 *	the cache algorithm still reports the buffer as in the cache. After
 *	this call we report the new block, even if IO might still need to
 *	be done to bring in the new content.
 *
 *	buf is the buffer being reused and newTag the tag of the block that is
 *	about to occupy it.
 *
 *	cdb_found_index and cdb_replace_index must be the auxiliary values
 *	returned by previous calls to StrategyBufferLookup and StrategyGetBuffer.
 *	A negative cdb_found_index means the lookup was a complete miss; a
 *	negative cdb_replace_index means an unused buffer is being used, so no
 *	CDB has to be evicted.
 */
void
StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
					  int cdb_found_index, int cdb_replace_index)
{
	BufferStrategyCDB *cdb_found;
	BufferStrategyCDB *cdb_replace;

	if (cdb_found_index >= 0)
	{
		/* This must have been a ghost buffer cache hit (B1 list) */
		cdb_found = &StrategyCDB[cdb_found_index];

		/* Assert that the buffer remembered in cdb_found is the one */
		/* the buffer manager is currently faulting in */
		Assert(BUFFERTAGS_EQUAL(cdb_found->buf_tag, *newTag));

		if (cdb_replace_index >= 0)
		{
			/* We are satisfying it with an evicted T buffer */
			cdb_replace = &StrategyCDB[cdb_replace_index];

			/* Assert that the buffer remembered in cdb_replace is */
			/* the one the buffer manager has just evicted */
			Assert(cdb_replace->list == STRAT_LIST_T1 ||
				   cdb_replace->list == STRAT_LIST_T2);
			Assert(cdb_replace->buf_id == buf->buf_id);
			Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));

			/*
			 * Under normal circumstances we move evicted T1 list entries
			 * to the B1 list.  However, T1 entries that exist only because
			 * of VACUUM are just thrown into the unused list instead,
			 * since it's unlikely they'll be touched again soon.  Similarly,
			 * evicted T2 entries are thrown away; the LRU T2 entry cannot
			 * have been touched recently.
			 */
			if (cdb_replace->t1_vacuum ||
				cdb_replace->list == STRAT_LIST_T2)
			{
				/* Forget the block entirely and push the CDB on the unused list */
				BufTableDelete(&(cdb_replace->buf_tag));
				STRAT_LIST_REMOVE(cdb_replace);
				cdb_replace->next = StrategyControl->listUnusedCDB;
				StrategyControl->listUnusedCDB = cdb_replace_index;
			}
			else
			{
				/* Keep remembering the block as a ghost entry on B1 */
				STRAT_LIST_REMOVE(cdb_replace);
				STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
			}
			/* And clear its block reference */
			cdb_replace->buf_id = -1;
		}
		else
		{
			/* We are satisfying it with an unused buffer */
		}

		/* Now the found B1 CDB gets the buffer and is moved to T2 */
		cdb_found->buf_id = buf->buf_id;
		STRAT_LIST_REMOVE(cdb_found);
		STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
	}
	else
	{
		/*
		 * This was a complete cache miss, so we need to create a new CDB.
		 * We use a free one if available, else reclaim the tail end of B1.
		 */
		if (StrategyControl->listUnusedCDB >= 0)
		{
			/* Pop the head of the unused-CDB list */
			cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB];
			StrategyControl->listUnusedCDB = cdb_found->next;
		}
		else
		{
			/* Can't fail because we have more CDBs than buffers... */
			if (B1_LENGTH == 0)
				elog(PANIC, "StrategyReplaceBuffer: out of CDBs");
			/* Recycle the entry at the head of B1, dropping its hash entry */
			cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];

			BufTableDelete(&(cdb_found->buf_tag));
			STRAT_LIST_REMOVE(cdb_found);
		}

		/* Set the CDB's buf_tag and insert it into the hash table */
		cdb_found->buf_tag = *newTag;
		BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));

		if (cdb_replace_index >= 0)
		{
			/*
			 * The buffer was formerly in a T list, move its CDB to the
			 * appropriate list: B1 if T1, else discard it, as above
			 *
			 * NOTE(review): unlike the B1-hit path above, this branch does
			 * not test cdb_replace->t1_vacuum, so a VACUUM-loaded T1 entry
			 * goes to B1 here -- confirm whether that difference is
			 * intended.
			 */
			cdb_replace = &StrategyCDB[cdb_replace_index];

			Assert(cdb_replace->list == STRAT_LIST_T1 ||
				   cdb_replace->list == STRAT_LIST_T2);
			Assert(cdb_replace->buf_id == buf->buf_id);
			Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));

			if (cdb_replace->list == STRAT_LIST_T1)
			{
				STRAT_LIST_REMOVE(cdb_replace);
				STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
			}
			else
			{
				BufTableDelete(&(cdb_replace->buf_tag));
				STRAT_LIST_REMOVE(cdb_replace);
				cdb_replace->next = StrategyControl->listUnusedCDB;
				StrategyControl->listUnusedCDB = cdb_replace_index;
			}
			/* And clear its block reference */
			cdb_replace->buf_id = -1;
		}
		else
		{
			/* We are satisfying it with an unused buffer */
		}

		/* Assign the buffer id to the new CDB */
		cdb_found->buf_id = buf->buf_id;

		/*
		 * Specialized VACUUM optimization. If this complete cache miss
		 * happened because vacuum needed the page, we place it at the LRU
		 * position of T1; normally it goes at the MRU position.
		 */
		if (strategy_hint_vacuum)
		{
			if (TransactionIdEquals(strategy_vacuum_xid,
									GetTopTransactionId()))
				STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
			else
			{
				/* VACUUM must have been aborted by error, reset flag */
				strategy_hint_vacuum = false;
				STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
			}
		}
		else
			STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);

		/*
		 * Remember the Xid when this buffer went onto T1 to avoid a
		 * single UPDATE promoting a newcomer straight into T2.  Also
		 * remember if it was loaded for VACUUM.
		 */
		cdb_found->t1_xid = GetTopTransactionId();
		cdb_found->t1_vacuum = strategy_hint_vacuum;
	}
}
/*
 * Indicate we intend to create a database directory as part of the current
 * transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After 1 or more calls to this routine to mark intention about database directories that are
 * going to be created, call ~_DoPendingCreates to do the actual file-system creates.  (See its
 * note on XLOG flushing).
 *
 * Parameters:
 *   dbDirNode           - tablespace/database pair identifying the directory.
 *   persistentTid       - output: set to the new entry's persistentTid.
 *   persistentSerialNum - output: set to the new entry's persistentSerialNum.
 *   flushToXLog         - passed through to PersistentDatabase_AddTuple().
 *
 * Returns early, leaving the outputs untouched, when
 * Persistent_BeforePersistenceWork() reports true.  Raises ERROR if an entry
 * for the directory already exists or shared memory is exhausted, and PANIC
 * if the add step itself reports that the entry exists.
 */
void PersistentDatabase_MarkCreatePending(
	DbDirNode 		*dbDirNode,
	ItemPointer		persistentTid,
	int64			*persistentSerialNum,
	bool			flushToXLog)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	DatabaseDirEntry databaseDirEntry;
	SharedOidSearchAddResult addResult;

	PersistentFileSysObjName fsObjName;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent database '%s' because we are before persistence work",
				 GetDatabasePath(
					  dbDirNode->database,
					  dbDirNode->tablespace));
		/*
		 * The initdb process will load the persistent table once we
		 * out of bootstrap mode.
		 */
		return;
	}

	PersistentDatabase_VerifyInitScan();

	/* Build the object name used later for the PendingDelete entry */
	PersistentFileSysObjName_SetDatabaseDir(
									&fsObjName,
									dbDirNode->tablespace,
									dbDirNode->database,
									is_tablespace_shared);

	/* Everything from here to the matching UNLOCK runs under the persistent write-lock */
	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	/*
	 * The directory must not be known yet.
	 *
	 * NOTE(review): this ERROR is raised without the explicit UNLOCK that
	 * the out-of-memory path below performs -- confirm that error cleanup
	 * releases the persistent write-lock.
	 */
	databaseDirEntry =
		(DatabaseDirEntry)
			    SharedOidSearch_Find(
					&persistentDatabaseSharedData->databaseDirSearchTable,
					dbDirNode->database,
					dbDirNode->tablespace);
	if (databaseDirEntry != NULL)
		elog(ERROR, "Persistent database entry '%s' already exists in state '%s'",
			 GetDatabasePath(
				  dbDirNode->database,
				  dbDirNode->tablespace),
			 PersistentFileSysObjState_Name(databaseDirEntry->state));

	/* Add it to the shared search table; databaseDirEntry receives the entry */
	addResult =
		    SharedOidSearch_Add(
					&persistentDatabaseSharedData->databaseDirSearchTable,
					dbDirNode->database,
					dbDirNode->tablespace,
					(SharedOidSearchObjHeader**)&databaseDirEntry);
	if (addResult == SharedOidSearchAddResult_NoMemory)
	{
		/* If out of shared memory, no need to promote to PANIC. */
		WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Out of shared-memory for persistent databases"),
				 errhint("You may need to increase the gp_max_databases and "
				 		 "gp_max_tablespaces value"),
				 errOmitLocation(true)));
	}
	else if (addResult == SharedOidSearchAddResult_Exists)
		elog(PANIC, "Persistent database entry '%s' already exists in state '%s'",
		     GetDatabasePath(
				  dbDirNode->database,
				  dbDirNode->tablespace),
		     PersistentFileSysObjState_Name(databaseDirEntry->state));
	else
		Assert(addResult == SharedOidSearchAddResult_Ok);

	databaseDirEntry->state = PersistentFileSysState_CreatePending;

	databaseDirEntry->iteratorRefCount = 0;

	/* The new tuple records our top-level XID as the parent XID */
	PersistentDatabase_AddTuple(
							databaseDirEntry,
							/* reserved */ 0,
							/* parentXid */ GetTopTransactionId(),
							flushToXLog);

	/* Hand the tuple's identity back to the caller */
	*persistentTid = databaseDirEntry->persistentTid;
	*persistentSerialNum = databaseDirEntry->persistentSerialNum;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_database(dbDirNode->tablespace, dbDirNode->database);
#endif

#ifdef FAULT_INJECTOR
	FaultInjector_InjectFaultIfSet(
								   FaultBeforePendingDeleteDatabaseEntry,
								   DDLNotSpecified,
								   "",	/* databaseName */
								   ""); /* tableName */
#endif

	/*
	 * MPP-18228
	 * To make adding 'Create Pending' entry to persistent table and adding
	 * to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
					&fsObjName,
					persistentTid,
					*persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
}
/*
 * AlterSequence
 *
 * Modify the definition of a sequence relation
 *
 * stmt names the sequence and carries the new option list.  If the sequence
 * cannot be found (RangeVarGetRelid returns InvalidOid), a NOTICE is issued
 * and InvalidObjectAddress is returned; otherwise the address of the altered
 * sequence is returned.  Only the owner of the sequence may alter it.
 */
ObjectAddress
AlterSequence(AlterSeqStmt *stmt)
{
	Oid			relid;
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;
	FormData_pg_sequence new___;	/* workspace copy holding the new values */
	List	   *owned_by;
	ObjectAddress address;

	/* Open and lock sequence. */
	relid = RangeVarGetRelid(stmt->sequence, AccessShareLock, stmt->missing_ok);
	if (relid == InvalidOid)
	{
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->sequence->relname)));
		return InvalidObjectAddress;
	}

	init_sequence(relid, &elm, &seqrel);

	/* allow ALTER to sequence owner only */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
					   stmt->sequence->relname);

	/* lock the page's buffer and read the current sequence tuple */
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);

	/* Copy old values of options into workspace */
	memcpy(&new___, seq, sizeof(FormData_pg_sequence));

	/* Check and set the new values in the workspace copy */
	init_params(stmt->options, false, &new___, &owned_by);

	/* Clear local cache so that we don't think we have cached numbers */
	/* Note that we do not change the currval() state */
	elm->cached = elm->last;

	/*
	 * Make sure an XID is assigned before WAL-logging the change; check the
	 * comment above nextval_internal()'s equivalent call for the reason.
	 */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* Now okay to update the on-disk tuple */
	START_CRIT_SECTION();

	/* Overwrite the tuple data in the buffer with the new values */
	memcpy(seq, &new___, sizeof(FormData_pg_sequence));

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		Page		page = BufferGetPage(buf);

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = seqrel->rd_node;
		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));

		/* the record carries the complete (already updated) tuple */
		XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(seqrel, owned_by);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	ObjectAddressSet(address, RelationRelationId, relid);

	/* close the relation but keep whatever lock is held on it */
	relation_close(seqrel, NoLock);

	return address;
}
/*
 * Indicate we intend to create a tablespace file as part of the current transaction.
 *
 * An XLOG IntentToCreate record is generated that will guard the subsequent file-system
 * create in case the transaction aborts.
 *
 * After 1 or more calls to this routine to mark intention about tablespace files that are going
 * to be created, call ~_DoPendingCreates to do the actual file-system creates.  (See its
 * note on XLOG flushing).
 *
 * persistentTid and persistentSerialNum are outputs filled in by
 * PersistentTablespace_AddTuple().  The function returns early, leaving them
 * untouched, when Persistent_BeforePersistenceWork() reports true.
 */
void
PersistentTablespace_MarkCreatePending(
									   Oid filespaceOid,
 /* The filespace where the tablespace lives. */

									   Oid tablespaceOid,
 /* The tablespace OID for the create. */

							MirroredObjectExistenceState mirrorExistenceState,

									   ItemPointer persistentTid,
 /* Output: TID of the persistent tuple added for this tablespace. */

									   int64 *persistentSerialNum,
 /* Output: serial number of that persistent tuple. */

									   bool flushToXLog)
 /* When true, the XLOG record for this change will be flushed to disk. */

{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName fsObjName;

	TablespaceDirEntry tablespaceDirEntry;
	TransactionId topXid;

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent tablespace %u because we are before persistence work",
				 tablespaceOid);

		/*
		 * The initdb process will load the persistent table once we out of
		 * bootstrap mode.
		 */
		return;
	}

	PersistentTablespace_VerifyInitScan();

	/* Build the object name used later for the PendingDelete entry and logging */
	PersistentFileSysObjName_SetTablespaceDir(&fsObjName, tablespaceOid);

	/* Fetch our top-level XID before taking the persistent write-lock */
	topXid = GetTopTransactionId();

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	PersistentTablespace_AddTuple(
								  filespaceOid,
								  tablespaceOid,
								  PersistentFileSysState_CreatePending,
								   /* createMirrorDataLossTrackingSessionNum */ 0,
								  mirrorExistenceState,
								   /* reserved */ 0,
								   /* parentXid */ topXid,
								  flushToXLog,
								  persistentTid,
								  persistentSerialNum);

	/* Mirror the new tuple in the shared tablespace hash, under its own lock */
	WRITE_TABLESPACE_HASH_LOCK;
	tablespaceDirEntry =
		PersistentTablespace_CreateEntryUnderLock(filespaceOid, tablespaceOid);
	Assert(tablespaceDirEntry != NULL);
	tablespaceDirEntry->state = PersistentFileSysState_CreatePending;
	ItemPointerCopy(persistentTid, &tablespaceDirEntry->persistentTid);
	tablespaceDirEntry->persistentSerialNum = *persistentSerialNum;
	WRITE_TABLESPACE_HASH_UNLOCK;

	/*
	 * This XLOG must be generated under the persistent write-lock.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_tablespace(
								 filespaceOid,
								 tablespaceOid);
#endif

	SIMPLE_FAULT_INJECTOR(FaultBeforePendingDeleteTablespaceEntry);

	/*
	 * MPP-18228 To make adding 'Create Pending' entry to persistent table and
	 * adding to the PendingDelete list atomic
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
											   &fsObjName,
											   persistentTid,
											   *persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

	/*
	 * NOTE(review): the message below says state 'Created' although the
	 * tuple was added in PersistentFileSysState_CreatePending -- confirm
	 * whether the wording is intentional.
	 */
	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "Persistent tablespace directory: Add '%s' in state 'Created', mirror existence state '%s', serial number " INT64_FORMAT " at TID %s",
			 PersistentFileSysObjName_ObjectName(&fsObjName),
			 MirroredObjectExistenceState_Name(mirrorExistenceState),
			 *persistentSerialNum,
			 ItemPointerToString(persistentTid));
}
/*
 * StrategyHintVacuum
 *		Record whether a VACUUM is currently running.
 *
 * Besides the flag itself, the caller's top-level transaction id is stored
 * in strategy_vacuum_xid on every call, whichever value the flag takes.
 */
void
StrategyHintVacuum(bool vacuum_active)
{
	TransactionId callerXid;

	strategy_hint_vacuum = vacuum_active;

	callerXid = GetTopTransactionId();
	strategy_vacuum_xid = callerXid;
}
/* * Indicate we intend to create a relation file as part of the current transaction. * * This function adds an entry in 'gp_persistent_relation_node' for either a new table (segment file * # 0) or a new segment file under AO table (segment file # > 0 for row/column-oriented AO) with a state * 'Create Pending'. An XLOG IntentToCreate record is generated that will guard the subsequent file-system * create in case the transaction aborts. * * Paramaters * ----------- * relFileNode = The tablespace, database, and relation OIDs for the create * segmentFileNum = As the name implies ( 0 for heap * >= 0 for RO/CO AO as applicable) * relStorageMgr = Persistent Relation storage Manager * relBufpoolKind = Buffer pool type beneath corrosponding relation * TODO bufferPollBulkLoad = ??? * TODO mirrorExistenceState = ??? * TODO relDataSynchronizationState = ??? * flushToXlog = If true, the XLOG record for this change will be flushed to disk. * TODO isLocalBuf = ??? * * Return * ------ * relationName = Name of the relation used for either debugging or to store in PendingDelete LL. * persistentTid = Resulting TID of the gp_persistent_rel_files tuple for the relation * serialNum = Resulting serial number for the relation. 
Distinquishes the uses of the tuple */ void PersistentRelation_AddCreatePending( RelFileNode *relFileNode, int32 segmentFileNum, PersistentFileSysRelStorageMgr relStorageMgr, PersistentFileSysRelBufpoolKind relBufpoolKind, bool bufferPoolBulkLoad, MirroredObjectExistenceState mirrorExistenceState, MirroredRelDataSynchronizationState relDataSynchronizationState, char *relationName, ItemPointer persistentTid, int64 *serialNum, bool flushToXLog, bool isLocalBuf) { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; XLogRecPtr mirrorBufpoolResyncCkptLoc; ItemPointerData previousFreeTid; Datum values[Natts_gp_persistent_relation_node]; if(RelFileNode_IsEmpty(relFileNode)) elog(ERROR, "Invalid RelFileNode (0,0,0)"); MemSet(&previousFreeTid, 0, sizeof(ItemPointerData)); MemSet(&mirrorBufpoolResyncCkptLoc, 0, sizeof(XLogRecPtr)); if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent relation '%s' because we are before persistence work", relpath(*relFileNode)); MemSet(persistentTid, 0, sizeof(ItemPointerData)); *serialNum = 0; return; // The initdb process will load the persistent table once we out of bootstrap mode. } /* Verify if the needed shared mem data structures for persistent tables are setup and inited */ PersistentRelation_VerifyInitScan(); /* Setup the file system object name */ PersistentFileSysObjName_SetRelationFile( &fsObjName, relFileNode, segmentFileNum); WRITE_PERSISTENT_STATE_ORDERED_LOCK; /* Create a values array which will be used to create a 'gp_persistent_relation_node' tuple */ GpPersistentRelationNode_SetDatumValues( values, relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum, relStorageMgr, (bufferPoolBulkLoad ? 
PersistentFileSysState_BulkLoadCreatePending : PersistentFileSysState_CreatePending), /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, relDataSynchronizationState, /* mirrorBufpoolMarkedForScanIncrementalResync */ false, /* mirrorBufpoolResyncChangedPageCount */ 0, &mirrorBufpoolResyncCkptLoc, /* mirrorBufpoolResyncCkptBlockNum */ 0, /* mirrorAppendOnlyLossEof */ 0, /* mirrorAppendOnlyNewEof */ 0, relBufpoolKind, GetTopTransactionId(), /* persistentSerialNum */ 0, // This will be set by PersistentFileSysObj_AddTuple. &previousFreeTid); /* Add a new tuple to 'gp_persistent_relation_node' table for the new relation/segment file * we intend to create. This will also create and apply a new persistent serial number. */ PersistentFileSysObj_AddTuple( PersistentFsObjType_RelationFile, values, flushToXLog, persistentTid, serialNum); /* * This XLOG must be generated under the persistent write-lock. */ #ifdef MASTER_MIRROR_SYNC mmxlog_log_create_relfilenode( relFileNode->spcNode, relFileNode->dbNode, relFileNode->relNode, segmentFileNum); #endif #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( FaultBeforePendingDeleteRelationEntry, DDLNotSpecified, "", // databaseName ""); // tableName #endif /* We'll add an entry to the PendingDelete LinkedList (LL) to remeber what we * created in this transaction (or sub-transaction). If the transaction * aborts then we can search for all such entries in this LL and get rid of (delete) * such relations or segment files on the disk. 
* * MPP-18228 * To make adding 'Create Pending' entry to persistent table and adding * to the PendingDelete list atomic */ PendingDelete_AddCreatePendingRelationEntry( &fsObjName, persistentTid, serialNum, relStorageMgr, relationName, isLocalBuf, bufferPoolBulkLoad); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent relation: Add '%s', relation name '%s' in state 'Create Pending', relation storage manager '%s', mirror existence state '%s', relation data resynchronization state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), relationName, PersistentFileSysRelStorageMgr_Name(relStorageMgr), MirroredObjectExistenceState_Name(mirrorExistenceState), MirroredRelDataSynchronizationState_Name(relDataSynchronizationState), *serialNum, ItemPointerToString(persistentTid)); }
Datum _Slony_I_createEvent(PG_FUNCTION_ARGS) { TransactionId newXid = GetTopTransactionId(); Slony_I_ClusterStatus *cs; char *ev_type_c; Datum argv[9]; char nulls[10]; char *buf; size_t buf_size; int rc; int i; int64 retval; bool isnull; #ifdef HAVE_GETACTIVESNAPSHOT if (GetActiveSnapshot() == NULL) elog(ERROR, "Slony-I: ActiveSnapshot is NULL in createEvent()"); #else if (SerializableSnapshot == NULL) elog(ERROR, "Slony-I: SerializableSnapshot is NULL in createEvent()"); #endif if ((rc = SPI_connect()) < 0) elog(ERROR, "Slony-I: SPI_connect() failed in createEvent()"); /* * Get or create the cluster status information and make sure it has the * SPI plans that we need here. */ cs = getClusterStatus(PG_GETARG_NAME(0), PLAN_INSERT_EVENT); buf_size = 8192; buf = palloc(buf_size); /* * Do the following only once per transaction. */ if (!TransactionIdEquals(cs->currentXid, newXid)) { cs->currentXid = newXid; } /* * Call the saved INSERT plan */ for (i = 1; i < 10; i++) { if (i >= PG_NARGS() || PG_ARGISNULL(i)) { argv[i - 1] = (Datum) 0; nulls[i - 1] = 'n'; } else { argv[i - 1] = PG_GETARG_DATUM(i); nulls[i - 1] = ' '; } } nulls[9] = '\0'; if ((rc = SPI_execp(cs->plan_insert_event, argv, nulls, 0)) < 0) elog(ERROR, "Slony-I: SPI_execp() failed for \"INSERT INTO sl_event ...\""); /* * The INSERT plan also contains a SELECT currval('sl_event_seq'), use the * new sequence number as return value. */ if (SPI_processed != 1) elog(ERROR, "Slony-I: INSERT plan did not return 1 result row"); retval = DatumGetInt64(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); /* * For SYNC and ENABLE_SUBSCRIPTION events, we also remember all current * sequence values. 
*/ if (PG_NARGS() > 1 && !PG_ARGISNULL(1)) { ev_type_c = DatumGetPointer(DirectFunctionCall1( textout, PG_GETARG_DATUM(1))); if (strcmp(ev_type_c, "SYNC") == 0 || strcmp(ev_type_c, "ENABLE_SUBSCRIPTION") == 0) { /*@-nullpass@*/ if ((rc = SPI_execp(cs->plan_record_sequences, NULL, NULL, 0)) < 0) elog(ERROR, "Slony-I: SPI_execp() failed for \"INSERT INTO sl_seqlog ...\""); /*@+nullpass@*/ } } (void) SPI_finish(); /*@-mustfreefresh@*/ PG_RETURN_INT64(retval); }
/*
 * do_setval
 *
 * Shared implementation behind the 2- and 3-argument forms of SETVAL.
 *
 * relid	sequence to modify
 * next		value to store as the sequence's last_value; must lie within
 *			the sequence's min_value..max_value, else an ERROR is raised
 * iscalled	new is_called flag; the backend-local currval() state is
 *			updated only when this is true
 *
 * The 3-argument form (which sets the is_called flag) exists only for
 * pg_dump, and setting the flag may not work if multiple users are attached
 * to the database and referencing the sequence (unlikely while pg_dump is
 * restoring it).  It is needed so that pg_dump can restore the state of a
 * sequence exactly during data-only restores: it is the only way to clear
 * is_called in an existing sequence.
 */
static void
do_setval(Oid relid, int64 next, bool iscalled)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;
	bool		needs_wal;

	/* open and AccessShareLock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));
	}

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
	{
		PreventCommandIfReadOnly("setval()");
	}

	/*
	 * Not allowed during parallel operation: the cooperating backends would
	 * have to share the backend-local cached sequence information, which is
	 * not supported.
	 */
	PreventCommandIfParallelMode("setval()");

	/* lock the page's buffer and read the tuple */
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);

	if (next < seq->min_value || next > seq->max_value)
	{
		char		value_str[100];
		char		min_str[100];
		char		max_str[100];

		snprintf(value_str, sizeof(value_str), INT64_FORMAT, next);
		snprintf(min_str, sizeof(min_str), INT64_FORMAT, seq->min_value);
		snprintf(max_str, sizeof(max_str), INT64_FORMAT, seq->max_value);
		ereport(ERROR,
				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
				 errmsg("setval: value %s is out of bounds for sequence \"%s\" (%s..%s)",
						value_str, RelationGetRelationName(seqrel),
						min_str, max_str)));
	}

	/* The currval() state changes only when iscalled is true */
	if (iscalled)
	{
		elm->last = next;		/* last returned number */
		elm->last_valid = true;
	}

	/* Either way, discard any values cached for future use */
	elm->cached = elm->last;

	/* The same answer is needed before and inside the critical section. */
	needs_wal = RelationNeedsWAL(seqrel);

	/* check the comment above nextval_internal()'s equivalent call. */
	if (needs_wal)
	{
		GetTopTransactionId();
	}

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	seq->last_value = next;		/* last fetched number */
	seq->is_called = iscalled;
	seq->log_cnt = 0;

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (needs_wal)
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		Page		page = BufferGetPage(buf);

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = seqrel->rd_node;
		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);
}
/*
 * nextval_internal
 *
 * Return the next value of the sequence identified by relid.
 *
 * If the backend-local entry still has cached values (elm->last differs
 * from elm->cached), the next one is handed out without touching the
 * sequence's buffer.  Otherwise the sequence tuple is read, up to
 * cache_value values (plus SEQ_LOG_VALS extra when a WAL record is going to
 * be written) are fetched, the tuple is updated, and the first fetched value
 * is returned.
 *
 * Raises ERROR when the caller lacks USAGE/UPDATE privilege, or when a
 * non-cycling sequence has reached its maximum (ascending) or minimum
 * (descending) value.
 */
static int64
nextval_internal(Oid relid)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	Page		page;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;
	int64		incby,
				maxv,
				minv,
				cache,
				log,
				fetch,
				last;
	int64		result,
				next,
				rescnt = 0;
	bool		logit = false;

	/* open and AccessShareLock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(),
						  ACL_USAGE | ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
		PreventCommandIfReadOnly("nextval()");

	/*
	 * Forbid this during parallel operation because, to make it work,
	 * the cooperating backends would need to share the backend-local cached
	 * sequence information.  Currently, we don't support that.
	 */
	PreventCommandIfParallelMode("nextval()");

	/* Fast path: serve the value from the backend-local cache. */
	if (elm->last != elm->cached)		/* some numbers were cached */
	{
		Assert(elm->last_valid);
		Assert(elm->increment != 0);
		elm->last += elm->increment;
		relation_close(seqrel, NoLock);
		last_used_seq = elm;
		return elm->last;
	}

	/* lock the page's buffer and read the tuple */
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
	page = BufferGetPage(buf);

	last = next = result = seq->last_value;
	incby = seq->increment_by;
	maxv = seq->max_value;
	minv = seq->min_value;
	fetch = cache = seq->cache_value;
	log = seq->log_cnt;

	if (!seq->is_called)
	{
		rescnt++;				/* return last_value if not is_called */
		fetch--;
	}

	/*
	 * Decide whether we should emit a WAL log record.  If so, force up the
	 * fetch count to grab SEQ_LOG_VALS more values than we actually need to
	 * cache.  (These will then be usable without logging.)
	 *
	 * If this is the first nextval after a checkpoint, we must force a new
	 * WAL record to be written anyway, else replay starting from the
	 * checkpoint would fail to advance the sequence past the logged values.
	 * In this case we may as well fetch extra values.
	 */
	if (log < fetch || !seq->is_called)
	{
		/* forced log to satisfy local demand for values */
		fetch = log = fetch + SEQ_LOG_VALS;
		logit = true;
	}
	else
	{
		XLogRecPtr	redoptr = GetRedoRecPtr();

		if (PageGetLSN(page) <= redoptr)
		{
			/* last update of seq was before checkpoint */
			fetch = log = fetch + SEQ_LOG_VALS;
			logit = true;
		}
	}

	while (fetch)				/* try to fetch cache [+ log ] numbers */
	{
		/*
		 * Check MAXVALUE for ascending sequences and MINVALUE for descending
		 * sequences
		 */
		if (incby > 0)
		{
			/* ascending sequence */
			if ((maxv >= 0 && next > maxv - incby) ||
				(maxv < 0 && next + incby > maxv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!seq->is_cycled)
				{
					/* note: this local shadows the outer Buffer "buf" */
					char		buf[100];

					snprintf(buf, sizeof(buf), INT64_FORMAT, maxv);
					ereport(ERROR,
						  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						   errmsg("nextval: reached maximum value of sequence \"%s\" (%s)",
								  RelationGetRelationName(seqrel), buf)));
				}
				next = minv;
			}
			else
				next += incby;
		}
		else
		{
			/* descending sequence */
			if ((minv < 0 && next < minv - incby) ||
				(minv >= 0 && next + incby < minv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!seq->is_cycled)
				{
					/* note: this local shadows the outer Buffer "buf" */
					char		buf[100];

					snprintf(buf, sizeof(buf), INT64_FORMAT, minv);
					ereport(ERROR,
						  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						   errmsg("nextval: reached minimum value of sequence \"%s\" (%s)",
								  RelationGetRelationName(seqrel), buf)));
				}
				next = maxv;
			}
			else
				next += incby;
		}
		fetch--;
		if (rescnt < cache)
		{
			log--;
			rescnt++;
			last = next;
			if (rescnt == 1)	/* if it's first result - */
				result = next;	/* it's what to return */
		}
	}

	log -= fetch;				/* adjust for any unfetched numbers */
	Assert(log >= 0);

	/* save info in local cache */
	elm->last = result;			/* last returned number */
	elm->cached = last;			/* last fetched number */
	elm->last_valid = true;

	last_used_seq = elm;

	/*
	 * If something needs to be WAL logged, acquire an xid, so this
	 * transaction's commit will trigger a WAL flush and wait for
	 * syncrep.  It's sufficient to ensure the toplevel transaction has an
	 * xid, no need to assign xids to subxacts, that'll already trigger an
	 * appropriate wait.  (Have to do that here, so we're outside the
	 * critical section)
	 */
	if (logit && RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	/*
	 * We must mark the buffer dirty before doing XLogInsert(); see notes in
	 * SyncOneBuffer().  However, we don't apply the desired changes just yet.
	 * This looks like a violation of the buffer update protocol, but it is in
	 * fact safe because we hold exclusive lock on the buffer.  Any other
	 * process, including a checkpoint, that tries to examine the buffer
	 * contents will block until we release the lock, and then will see the
	 * final state that we install below.
	 */
	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (logit && RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;

		/*
		 * We don't log the current state of the tuple, but rather the state
		 * as it would appear after "log" more fetches.  This lets us skip
		 * that many future WAL records, at the cost that we lose those
		 * sequence values if we crash.
		 */
		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		/* set values that will be saved in xlog */
		seq->last_value = next;
		seq->is_called = true;
		seq->log_cnt = 0;

		xlrec.node = seqrel->rd_node;

		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	/* Now update sequence tuple to the intended final state */
	seq->last_value = last;		/* last fetched number */
	seq->is_called = true;
	seq->log_cnt = log;			/* how much is logged */

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);

	return result;
}
/*
 * ExportSnapshot
 *		Write the given snapshot to a file so other backends can import it.
 *
 * Returns a palloc'd copy of the export file's base name, which is the
 * token an importer uses to identify the snapshot.
 *
 * Raises ERROR when called inside a subtransaction, or when the export file
 * cannot be created, written or renamed.
 */
static char *
ExportSnapshot(Snapshot snapshot)
{
	TransactionId topXid;
	TransactionId *children;
	int			nchildren;
	int			addTopXid;
	int			snapno;
	int			idx;
	StringInfoData buf;
	FILE	   *fp;
	MemoryContext oldcxt;
	char		path[MAXPGPATH];
	char		pathtmp[MAXPGPATH];

	/*
	 * It's tempting to call RequireTransactionChain here, since it's not
	 * very useful to export a snapshot that will disappear immediately
	 * afterwards.  However, we haven't got enough information to do that,
	 * since we don't know if we're at top level or not.  For example, we
	 * could be inside a plpgsql function that is going to fire off other
	 * transactions via dblink.  Rather than disallow perfectly legitimate
	 * usages, don't make a check.
	 *
	 * Also note that we don't make any restriction on the transaction's
	 * isolation level; however, importers must check the level if they
	 * are serializable.
	 */

	/* Assigns a transaction ID if this transaction has none yet. */
	topXid = GetTopTransactionId();

	/*
	 * Exporting from a subtransaction is refused: importers have no easy way
	 * to verify that the same subtransaction is still running.
	 */
	if (IsSubTransaction())
	{
		ereport(ERROR,
				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
				 errmsg("cannot export a snapshot from a subtransaction")));
	}

	/*
	 * Previously committed subtransactions are allowed, though.  Importers
	 * must see them as still running, so collect their XIDs for inclusion in
	 * the exported data.
	 */
	nchildren = xactGetCommittedChildren(&children);

	/*
	 * Copy the snapshot into TopTransactionContext, add it to the
	 * exportedSnapshots list, and mark it pseudo-registered.  We do this to
	 * ensure that the snapshot's xmin is honored for the rest of the
	 * transaction.  (Right now, because SnapshotResetXmin is so stupid, this
	 * is overkill; but later we might make that routine smarter.)
	 */
	snapshot = CopySnapshot(snapshot);

	oldcxt = MemoryContextSwitchTo(TopTransactionContext);
	exportedSnapshots = lappend(exportedSnapshots, snapshot);
	MemoryContextSwitchTo(oldcxt);

	snapshot->regd_count++;
	RegisteredSnapshots++;

	/* Ordinal of this export within the transaction; used in file names. */
	snapno = list_length(exportedSnapshots);

	/*
	 * Serialize the snapshot as text, together with identification data
	 * about this transaction.  ImportSnapshot expects a rigid format: every
	 * line is fieldname:value.
	 */
	initStringInfo(&buf);

	appendStringInfo(&buf, "xid:%u\n", topXid);
	appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
	appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
	appendStringInfo(&buf, "ro:%d\n", XactReadOnly);

	appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
	appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);

	/*
	 * We must include our own top transaction ID in the top-xid data, since
	 * by definition we will still be running when the importing transaction
	 * adopts the snapshot, but GetSnapshotData never includes our own XID in
	 * the snapshot.  (There must, therefore, be enough room to add it.)
	 *
	 * However, it could be that our topXid is after the xmax, in which case
	 * we shouldn't include it because xip[] members are expected to be before
	 * xmax.  (We need not make the same check for subxip[] members, see
	 * snapshot.h.)
	 */
	addTopXid = 0;
	if (TransactionIdPrecedes(topXid, snapshot->xmax))
		addTopXid = 1;

	appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
	for (idx = 0; idx < snapshot->xcnt; idx++)
	{
		appendStringInfo(&buf, "xip:%u\n", snapshot->xip[idx]);
	}
	if (addTopXid)
	{
		appendStringInfo(&buf, "xip:%u\n", topXid);
	}

	/*
	 * Our subcommitted child XIDs go into the subxid data in the same way,
	 * unless the combined list would overflow.
	 */
	if (!snapshot->suboverflowed &&
		snapshot->subxcnt + nchildren <= GetMaxSnapshotSubxidCount())
	{
		appendStringInfoString(&buf, "sof:0\n");
		appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
		for (idx = 0; idx < snapshot->subxcnt; idx++)
		{
			appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[idx]);
		}
		for (idx = 0; idx < nchildren; idx++)
		{
			appendStringInfo(&buf, "sxp:%u\n", children[idx]);
		}
	}
	else
	{
		appendStringInfoString(&buf, "sof:1\n");
	}

	appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);

	/*
	 * Now write the text representation into a file.  We first write to a
	 * ".tmp" filename, and rename to final filename if no error.  This
	 * ensures that no other backend can read an incomplete file
	 * (ImportSnapshot won't allow it because of its valid-characters check).
	 */
	XactExportFilePath(pathtmp, topXid, snapno, ".tmp");

	fp = AllocateFile(pathtmp, PG_BINARY_W);
	if (!fp)
	{
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", pathtmp)));
	}

	if (fwrite(buf.data, buf.len, 1, fp) != 1)
	{
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", pathtmp)));
	}

	/* no fsync() since file need not survive a system crash */

	if (FreeFile(fp))
	{
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", pathtmp)));
	}

	/*
	 * Everything is in the .tmp file; rename it to drop the .tmp suffix.
	 */
	XactExportFilePath(path, topXid, snapno, "");

	if (rename(pathtmp, path) < 0)
	{
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not rename file \"%s\" to \"%s\": %m",
						pathtmp, path)));
	}

	/*
	 * The basename of the file is what we return from pg_export_snapshot().
	 * It's already in path in a textual format and we know that the path
	 * starts with SNAPSHOT_EXPORT_DIR.  Skip over the prefix and the slash
	 * and pstrdup it so as not to return the address of a local variable.
	 */
	return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
}
Datum _Slony_I_logTrigger(PG_FUNCTION_ARGS) { TransactionId newXid = GetTopTransactionId(); Slony_I_ClusterStatus *cs; TriggerData *tg; Datum argv[4]; text *cmdtype = NULL; int rc; Name cluster_name; int32 tab_id; char *attkind; int attkind_idx; int cmddata_need; /* * Don't do any logging if the current session role isn't Origin. */ if (SessionReplicationRole != SESSION_REPLICATION_ROLE_ORIGIN) return PointerGetDatum(NULL); /* * Get the trigger call context */ if (!CALLED_AS_TRIGGER(fcinfo)) elog(ERROR, "Slony-I: logTrigger() not called as trigger"); tg = (TriggerData *) (fcinfo->context); /* * Check all logTrigger() calling conventions */ if (!TRIGGER_FIRED_AFTER(tg->tg_event)) elog(ERROR, "Slony-I: logTrigger() must be fired AFTER"); if (!TRIGGER_FIRED_FOR_ROW(tg->tg_event)) elog(ERROR, "Slony-I: logTrigger() must be fired FOR EACH ROW"); if (tg->tg_trigger->tgnargs != 3) elog(ERROR, "Slony-I: logTrigger() must be defined with 3 args"); /* * Connect to the SPI manager */ if ((rc = SPI_connect()) < 0) elog(ERROR, "Slony-I: SPI_connect() failed in createEvent()"); /* * Get all the trigger arguments */ cluster_name = DatumGetName(DirectFunctionCall1(namein, CStringGetDatum(tg->tg_trigger->tgargs[0]))); tab_id = strtol(tg->tg_trigger->tgargs[1], NULL, 10); attkind = tg->tg_trigger->tgargs[2]; /* * Get or create the cluster status information and make sure it has the * SPI plans that we need here. */ cs = getClusterStatus(cluster_name, PLAN_INSERT_LOG); /* * Do the following only once per transaction. 
*/ if (!TransactionIdEquals(cs->currentXid, newXid)) { int32 log_status; bool isnull; /* * Determine the currently active log table */ if (SPI_execp(cs->plan_get_logstatus, NULL, NULL, 0) < 0) elog(ERROR, "Slony-I: cannot determine log status"); if (SPI_processed != 1) elog(ERROR, "Slony-I: cannot determine log status"); log_status = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); SPI_freetuptable(SPI_tuptable); switch (log_status) { case 0: case 2: cs->plan_active_log = cs->plan_insert_log_1; break; case 1: case 3: cs->plan_active_log = cs->plan_insert_log_2; break; default: elog(ERROR, "Slony-I: illegal log status %d", log_status); break; } cs->currentXid = newXid; } /* * Determine cmdtype and cmddata depending on the command type */ if (TRIGGER_FIRED_BY_INSERT(tg->tg_event)) { HeapTuple new_row = tg->tg_trigtuple; TupleDesc tupdesc = tg->tg_relation->rd_att; char *col_ident; char *col_value; int len_ident; int len_value; int i; int need_comma = false; char *OldDateStyle; char *cp = VARDATA(cs->cmddata_buf); /* * INSERT * * cmdtype = 'I' cmddata = ("col" [, ...]) values ('value' [, ...]) */ cmdtype = cs->cmdtype_I; /* * Specify all the columns */ *cp++ = '('; for (i = 0; i < tg->tg_relation->rd_att->natts; i++) { /* * Skip dropped columns */ if (tupdesc->attrs[i]->attisdropped) continue; col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1)); cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 + (len_ident = strlen(col_ident)); if (cs->cmddata_size < cmddata_need) { int have = (cp - (char *) (cs->cmddata_buf)); while (cs->cmddata_size < cmddata_need) cs->cmddata_size *= 2; cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size); cp = (char *) (cs->cmddata_buf) + have; } if (need_comma) *cp++ = ','; else need_comma = true; memcpy(cp, col_ident, len_ident); cp += len_ident; } /* * Append the string ") values (" */ *cp++ = ')'; *cp++ = ' '; *cp++ = 'v'; *cp++ = 'a'; *cp++ = 'l'; *cp++ = 'u'; *cp++ = 'e'; 
*cp++ = 's'; *cp++ = ' '; *cp++ = '('; /* * Append the values */ need_comma = false; OldDateStyle = GetConfigOptionByName("DateStyle", NULL); if (!strstr(OldDateStyle, "ISO")) set_config_option("DateStyle", "ISO", PGC_USERSET, PGC_S_SESSION, true, true); for (i = 0; i < tg->tg_relation->rd_att->natts; i++) { /* * Skip dropped columns */ if (tupdesc->attrs[i]->attisdropped) continue; if ((col_value = SPI_getvalue(new_row, tupdesc, i + 1)) == NULL) { col_value = "NULL"; } else { col_value = slon_quote_literal(col_value); } cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 + (len_value = strlen(col_value)); if (cs->cmddata_size < cmddata_need) { int have = (cp - (char *) (cs->cmddata_buf)); while (cs->cmddata_size < cmddata_need) cs->cmddata_size *= 2; cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size); cp = (char *) (cs->cmddata_buf) + have; } if (need_comma) *cp++ = ','; else need_comma = true; memcpy(cp, col_value, len_value); cp += len_value; } if (!strstr(OldDateStyle, "ISO")) set_config_option("DateStyle", OldDateStyle, PGC_USERSET, PGC_S_SESSION, true, true); /* * Terminate and done */ *cp++ = ')'; *cp = '\0'; SET_VARSIZE(cs->cmddata_buf, VARHDRSZ + (cp - VARDATA(cs->cmddata_buf))); } else if (TRIGGER_FIRED_BY_UPDATE(tg->tg_event)) { HeapTuple old_row = tg->tg_trigtuple; HeapTuple new_row = tg->tg_newtuple; TupleDesc tupdesc = tg->tg_relation->rd_att; Datum old_value; Datum new_value; bool old_isnull; bool new_isnull; char *col_ident; char *col_value; int len_ident; int len_value; int i; int need_comma = false; int need_and = false; char *OldDateStyle; char *cp = VARDATA(cs->cmddata_buf); /* * UPDATE * * cmdtype = 'U' cmddata = "col_ident"='value' [, ...] where * "pk_ident" = 'value' [ and ...] 
*/ cmdtype = cs->cmdtype_U; for (i = 0; i < tg->tg_relation->rd_att->natts; i++) { /* * Ignore dropped columns */ if (tupdesc->attrs[i]->attisdropped) continue; old_value = SPI_getbinval(old_row, tupdesc, i + 1, &old_isnull); new_value = SPI_getbinval(new_row, tupdesc, i + 1, &new_isnull); /* * If old and new value are NULL, the column is unchanged */ if (old_isnull && new_isnull) continue; /* * If both are NOT NULL, we need to compare the values and skip * setting the column if equal */ if (!old_isnull && !new_isnull) { Oid opr_oid; FmgrInfo *opr_finfo_p; /* * Lookup the equal operators function call info using the * typecache if available */ #ifdef HAVE_TYPCACHE TypeCacheEntry *type_cache; type_cache = lookup_type_cache( SPI_gettypeid(tupdesc, i + 1), TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO); opr_oid = type_cache->eq_opr; if (opr_oid == ARRAY_EQ_OP) opr_oid = InvalidOid; else opr_finfo_p = &(type_cache->eq_opr_finfo); #else FmgrInfo opr_finfo; opr_oid = compatible_oper_funcid(makeList1(makeString("=")), SPI_gettypeid(tupdesc, i + 1), SPI_gettypeid(tupdesc, i + 1), true); if (OidIsValid(opr_oid)) { fmgr_info(opr_oid, &opr_finfo); opr_finfo_p = &opr_finfo; } #endif /* * If we have an equal operator, use that to do binary * comparision. Else get the string representation of both * attributes and do string comparision. 
*/ if (OidIsValid(opr_oid)) { if (DatumGetBool(FunctionCall2(opr_finfo_p, old_value, new_value))) continue; } else { char *old_strval = SPI_getvalue(old_row, tupdesc, i + 1); char *new_strval = SPI_getvalue(new_row, tupdesc, i + 1); if (strcmp(old_strval, new_strval) == 0) continue; } } if (need_comma) *cp++ = ','; else need_comma = true; col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1)); if (new_isnull) col_value = "NULL"; else { OldDateStyle = GetConfigOptionByName("DateStyle", NULL); if (!strstr(OldDateStyle, "ISO")) set_config_option("DateStyle", "ISO", PGC_USERSET, PGC_S_SESSION, true, true); col_value = slon_quote_literal(SPI_getvalue(new_row, tupdesc, i + 1)); if (!strstr(OldDateStyle, "ISO")) set_config_option("DateStyle", OldDateStyle, PGC_USERSET, PGC_S_SESSION, true, true); } cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 + (len_ident = strlen(col_ident)) + (len_value = strlen(col_value)); if (cs->cmddata_size < cmddata_need) { int have = (cp - (char *) (cs->cmddata_buf)); while (cs->cmddata_size < cmddata_need) cs->cmddata_size *= 2; cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size); cp = (char *) (cs->cmddata_buf) + have; } memcpy(cp, col_ident, len_ident); cp += len_ident; *cp++ = '='; memcpy(cp, col_value, len_value); cp += len_value; } /* * It can happen that the only UPDATE an application does is to set a * column to the same value again. In that case, we'd end up here with * no columns in the SET clause yet. We add the first key column here * with it's old value to simulate the same for the replication * engine. 
*/ if (!need_comma) { for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++) { if (tupdesc->attrs[i]->attisdropped) continue; attkind_idx++; if (!attkind[attkind_idx]) elog(ERROR, "Slony-I: no key columns found in logTrigger() attkind parameter"); if (attkind[attkind_idx] == 'k') break; } col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1)); col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1)); cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 + (len_ident = strlen(col_ident)) + (len_value = strlen(col_value)); if (cs->cmddata_size < cmddata_need) { int have = (cp - (char *) (cs->cmddata_buf)); while (cs->cmddata_size < cmddata_need) cs->cmddata_size *= 2; cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size); cp = (char *) (cs->cmddata_buf) + have; } memcpy(cp, col_ident, len_ident); cp += len_ident; *cp++ = '='; memcpy(cp, col_value, len_value); cp += len_value; } *cp++ = ' '; *cp++ = 'w'; *cp++ = 'h'; *cp++ = 'e'; *cp++ = 'r'; *cp++ = 'e'; *cp++ = ' '; for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++) { /* * Ignore dropped columns */ if (tupdesc->attrs[i]->attisdropped) continue; attkind_idx++; if (!attkind[attkind_idx]) break; if (attkind[attkind_idx] != 'k') continue; col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1)); col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1)); if (col_value == NULL) elog(ERROR, "Slony-I: old key column %s.%s IS NULL on UPDATE", NameStr(tg->tg_relation->rd_rel->relname), col_ident); cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 + (len_ident = strlen(col_ident)) + (len_value = strlen(col_value)); if (cs->cmddata_size < cmddata_need) { int have = (cp - (char *) (cs->cmddata_buf)); while (cs->cmddata_size < cmddata_need) cs->cmddata_size *= 2; cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size); cp = (char *) (cs->cmddata_buf) + have; } if (need_and) { *cp++ = ' '; *cp++ = 'a'; *cp++ = 'n'; *cp++ = 'd'; 
*cp++ = ' '; } else need_and = true; memcpy(cp, col_ident, len_ident); cp += len_ident; *cp++ = '='; memcpy(cp, col_value, len_value); cp += len_value; } *cp = '\0'; SET_VARSIZE(cs->cmddata_buf, VARHDRSZ + (cp - VARDATA(cs->cmddata_buf))); } else if (TRIGGER_FIRED_BY_DELETE(tg->tg_event)) { HeapTuple old_row = tg->tg_trigtuple; TupleDesc tupdesc = tg->tg_relation->rd_att; char *col_ident; char *col_value; int len_ident; int len_value; int i; int need_and = false; char *cp = VARDATA(cs->cmddata_buf); /* * DELETE * * cmdtype = 'D' cmddata = "pk_ident"='value' [and ...] */ cmdtype = cs->cmdtype_D; for (i = 0, attkind_idx = -1; i < tg->tg_relation->rd_att->natts; i++) { if (tupdesc->attrs[i]->attisdropped) continue; attkind_idx++; if (!attkind[attkind_idx]) break; if (attkind[attkind_idx] != 'k') continue; col_ident = (char *) slon_quote_identifier(SPI_fname(tupdesc, i + 1)); col_value = slon_quote_literal(SPI_getvalue(old_row, tupdesc, i + 1)); if (col_value == NULL) elog(ERROR, "Slony-I: old key column %s.%s IS NULL on DELETE", NameStr(tg->tg_relation->rd_rel->relname), col_ident); cmddata_need = (cp - (char *) (cs->cmddata_buf)) + 16 + (len_ident = strlen(col_ident)) + (len_value = strlen(col_value)); if (cs->cmddata_size < cmddata_need) { int have = (cp - (char *) (cs->cmddata_buf)); while (cs->cmddata_size < cmddata_need) cs->cmddata_size *= 2; cs->cmddata_buf = realloc(cs->cmddata_buf, cs->cmddata_size); cp = (char *) (cs->cmddata_buf) + have; } if (need_and) { *cp++ = ' '; *cp++ = 'a'; *cp++ = 'n'; *cp++ = 'd'; *cp++ = ' '; } else need_and = true; memcpy(cp, col_ident, len_ident); cp += len_ident; *cp++ = '='; memcpy(cp, col_value, len_value); cp += len_value; } *cp = '\0'; SET_VARSIZE(cs->cmddata_buf, VARHDRSZ + (cp - VARDATA(cs->cmddata_buf))); } else elog(ERROR, "Slony-I: logTrigger() fired for unhandled event"); /* * Construct the parameter array and insert the log row. 
*/ argv[0] = Int32GetDatum(tab_id); argv[1] = PointerGetDatum(cmdtype); argv[2] = PointerGetDatum(cs->cmddata_buf); SPI_execp(cs->plan_active_log, argv, NULL, 0); SPI_finish(); return PointerGetDatum(NULL); }
/*
 * LockGXact
 *		Find the prepared transaction named by gid and mark it busy, so that
 *		the caller can go on to finish it.
 *
 * gid	identifier of the prepared transaction
 * user	role attempting to finish it; must be the owner or a superuser
 *
 * Returns the matching GlobalTransaction with locking_xid set to our top
 * transaction id.  Raises ERROR if the GID does not exist, is busy in
 * another active transaction, belongs to another database, or the user is
 * not permitted to finish it.
 */
static GlobalTransaction
LockGXact(const char *gid, Oid user)
{
	int			slot;

	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);

	for (slot = 0; slot < TwoPhaseState->numPrepXacts; slot++)
	{
		GlobalTransaction candidate = TwoPhaseState->prepXacts[slot];

		/* Skip not-yet-valid entries and entries with a different GID */
		if (!candidate->valid || strcmp(candidate->gid, gid) != 0)
			continue;

		/* This is the one; check whether somebody else holds it */
		if (TransactionIdIsValid(candidate->locking_xid))
		{
			if (TransactionIdIsActive(candidate->locking_xid))
			{
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("prepared transaction with identifier \"%s\" is busy",
								gid)));
			}

			/* The previous locker is no longer active; drop its claim */
			candidate->locking_xid = InvalidTransactionId;
		}

		if (user != candidate->owner && !superuser_arg(user))
		{
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to finish prepared transaction"),
					 errhint("Must be superuser or the user that prepared the transaction.")));
		}

		/*
		 * Note: it probably would be possible to allow committing from
		 * another database; but at the moment NOTIFY is known not to work and
		 * there may be some other issues as well.  Hence disallow until
		 * someone gets motivated to make it work.
		 */
		if (MyDatabaseId != candidate->proc.databaseId)
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("prepared transaction belongs to another database"),
					 errhint("Connect to the database where the transaction was prepared to finish it.")));
		}

		/* All checks passed: claim it for this transaction */
		candidate->locking_xid = GetTopTransactionId();

		LWLockRelease(TwoPhaseStateLock);

		return candidate;
	}

	LWLockRelease(TwoPhaseStateLock);

	ereport(ERROR,
			(errcode(ERRCODE_UNDEFINED_OBJECT),
			 errmsg("prepared transaction with identifier \"%s\" does not exist",
					gid)));

	/* NOTREACHED */
	return NULL;
}
/*
 * PersistentRelation_MarkCreatePending
 *		Record a relation directory as 'Create Pending' on behalf of the
 *		current top-level transaction.
 *
 * relFileNode:			identifies the relation; an empty node is rejected
 *						with an ERROR.
 * persistentTid:		output, receives the TID stored in the new shared
 *						directory entry.
 * persistentSerialNum:	output, receives the serial number stored in the new
 *						shared directory entry.
 * flushToXLog:			handed through unchanged to
 *						PersistentFileSysObj_AddTuple.
 *
 * When Persistent_BeforePersistenceWork() reports true nothing is recorded:
 * *persistentSerialNum is set to 0, persistentTid is left untouched, and the
 * function returns immediately.
 *
 * Otherwise the directory entry, the persistent tuple, the optional mmxlog
 * record and the pending-delete entry are all produced while the persistent
 * write-lock is held.
 */
void PersistentRelation_MarkCreatePending(
	RelFileNode *relFileNode,
	ItemPointer persistentTid,
	int64 *persistentSerialNum,
	bool flushToXLog)
{
	WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE;

	PersistentFileSysObjName objName;
	RelationDirEntry dirEntry;
	ItemPointerData prevFreeTid;
	Datum		tupleValues[Natts_gp_persistent_relation_node];

	if (RelFileNode_IsEmpty(relFileNode))
		elog(ERROR, "Invalid RelFileNode (0,0,0)");

	MemSet(&prevFreeTid, 0, sizeof(ItemPointerData));

	if (Persistent_BeforePersistenceWork())
	{
		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Skipping persistent relation '%s' because we are before persistence work",
				 relpath(*relFileNode));

		/*
		 * Nothing to record yet: the initdb process loads the persistent
		 * table once we are out of bootstrap mode.
		 */
		*persistentSerialNum = 0;
		return;
	}

	PersistentRelation_VerifyInitScan();

	PersistentFileSysObjName_SetRelationDir(&objName,
											relFileNode,
											is_tablespace_shared);

	WRITE_PERSISTENT_STATE_ORDERED_LOCK;

	dirEntry = PersistentRelation_CreateEntryUnderLock(relFileNode);
	if (dirEntry == NULL)
	{
		/*
		 * Running out of shared memory need not be promoted to PANIC, so let
		 * go of the lock before reporting a plain ERROR.
		 */
		WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;

		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("Out of shared-memory for persistent relations"),
				 errhint("You may need to increase the gp_max_relations value"),
				 errOmitLocation(true)));
	}

	dirEntry->state = PersistentFileSysState_CreatePending;

	GpPersistentRelationNode_SetDatumValues(
											tupleValues,
											relFileNode->spcNode,
											relFileNode->dbNode,
											relFileNode->relNode,
											PersistentFileSysState_CreatePending,
											/* reserved */ 0,
											/* parentXid */ GetTopTransactionId(),
											/* persistentSerialNum: set by PersistentFileSysObj_AddTuple */ 0,
											&prevFreeTid,
											is_tablespace_shared(relFileNode->spcNode));

	PersistentFileSysObj_AddTuple(
								  PersistentFsObjType_RelationDir,
								  tupleValues,
								  flushToXLog,
								  &dirEntry->persistentTid,
								  &dirEntry->persistentSerialNum);

	/* Hand the location stored in the directory entry back to the caller. */
	*persistentTid = dirEntry->persistentTid;
	*persistentSerialNum = dirEntry->persistentSerialNum;

	/*
	 * The XLOG record below has to be produced while the persistent
	 * write-lock is still held.
	 */
#ifdef MASTER_MIRROR_SYNC
	mmxlog_log_create_relation(
							   relFileNode->spcNode,
							   relFileNode->dbNode,
							   relFileNode->relNode,
							   persistentTid,
							   persistentSerialNum);
#endif

	/*
	 * MPP-18228: register the pending-delete entry before unlocking so that
	 * adding the 'Create Pending' entry to the persistent table and adding
	 * to the PendingDelete list happen atomically.
	 */
	PendingDelete_AddCreatePendingEntryWrapper(
											   &objName,
											   persistentTid,
											   *persistentSerialNum);

	WRITE_PERSISTENT_STATE_ORDERED_UNLOCK;
}
/* * Indicate we intend to create a filespace file as part of the current transaction. * * An XLOG IntentToCreate record is generated that will guard the subsequent file-system * create in case the transaction aborts. * * After 1 or more calls to this routine to mark intention about filespace files that are going * to be created, call ~_DoPendingCreates to do the actual file-system creates. (See its * note on XLOG flushing). */ void PersistentFilespace_MarkCreatePending( Oid filespaceOid, /* The filespace where the filespace lives. */ int16 primaryDbId, char *primaryFilespaceLocation, /* * The primary filespace directory path. NOT Blank padded. * Just a NULL terminated string. */ int16 mirrorDbId, char *mirrorFilespaceLocation, MirroredObjectExistenceState mirrorExistenceState, ItemPointer persistentTid, /* TID of the gp_persistent_rel_files tuple for the rel file */ int64 *persistentSerialNum, bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { WRITE_PERSISTENT_STATE_ORDERED_LOCK_DECLARE; PersistentFileSysObjName fsObjName; FilespaceDirEntry filespaceDirEntry; TransactionId topXid; Datum values[Natts_gp_persistent_filespace_node]; char mirrorFilespaceLocationBlankPadded[FilespaceLocationBlankPaddedWithNullTermLen]; char primaryFilespaceLocationBlankPadded[FilespaceLocationBlankPaddedWithNullTermLen]; if (Persistent_BeforePersistenceWork()) { if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Skipping persistent filespace %u because we are before persistence work", filespaceOid); return; // The initdb process will load the persistent table once we out of bootstrap mode. 
} PersistentFilespace_VerifyInitScan(); PersistentFileSysObjName_SetFilespaceDir(&fsObjName,filespaceOid); topXid = GetTopTransactionId(); WRITE_PERSISTENT_STATE_ORDERED_LOCK; PersistentFilespace_BlankPadCopyLocation( primaryFilespaceLocationBlankPadded, primaryFilespaceLocation); PersistentFilespace_BlankPadCopyLocation( mirrorFilespaceLocationBlankPadded, mirrorFilespaceLocation); GpPersistentFilespaceNode_SetDatumValues( values, filespaceOid, primaryDbId, primaryFilespaceLocationBlankPadded, mirrorDbId, mirrorFilespaceLocationBlankPadded, PersistentFileSysState_CreatePending, /* createMirrorDataLossTrackingSessionNum */ 0, mirrorExistenceState, /* reserved */ 0, /* parentXid */ topXid, /* persistentSerialNum */ 0); // This will be set by PersistentFileSysObj_AddTuple. PersistentFileSysObj_AddTuple( PersistentFsObjType_FilespaceDir, values, flushToXLog, persistentTid, persistentSerialNum); WRITE_FILESPACE_HASH_LOCK; filespaceDirEntry = PersistentFilespace_CreateDirUnderLock(filespaceOid); Assert(filespaceDirEntry != NULL); filespaceDirEntry->dbId1 = primaryDbId; memcpy(filespaceDirEntry->locationBlankPadded1, primaryFilespaceLocationBlankPadded, FilespaceLocationBlankPaddedWithNullTermLen); filespaceDirEntry->dbId2 = mirrorDbId; memcpy(filespaceDirEntry->locationBlankPadded2, mirrorFilespaceLocationBlankPadded, FilespaceLocationBlankPaddedWithNullTermLen); filespaceDirEntry->state = PersistentFileSysState_CreatePending; ItemPointerCopy(persistentTid, &filespaceDirEntry->persistentTid); filespaceDirEntry->persistentSerialNum = *persistentSerialNum; WRITE_FILESPACE_HASH_UNLOCK; /* * This XLOG must be generated under the persistent write-lock. 
*/ #ifdef MASTER_MIRROR_SYNC mmxlog_log_create_filespace(filespaceOid); #endif SIMPLE_FAULT_INJECTOR(FaultBeforePendingDeleteFilespaceEntry); /* * MPP-18228 * To make adding 'Create Pending' entry to persistent table and adding * to the PendingDelete list atomic */ PendingDelete_AddCreatePendingEntryWrapper( &fsObjName, persistentTid, *persistentSerialNum); WRITE_PERSISTENT_STATE_ORDERED_UNLOCK; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Persistent filespace directory: Add '%s' in state 'Created', mirror existence state '%s', serial number " INT64_FORMAT " at TID %s", PersistentFileSysObjName_ObjectName(&fsObjName), MirroredObjectExistenceState_Name(mirrorExistenceState), *persistentSerialNum, ItemPointerToString(persistentTid)); }
/*
 * fill_seq_with_data
 *		Set up the first page of a sequence relation and store 'tuple' on it.
 *
 * rel:   the sequence relation; a new block is requested from it and that
 *		  block is asserted to be block 0.
 * tuple: the sequence tuple to store.  Its header is modified in place
 *		  (xmin, cmin, xmax, infomask and ctid) before it is added to the page.
 *
 * The page change is WAL-logged when RelationNeedsWAL(rel) holds.
 */
static void
fill_seq_with_data(Relation rel, HeapTuple tuple)
{
	Buffer		seqbuf;
	Page		seqpage;
	sequence_magic *special;
	OffsetNumber placed_at;
	bool		needs_wal;

	/* Get the relation's first page and stamp its special area. */
	seqbuf = ReadBuffer(rel, P_NEW);
	Assert(BufferGetBlockNumber(seqbuf) == 0);

	seqpage = BufferGetPage(seqbuf);

	PageInit(seqpage, BufferGetPageSize(seqbuf), sizeof(sequence_magic));
	special = (sequence_magic *) PageGetSpecialPointer(seqpage);
	special->magic = SEQ_MAGIC;

	/* The tuple goes in next, so take the buffer lock. */
	LockBuffer(seqbuf, BUFFER_LOCK_EXCLUSIVE);

	/*
	 * VACUUM does not process sequences, so the tuple must carry xmin =
	 * FrozenTransactionId right away; otherwise SELECTs would stop seeing it
	 * after 2G transactions.  Doing this is safe: should the current
	 * transaction abort, no other xact will ever look at the sequence tuple.
	 */
	HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
	HeapTupleHeaderSetXminFrozen(tuple->t_data);
	HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
	HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
	tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
	ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);

	needs_wal = RelationNeedsWAL(rel);

	/* check the comment above nextval_internal()'s equivalent call. */
	if (needs_wal)
		GetTopTransactionId();

	START_CRIT_SECTION();

	MarkBufferDirty(seqbuf);

	placed_at = PageAddItem(seqpage, (Item) tuple->t_data, tuple->t_len,
							InvalidOffsetNumber, false, false);
	if (placed_at != FirstOffsetNumber)
		elog(ERROR, "failed to add sequence tuple to page");

	/* Emit the WAL record and stamp the page with its LSN. */
	if (needs_wal)
	{
		xl_seq_rec	seqrec;
		XLogRecPtr	lsn;

		XLogBeginInsert();
		XLogRegisterBuffer(0, seqbuf, REGBUF_WILL_INIT);

		seqrec.node = rel->rd_node;

		XLogRegisterData((char *) &seqrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) tuple->t_data, tuple->t_len);

		lsn = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(seqpage, lsn);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(seqbuf);
}