/*
 * This must be called ONCE during postmaster or standalone-backend startup,
 * after StartupXLOG has initialized ShmemVariableCache->nextXid.
 *
 * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
 * if there are none.
 *
 * Every pg_subtrans page from the one holding oldestActiveXID through the
 * one holding nextXid (inclusive) is zeroed while holding
 * SubtransControlLock exclusively.
 */
void
StartupSUBTRANS(TransactionId oldestActiveXID)
{
	int			startPage;
	int			endPage;

	/*
	 * Since we don't expect pg_subtrans to be valid across crashes, we
	 * initialize the currently-active page(s) to zeroes during startup.
	 * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
	 * the new page without regard to whatever was previously on disk.
	 */
	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	startPage = TransactionIdToPage(oldestActiveXID);
	endPage = TransactionIdToPage(ShmemVariableCache->nextXid);

	while (startPage != endPage)
	{
		(void) ZeroSUBTRANSPage(startPage);
		startPage++;

		/*
		 * Must account for XID wraparound: when oldestActiveXID lives on a
		 * numerically higher page than nextXid, the walk has to continue
		 * from page zero after the last valid page instead of counting
		 * upward through page numbers that do not exist.
		 */
		if (startPage > TransactionIdToPage(MaxTransactionId))
			startPage = 0;
	}
	(void) ZeroSUBTRANSPage(startPage);

	LWLockRelease(SubtransControlLock);
}
/*
 * Make sure that SUBTRANS has room for a newly-allocated XID.
 *
 * newestXact is the XID that was just allocated.  If it requires a page that
 * has not been set up yet, that page is zeroed under SubtransControlLock.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty subtrans page to make room
 * in shared memory.
 */
void
ExtendSUBTRANS(TransactionId newestXact)
{
	int			pageno;

	/*
	 * No work except at first XID of a page.  But beware: just after
	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
	 */
#ifdef PGXC  /* PGXC_COORD || PGXC_DATANODE */
	/*
	 * In PGXC, it may be that a node is not involved in a transaction,
	 * and therefore will be skipped, so we need to detect this by using
	 * the latest_page_number instead of the pg index.
	 *
	 * Also, there is a special case of when transactions wrap-around that
	 * we need to detect.
	 */
	pageno = TransactionIdToPage(newestXact);

	/*
	 * Fast path, taken without the lock: return when the page for
	 * newestXact is not ahead of latest_page_number.
	 *
	 * The first condition makes sure we did not wrap around: the distance
	 * back from latest_page_number to pageno must be within
	 * SUBTRANS_WRAP_CHECK_DELTA.
	 * The second checks if we are still using the same page (or an
	 * earlier one): pageno must not exceed latest_page_number.
	 *
	 * Note that this value can change and we are not holding a lock,
	 * so we repeat the check below.  We do it this way instead of
	 * grabbing the lock to avoid lock contention.
	 */
	if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA
			&& pageno <= SubTransCtl->shared->latest_page_number)
		return;
#else
	/* Only the first XID of a page (or the first normal XID) needs work. */
	if (TransactionIdToEntry(newestXact) != 0 &&
		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
		return;

	pageno = TransactionIdToPage(newestXact);
#endif

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

#ifdef PGXC
	/*
	 * We repeat the check.  Another process may have written
	 * out the page already and advanced the latest_page_number
	 * while we were waiting for the lock.
	 */
	if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA
			&& pageno <= SubTransCtl->shared->latest_page_number)
	{
		/* Nothing left to do; drop the lock before bailing out. */
		LWLockRelease(SubtransControlLock);
		return;
	}
#endif

	/* Zero the page */
	ZeroSUBTRANSPage(pageno);

	LWLockRelease(SubtransControlLock);
}
/*
 * Ensure pg_subtrans has a zeroed page ready for a freshly assigned XID.
 *
 * newestXact is the XID that was just allocated.  Only an XID that begins a
 * page triggers any work; for every other XID this returns immediately.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty subtrans page to make room
 * in shared memory.
 *
 * Caller must have already taken mirrored lock shared.
 */
void
ExtendSUBTRANS(TransactionId newestXact)
{
	/*
	 * A page starts at entry zero.  Right after wraparound, however, page
	 * zero effectively starts at FirstNormalTransactionId, so that XID
	 * counts as a page start as well.
	 */
	if (TransactionIdToEntry(newestXact) == 0 ||
		TransactionIdEquals(newestXact, FirstNormalTransactionId))
	{
		int			newPage = TransactionIdToPage(newestXact);

		LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

		/* Start the new page out as all zeroes */
		ZeroSUBTRANSPage(newPage);

		LWLockRelease(SubtransControlLock);
	}
}
/*
 * Look up the parent XID recorded for a transaction in the subtrans log.
 *
 * Returns InvalidTransactionId for XIDs that are not normal (bootstrap and
 * frozen XIDs have no parent); otherwise returns whatever value is stored in
 * xid's pg_subtrans entry.
 */
TransactionId
SubTransGetParent(TransactionId xid)
{
	TransactionId *pageEntries;
	TransactionId result;
	int			slot;

	/* Entries older than TransactionXmin may already have been truncated */
	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));

	/* Special XIDs never have a parent, so there is nothing to read */
	if (!TransactionIdIsNormal(xid))
		return InvalidTransactionId;

	/* SimpleLruReadPage_ReadOnly returns with SubtransControlLock held */
	slot = SimpleLruReadPage_ReadOnly(SubTransCtl, TransactionIdToPage(xid), xid);

	pageEntries = (TransactionId *) SubTransCtl->shared->page_buffer[slot];
	result = pageEntries[TransactionIdToEntry(xid)];

	LWLockRelease(SubtransControlLock);

	return result;
}
/*
 * Store the parent XID of a subtransaction in the subtrans log.
 *
 * parent must be a valid XID that precedes xid.  The page is marked dirty
 * only when the stored value actually changes.
 */
void
SubTransSetParent(TransactionId xid, TransactionId parent)
{
	TransactionId *pageEntries;
	int			entryIdx = TransactionIdToEntry(xid);
	int			slot;

	Assert(TransactionIdIsValid(parent));
	Assert(TransactionIdFollows(xid, parent));

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	slot = SimpleLruReadPage(SubTransCtl, TransactionIdToPage(xid), true, xid);
	pageEntries = (TransactionId *) SubTransCtl->shared->page_buffer[slot];

	/*
	 * Setting the same parent more than once is tolerated, but replacing
	 * one valid parent with a different valid parent would corrupt the
	 * data structure, so the slot must still be unset when we write it.
	 */
	if (pageEntries[entryIdx] != parent)
	{
		Assert(pageEntries[entryIdx] == InvalidTransactionId);
		pageEntries[entryIdx] = parent;
		SubTransCtl->shared->page_dirty[slot] = true;
	}

	LWLockRelease(SubtransControlLock);
}
/*
 * Store the parent XID of a subtransaction in the subtrans log.
 *
 * The entry is normally expected to be unset.  When overwriteOK is true the
 * entry may already hold the same parent value, and it is simply rewritten.
 * The page is marked dirty unconditionally.
 */
void
SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
{
	TransactionId *pageEntries;
	int			entryIdx = TransactionIdToEntry(xid);
	int			slot;

	Assert(TransactionIdIsValid(parent));

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	slot = SimpleLruReadPage(SubTransCtl, TransactionIdToPage(xid), true, xid);
	pageEntries = (TransactionId *) SubTransCtl->shared->page_buffer[slot];

	/* Slot must be empty, or already hold this parent if overwrite is OK */
	Assert(pageEntries[entryIdx] == InvalidTransactionId ||
		   (pageEntries[entryIdx] == parent && overwriteOK));

	pageEntries[entryIdx] = parent;
	SubTransCtl->shared->page_dirty[slot] = true;

	LWLockRelease(SubtransControlLock);
}
/*
 * Remove all SUBTRANS segments before the one holding the passed transaction ID
 *
 * This is normally called during checkpoint, with oldestXact being the
 * oldest TransactionXmin of any running transaction.
 */
void
TruncateSUBTRANS(TransactionId oldestXact)
{
	int			cutoffPage;

	/*
	 * The cutoff point is the start of the segment containing oldestXact. We
	 * pass the *page* containing oldestXact to SimpleLruTruncate.  We step
	 * back one transaction to avoid passing a cutoff page that hasn't been
	 * created yet in the rare case that oldestXact would be the first item on
	 * a page and oldestXact == next XID.  In that case, if we didn't subtract
	 * one, we'd trigger SimpleLruTruncate's wraparound detection.  Stepping
	 * back can only move the cutoff to an earlier page, so no data that is
	 * still needed gets removed.
	 */
	TransactionIdRetreat(oldestXact);
	cutoffPage = TransactionIdToPage(oldestXact);

	SimpleLruTruncate(SubTransCtl, cutoffPage);
}
/*
 * Remove all DistributedLog segments before the one holding the passed
 * transaction ID
 *
 * Before removing any DistributedLog data, we must flush XLOG to disk, to
 * ensure that any recently-emitted HEAP_FREEZE records have reached disk;
 * otherwise a crash and restart might leave us with some unfrozen tuples
 * referencing removed DistributedLog data.  We choose to emit a special
 * TRUNCATE XLOG record too.
 *
 * Replaying the deletion from XLOG is not critical, since the files could
 * just as well be removed later, but doing so prevents a long-running hot
 * standby server from acquiring an unreasonably bloated DistributedLog directory.
 *
 * Since DistributedLog segments hold a large number of transactions, the
 * opportunity to actually remove a segment is fairly rare, and so it seems
 * best not to do the XLOG flush unless we have confirmed that there is
 * a removable segment.
 */
void
DistributedLog_Truncate(TransactionId oldestXid)
{
	MIRRORED_LOCK_DECLARE;

	int			cutoffPage;

	/*
	 * The cutoff point is the start of the segment containing oldestXid. We
	 * pass the *page* containing oldestXid to SimpleLruTruncate.
	 */
	cutoffPage = TransactionIdToPage(oldestXid);

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	/* Check to see if there's any files that could be removed */
	if (!SlruScanDirectory(DistributedLogCtl, cutoffPage, false))
	{
		LWLockRelease(DistributedLogControlLock);
		MIRRORED_UNLOCK;
		return;					/* nothing to remove */
	}

	/*
	 * Remember this as the low-water mark to aid the virtual table over the
	 * distributed log.
	 */
	DistributedLogShared->oldestXid = oldestXid;

	/* Write XLOG record and flush XLOG to disk */
	DistributedLog_WriteTruncateXlogRec(cutoffPage);

	/* Now we can remove the old DistributedLog segment(s) */
	SimpleLruTruncateWithLock(DistributedLogCtl, cutoffPage);	/* we already hold the lock */

	/* TransactionId is unsigned, so it is reported with %u rather than %d */
	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Truncate with oldest local xid = %u to cutoff page = %d",
		 oldestXid, cutoffPage);

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;
}
/*
 * Store a subtransaction's parent, together with its top-most parent, in
 * the subtrans log.
 *
 * When parent is InvalidTransactionId (a main transaction) both stored
 * fields are InvalidTransactionId.  Otherwise the top-most parent is taken
 * from what SubTransGetData reports for the parent.
 */
void
SubTransSetParent(TransactionId xid, TransactionId parent)
{
	MIRRORED_LOCK_DECLARE;

	SubTransData parentInfo;
	SubTransData *entry;
	int			slot;

	/* A main transaction has neither a parent nor a top-most parent */
	if (parent == InvalidTransactionId)
		parentInfo.topMostParent = InvalidTransactionId;
	else
		SubTransGetData(parent, &parentInfo);	/* inherit parent's top-most XID */

	MIRRORED_LOCK;
	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	slot = SimpleLruReadPage(SubTransCtl, TransactionIdToPage(xid), xid);
	entry = ((SubTransData *) SubTransCtl->shared->page_buffer[slot]) +
		TransactionIdToEntry(xid);

	/* The entry is expected to be untouched so far */
	Assert(entry->parent == InvalidTransactionId);
	Assert(entry->topMostParent == InvalidTransactionId);

	entry->parent = parent;
	entry->topMostParent = parentInfo.topMostParent;

	SubTransCtl->shared->page_dirty[slot] = true;

	LWLockRelease(SubtransControlLock);
	MIRRORED_UNLOCK;
}
/*
 * Discard every SUBTRANS segment that lies before the segment holding the
 * given transaction ID.
 *
 * This is normally called during checkpoint, with oldestXact being the
 * oldest TransactionXmin of any running transaction.
 */
void
TruncateSUBTRANS(TransactionId oldestXact)
{
	TransactionId cutoffXid = oldestXact;

	/*
	 * SimpleLruTruncate is handed the *page* that contains the cutoff XID.
	 * The cutoff is taken one transaction before oldestXact: if oldestXact
	 * were the first entry of a page and also equal to the next XID, its
	 * page would not exist yet, and passing it would trip
	 * SimpleLruTruncate's wraparound detection.
	 */
	TransactionIdRetreat(cutoffXid);

	SimpleLruTruncate(SubTransCtl, TransactionIdToPage(cutoffXid));
}
/*
 * Fetch the parent and top-most parent recorded for xid in the subtrans log.
 *
 * Results are returned through subData.  For XIDs that are not normal, the
 * parent is InvalidTransactionId and the top-most parent is xid itself.
 * For normal XIDs, a stored top-most parent of InvalidTransactionId is
 * reported as xid.
 */
static void
SubTransGetData(TransactionId xid, SubTransData* subData)
{
	MIRRORED_LOCK_DECLARE;

	SubTransData *entry;
	int			slot;

	/* Entries older than TransactionXmin may already have been truncated */
	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));

	/* Bootstrap and frozen XIDs: no parent, and they are their own top */
	if (!TransactionIdIsNormal(xid))
	{
		subData->parent = InvalidTransactionId;
		subData->topMostParent = xid;
		return;
	}

	MIRRORED_LOCK;

	/* SimpleLruReadPage_ReadOnly returns with SubtransControlLock held */
	slot = SimpleLruReadPage_ReadOnly(SubTransCtl, TransactionIdToPage(xid),
									  xid, NULL);
	entry = ((SubTransData *) SubTransCtl->shared->page_buffer[slot]) +
		TransactionIdToEntry(xid);

	subData->parent = entry->parent;

	/*
	 * No stored top-most parent means the parent is the main XID, so xid
	 * itself is reported as the top-most parent.
	 */
	subData->topMostParent =
		(entry->topMostParent == InvalidTransactionId) ? xid : entry->topMostParent;

	LWLockRelease(SubtransControlLock);
	MIRRORED_UNLOCK;
}
/*
 * Remove all SUBTRANS segments before the one holding the passed transaction ID
 *
 * This is normally called during checkpoint, with oldestXact being the
 * oldest TransactionXmin of any running transaction.  The truncation itself
 * runs between MIRRORED_LOCK and MIRRORED_UNLOCK.
 */
void
TruncateSUBTRANS(TransactionId oldestXact)
{
	MIRRORED_LOCK_DECLARE;

	int			cutoffPage;

	/*
	 * The cutoff point is the start of the segment containing oldestXact. We
	 * pass the *page* containing oldestXact to SimpleLruTruncate.  We step
	 * back one transaction to avoid passing a cutoff page that hasn't been
	 * created yet in the rare case that oldestXact would be the first item on
	 * a page and oldestXact == next XID.  Stepping back can only move the
	 * cutoff to an earlier page, so nothing still needed is removed.
	 *
	 * NOTE(review): presumably this SimpleLruTruncate variant has the same
	 * wraparound detection as the two-argument one -- confirm.
	 */
	TransactionIdRetreat(oldestXact);
	cutoffPage = TransactionIdToPage(oldestXact);

	MIRRORED_LOCK;

	SimpleLruTruncate(SubTransCtl, cutoffPage, false);

	MIRRORED_UNLOCK;
}
/*
 * Make sure that DistributedLog has room for a newly-allocated XID.
 *
 * newestXact is the XID that was just allocated.  Work is done only when it
 * is the first XID of a page; the new page is then zeroed and an XLOG entry
 * is made for it.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty DistributedLog or xlog page
 * to make room in shared memory.
 */
void
DistributedLog_Extend(TransactionId newestXact)
{
	MIRRORED_LOCK_DECLARE;

	int			page;

	/*
	 * No work except at first XID of a page.  But beware: just after
	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
	 */
	if (TransactionIdToEntry(newestXact) != 0 &&
		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
		return;

	page = TransactionIdToPage(newestXact);

	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Extend page %d",
		 page);

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	/* Zero the page and make an XLOG entry about it */
	DistributedLog_ZeroPage(page, true);

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;

	/* TransactionId is unsigned, so it is reported with %u rather than %d */
	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Extend with newest local xid = %u to page = %d",
		 newestXact, page);
}
/*
 * Record that a distributed transaction committed in the distributed log.
 *
 * localXid selects the entry; distribTimeStamp and distribXid are the values
 * stored in it.  If the entry is already populated it must match the input
 * exactly, otherwise an ERROR is raised; a matching entry is left untouched.
 *
 * When isRedo is true and the page does not exist yet, the page is first
 * zeroed without writing an XLOG record.
 */
void
DistributedLog_SetCommitted(
	TransactionId 						localXid,
	DistributedTransactionTimeStamp		distribTimeStamp,
	DistributedTransactionId 			distribXid,
	bool								isRedo)
{
	MIRRORED_LOCK_DECLARE;

	int			page = TransactionIdToPage(localXid);
	int			entryno = TransactionIdToEntry(localXid);
	int			slotno;

	DistributedLogEntry *ptr;

	bool		alreadyThere = false;

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	if (isRedo)
	{
		elog((Debug_print_full_dtm ? LOG : DEBUG5),
			 "DistributedLog_SetCommitted check if page %d is present",
			 page);
		if (!SimpleLruPageExists(DistributedLogCtl, page))
		{
			DistributedLog_ZeroPage(page, /* writeXLog */ false);
			elog((Debug_print_full_dtm ? LOG : DEBUG5),
				 "DistributedLog_SetCommitted zeroed page %d",
				 page);
		}
	}

	slotno = SimpleLruReadPage(DistributedLogCtl, page, localXid);
	ptr = (DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slotno];
	ptr += entryno;

	if (ptr->distribTimeStamp != 0 || ptr->distribXid != 0)
	{
		/*
		 * The entry was written before; it has to agree with what we were
		 * asked to record.
		 *
		 * NOTE(review): these ERRORs are raised with the control lock still
		 * held; presumably error cleanup releases it -- confirm.
		 */
		if (ptr->distribTimeStamp != distribTimeStamp)
			elog(ERROR,
				 "Current distributed timestamp = %u does not match input timestamp = %u for local xid = %u in distributed log (page = %d, entryno = %d)",
				 ptr->distribTimeStamp, distribTimeStamp, localXid, page, entryno);

		if (ptr->distribXid != distribXid)
			elog(ERROR,
				 "Current distributed xid = %u does not match input distributed xid = %u for local xid = %u in distributed log (page = %d, entryno = %d)",
				 ptr->distribXid, distribXid, localXid, page, entryno);

		alreadyThere = true;
	}
	else
	{
		ptr->distribTimeStamp = distribTimeStamp;
		ptr->distribXid = distribXid;

		DistributedLogCtl->shared->page_dirty[slotno] = true;
	}

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;

	/*
	 * The local xid is unsigned; report it with %u, matching the ERROR
	 * messages above.
	 */
	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_SetCommitted with local xid = %u (page = %d, entryno = %d) and distributed transaction xid = %u (timestamp = %u) status = %s",
		 localXid, page, entryno, distribXid, distribTimeStamp,
		 (alreadyThere ? "already there" : "set"));
}
/*
 * Determine if a distributed transaction committed in the distributed log.
 *
 * On return, *distribTimeStamp and *distribXid hold the values stored for
 * localXid, or zeroes when the page holding localXid is not available.
 * Returns true when both stored values are non-zero and false when both are
 * zero; an entry with exactly one of them zero raises an ERROR.
 */
bool
DistributedLog_CommittedCheck(
	TransactionId 						localXid,
	DistributedTransactionTimeStamp		*distribTimeStamp,
	DistributedTransactionId 			*distribXid)
{
	MIRRORED_LOCK_DECLARE;

	int			page = TransactionIdToPage(localXid);
	int			entryno = TransactionIdToEntry(localXid);
	int			slot;
	bool		pageUnavailable = false;

	DistributedLogEntry *entry;

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	if (DistributedLogShared->knowHighestUnusedPage &&
		page <= DistributedLogShared->highestUnusedPage)
	{
		/* An earlier call already found that we don't have this page */
		pageUnavailable = true;
	}
	else if (!SimpleLruPageExists(DistributedLogCtl, page))
	{
		/*
		 * The page is not there; remember that for later calls.
		 *
		 * NOTE(review): when knowHighestUnusedPage is already set, getting
		 * here implies page > highestUnusedPage, so the comparison below
		 * looks like it can never be true -- confirm the intended
		 * direction.
		 */
		if (!DistributedLogShared->knowHighestUnusedPage)
		{
			DistributedLogShared->knowHighestUnusedPage = true;
			DistributedLogShared->highestUnusedPage = page;
		}
		else if (DistributedLogShared->highestUnusedPage > page)
			DistributedLogShared->highestUnusedPage = page;

		pageUnavailable = true;
	}

	if (pageUnavailable)
	{
		LWLockRelease(DistributedLogControlLock);
		MIRRORED_UNLOCK;

		/* Hand back well-defined values even though nothing was found */
		*distribTimeStamp = 0;
		*distribXid = 0;
		return false;
	}

	slot = SimpleLruReadPage(DistributedLogCtl, page, localXid);
	entry = ((DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slot]) +
		entryno;

	*distribTimeStamp = entry->distribTimeStamp;
	*distribXid = entry->distribXid;
	entry = NULL;

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;

	/* Both values zero: nothing was recorded for this xid */
	if (*distribTimeStamp == 0 && *distribXid == 0)
		return false;

	/* Both values set: the distributed transaction committed */
	if (*distribTimeStamp != 0 && *distribXid != 0)
		return true;

	/* Exactly one value is zero: the entry is inconsistent */
	if (*distribTimeStamp == 0)
		elog(ERROR, "Found zero timestamp for local xid = %u in distributed log (distributed xid = %u, page = %d, entryno = %d)",
			 localXid, *distribXid, page, entryno);

	elog(ERROR, "Found zero distributed xid for local xid = %u in distributed log (dtx start time = %u, page = %d, entryno = %d)",
		 localXid, *distribTimeStamp, page, entryno);

	return false;				/* not reached */
}
/* * Find the next lowest transaction with a logged or recorded status. * Currently on distributed commits are recorded. */ bool DistributedLog_ScanForPrevCommitted( TransactionId *indexXid, DistributedTransactionTimeStamp *distribTimeStamp, DistributedTransactionId *distribXid) { MIRRORED_LOCK_DECLARE; TransactionId highXid; int pageno; TransactionId lowXid; int slotno; TransactionId xid; *distribTimeStamp = 0; // Set it to something. *distribXid = 0; if ((*indexXid) == InvalidTransactionId) return false; highXid = (*indexXid) - 1; if (highXid < FirstNormalTransactionId) return false; MIRRORED_LOCK; while (true) { pageno = TransactionIdToPage(highXid); /* * Compute the xid floor for the page. */ lowXid = pageno * (TransactionId) ENTRIES_PER_PAGE; if (lowXid == InvalidTransactionId) lowXid = FirstNormalTransactionId; LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE); /* * Peek to see if page exists. */ if (!SimpleLruPageExists(DistributedLogCtl, pageno)) { LWLockRelease(DistributedLogControlLock); MIRRORED_UNLOCK; *indexXid = InvalidTransactionId; *distribTimeStamp = 0; // Set it to something. *distribXid = 0; return false; } slotno = SimpleLruReadPage(DistributedLogCtl, pageno, highXid); for (xid = highXid; xid >= lowXid; xid--) { int entryno = TransactionIdToEntry(xid); DistributedLogEntry *ptr; ptr = (DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slotno]; ptr += entryno; if (ptr->distribTimeStamp != 0 && ptr->distribXid != 0) { *indexXid = xid; *distribTimeStamp = ptr->distribTimeStamp; *distribXid = ptr->distribXid; LWLockRelease(DistributedLogControlLock); MIRRORED_UNLOCK; return true; } } LWLockRelease(DistributedLogControlLock); if (lowXid == FirstNormalTransactionId) { MIRRORED_UNLOCK; *indexXid = InvalidTransactionId; *distribTimeStamp = 0; // Set it to something. *distribXid = 0; return false; } highXid = lowXid - 1; // Go to last xid of previous page. } MIRRORED_UNLOCK; return false; // We'll never reach this. }
/*
 * This must be called ONCE during postmaster or standalone-backend startup,
 * after StartupXLOG has initialized ShmemVariableCache->nextXid.
 *
 * Sets the distributed log's latest page number to the page holding nextXid
 * and zeroes the part of that page from nextXid's entry to the end.  The
 * page of oldestActiveXid is only used in the debug log message.
 */
void
DistributedLog_Startup(
	TransactionId oldestActiveXid,
	TransactionId nextXid)
{
	MIRRORED_LOCK_DECLARE;

	int			firstPage;
	int			lastPage;
	int			nextEntry;

	/*
	 * UNDONE: We really need oldest frozen xid.  If we can't get it, then
	 * we will need to tolerate not finding a page in
	 * DistributedLog_SetCommitted and DistributedLog_IsCommitted.
	 */
	firstPage = TransactionIdToPage(oldestActiveXid);
	lastPage = TransactionIdToPage(nextXid);

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Startup startPage %d, endPage %d",
		 firstPage, lastPage);

	/* Initialize our idea of the latest page number */
	DistributedLogCtl->shared->latest_page_number = lastPage;

	/*
	 * Clear out the rest of the current DistributedLog page.  It should
	 * normally be all zeroes already, but XLOG replay could in theory have
	 * settled on a nextXID lower than the last XID actually used and marked
	 * by the previous database lifecycle (subtransaction commit writes clog
	 * without making a WAL entry), so play it safe.  Pages past the current
	 * one need no attention because they are zeroed on first use.  For the
	 * same reason nothing is done when nextXid sits exactly on a page
	 * boundary; the "current" page probably doesn't exist yet in that case.
	 */
	nextEntry = TransactionIdToEntry(nextXid);
	if (nextEntry != 0)
	{
		DistributedLogEntry *tail;
		int			slot;
		int			entriesLeft = ENTRIES_PER_PAGE - nextEntry;

		slot = SimpleLruReadPage(DistributedLogCtl, lastPage, nextXid);
		tail = ((DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slot]) +
			nextEntry;

		/* Wipe everything from nextXid's entry through the end of the page */
		MemSet(tail, 0, entriesLeft * sizeof(DistributedLogEntry));

		DistributedLogCtl->shared->page_dirty[slot] = true;
	}

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;
}