/*
 * SubTransSetParent
 *
 * Store "parent" as the parent of subtransaction "xid" in the subtrans log.
 *
 * The target entry is expected to be unset.  With overwriteOK the entry may
 * already contain the same parent value, in which case it is rewritten.
 */
void
SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
{
	int			page = TransactionIdToPage(xid);
	int			entry = TransactionIdToEntry(xid);
	int			slot;
	TransactionId *entries;

	Assert(TransactionIdIsValid(parent));

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	slot = SimpleLruReadPage(SubTransCtl, page, true, xid);
	entries = (TransactionId *) SubTransCtl->shared->page_buffer[slot];

	/* Entry must be empty, or already equal to parent if overwriting is OK */
	Assert(entries[entry] == InvalidTransactionId ||
		   (entries[entry] == parent && overwriteOK));

	entries[entry] = parent;
	SubTransCtl->shared->page_dirty[slot] = true;

	LWLockRelease(SubtransControlLock);
}
/*
 * ExtendSUBTRANS
 *
 * Ensure the subtrans log has a zeroed page ready for a freshly assigned XID.
 *
 * NB: the caller holds XidGenLock, so this has to be cheap in the common
 * case.  Even in the slow case no real I/O is needed unless a dirty
 * subtrans page has to be written out to free a shared-memory slot.
 */
void
ExtendSUBTRANS(TransactionId newestXact)
{
	int			pageno;

	/*
	 * Caller must have already taken mirrored lock shared.
	 */

	/*
	 * Only the first XID of a page requires work.  Right after wraparound
	 * the first XID on page zero is FirstNormalTransactionId, so that value
	 * counts as a page start too.
	 */
	if (!(TransactionIdToEntry(newestXact) == 0 ||
		  TransactionIdEquals(newestXact, FirstNormalTransactionId)))
		return;

	pageno = TransactionIdToPage(newestXact);

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	/* Initialize the new page to zeroes */
	ZeroSUBTRANSPage(pageno);

	LWLockRelease(SubtransControlLock);
}
/*
 * SubTransGetParent
 *
 * Look up the parent XID recorded for "xid" in the subtrans log.
 * Returns InvalidTransactionId for XIDs that are not normal.
 */
TransactionId
SubTransGetParent(TransactionId xid)
{
	int			page = TransactionIdToPage(xid);
	int			entry = TransactionIdToEntry(xid);
	int			slot;
	TransactionId *entries;
	TransactionId result;

	/* Asking about XIDs older than TransactionXmin is not allowed */
	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));

	/* Bootstrap and frozen XIDs never have a parent */
	if (!TransactionIdIsNormal(xid))
		return InvalidTransactionId;

	/* SimpleLruReadPage_ReadOnly takes the control lock for us */
	slot = SimpleLruReadPage_ReadOnly(SubTransCtl, page, xid);
	entries = (TransactionId *) SubTransCtl->shared->page_buffer[slot];
	result = entries[entry];

	LWLockRelease(SubtransControlLock);

	return result;
}
/*
 * SubTransSetParent
 *
 * Store "parent" as the parent of subtransaction "xid" in the subtrans log.
 */
void
SubTransSetParent(TransactionId xid, TransactionId parent)
{
	int			page = TransactionIdToPage(xid);
	int			entry = TransactionIdToEntry(xid);
	int			slot;
	TransactionId *entries;

	Assert(TransactionIdIsValid(parent));
	Assert(TransactionIdFollows(xid, parent));

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	slot = SimpleLruReadPage(SubTransCtl, page, true, xid);
	entries = (TransactionId *) SubTransCtl->shared->page_buffer[slot];

	/*
	 * Setting the same parent more than once is tolerated and is a no-op.
	 * What must never happen is replacing one valid parent by a different
	 * valid one, since that would corrupt the data structure.
	 */
	if (entries[entry] != parent)
	{
		Assert(entries[entry] == InvalidTransactionId);
		entries[entry] = parent;
		SubTransCtl->shared->page_dirty[slot] = true;
	}

	LWLockRelease(SubtransControlLock);
}
/*
 * ExtendSUBTRANS
 *
 * Ensure the subtrans log has a zeroed page ready for a freshly assigned XID.
 *
 * NB: the caller holds XidGenLock, so this has to be cheap in the common
 * case.  Even in the slow case no real I/O is needed unless a dirty
 * subtrans page has to be written out to free a shared-memory slot.
 */
void
ExtendSUBTRANS(TransactionId newestXact)
{
	int			pageno;

#ifdef PGXC						/* PGXC_COORD || PGXC_DATANODE */

	/*
	 * In PGXC a node may not take part in every transaction and so can be
	 * skipped; the "first XID of a page" test therefore cannot be relied
	 * on.  Compare against latest_page_number instead, and allow for XID
	 * wraparound.
	 */
	pageno = TransactionIdToPage(newestXact);

	/*
	 * First condition: we did not wrap around.  Second condition: we are
	 * still on a page that is already covered.  This peek is done without
	 * the lock to avoid contention; since the value can change underneath
	 * us, the same test is repeated below once the lock is held.
	 */
	if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA &&
		pageno <= SubTransCtl->shared->latest_page_number)
		return;
#else

	/*
	 * Only the first XID of a page requires work.  Right after wraparound
	 * the first XID on page zero is FirstNormalTransactionId, so that value
	 * counts as a page start too.
	 */
	if (!(TransactionIdToEntry(newestXact) == 0 ||
		  TransactionIdEquals(newestXact, FirstNormalTransactionId)))
		return;

	pageno = TransactionIdToPage(newestXact);
#endif

	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

#ifdef PGXC

	/*
	 * Re-test under the lock: another process may already have set up the
	 * page and advanced latest_page_number while we were waiting.
	 */
	if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA &&
		pageno <= SubTransCtl->shared->latest_page_number)
	{
		LWLockRelease(SubtransControlLock);
		return;
	}
#endif

	/* Initialize the new page to zeroes */
	ZeroSUBTRANSPage(pageno);

	LWLockRelease(SubtransControlLock);
}
Datum spoof_next_xid(PG_FUNCTION_ARGS) { TransactionId desiredXid = PG_GETARG_UINT32(0); TransactionId oldXid = ShmemVariableCache->nextXid; ShmemVariableCache->nextXid = desiredXid; /* * If we're raising the xid, the intent is presumably to cross some * threshold and make assertions about expected behavior. * On the other hand, lowering the xid is meant to be a tear down of * a completed test case. Because of this distinction, only when * we're raising the xid, do we take extra precaution to zero out * the new pg_clog/pg_subtrans/pg_distributedlog files. (We don't * want to zero out existing files...) */ if (TransactionIdFollows(desiredXid, oldXid)) { /* * The nature of xid arithmetic is such that we only bother zeroing out * new pages of transaction files when we've crossed page boundaries. * So, here we fool the following routines into zeroing out the desired * pages of transaction metadata by lowering the input xid to the first * of its corresponding page. */ #define CLOG_XACTS_PER_BYTE 4 #define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) #define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) ExtendCLOG(desiredXid - TransactionIdToPgIndex(desiredXid)); #define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(SubTransData)) #define TransactionIdToEntry(xid) ((xid) % (uint32) SUBTRANS_XACTS_PER_PAGE) ExtendSUBTRANS(desiredXid - TransactionIdToEntry(desiredXid)); #undef TransactionIdToEntry #define ENTRIES_PER_PAGE (BLCKSZ / sizeof(DistributedLogEntry)) #define TransactionIdToEntry(localXid) ((localXid) % (TransactionId) ENTRIES_PER_PAGE) DistributedLog_Extend(desiredXid - TransactionIdToEntry(desiredXid)); } PG_RETURN_XID(oldXid); }
/*
 * SubTransSetParent
 *
 * Store the parent of "xid" in the subtrans log, together with the
 * top-most parent obtained from the parent's own entry.
 */
void
SubTransSetParent(TransactionId xid, TransactionId parent)
{
	MIRRORED_LOCK_DECLARE;

	int			page = TransactionIdToPage(xid);
	int			entry = TransactionIdToEntry(xid);
	int			slot;
	SubTransData *entries;
	SubTransData parentData;
	TransactionId topMost = InvalidTransactionId;

	/*
	 * A main transaction has InvalidTransactionId as both parent and
	 * topMostParent.  Otherwise take over the parent's top-most parent.
	 */
	if (parent != InvalidTransactionId)
	{
		SubTransGetData(parent, &parentData);
		topMost = parentData.topMostParent;
	}

	MIRRORED_LOCK;
	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);

	slot = SimpleLruReadPage(SubTransCtl, page, xid);
	entries = (SubTransData *) SubTransCtl->shared->page_buffer[slot];

	/* The entry is expected to be still unset */
	Assert(entries[entry].parent == InvalidTransactionId);
	Assert(entries[entry].topMostParent == InvalidTransactionId);

	entries[entry].parent = parent;
	entries[entry].topMostParent = topMost;

	SubTransCtl->shared->page_dirty[slot] = true;

	LWLockRelease(SubtransControlLock);
	MIRRORED_UNLOCK;
}
/*
 * Regression test body for MPP-20426.
 *
 * The bug was that DistributedLog_Startup overran into the next page of
 * the DistributedLog.  The intention of the memset with zeros is to reset
 * the rest of the current page when nextXid is in the middle of a page, so
 * that we won't see uncommitted data due to some recovery work.  However,
 * the size of the rest of the page was computed as the size of the part
 * preceding the current xid.  In the worst case the subtransaction shared
 * memory, which follows the distributed log shared memory, was overwritten.
 *
 * state:   test fixture state (unused here).
 * nextXid: XID passed to DistributedLog_Startup as both arguments; this
 *          test is only meaningful when it is not on a page boundary.
 *
 * The function returns nothing; it is declared void explicitly because an
 * omitted return type (implicit int) is not valid C since C99.
 */
static void
MPP_20426(void **state, TransactionId nextXid)
{
	char		pages[BLCKSZ * DtxLogStartupNumPage];
	char		zeros[BLCKSZ];
	int			bytes;

	/* Setup DistributedLogCtl */
	DistributedLogCtl->shared = (SlruShared) malloc(sizeof(SlruSharedData));
	DistributedLogCtl->shared->page_buffer =
		(char **) malloc(DtxLogStartupNumPage * sizeof(char *));
	DistributedLogCtl->shared->page_dirty =
		(bool *) malloc(DtxLogStartupNumPage * sizeof(bool));
	DistributedLogCtl->shared->page_buffer[0] = &pages[0];
	DistributedLogCtl->shared->page_buffer[1] = &pages[BLCKSZ];

	/* Fill both pages with a sentinel so any zeroing is detectable */
	memset(pages, 0x7f, sizeof(pages));
	memset(zeros, 0, sizeof(zeros));

	expect_value(LWLockAcquire, lockid, DistributedLogControlLock);
	expect_value(LWLockAcquire, mode, LW_EXCLUSIVE);
	will_be_called(LWLockAcquire);

	/* This test is only for the case xid is not on the boundary. */
	expect_value(SimpleLruReadPage, ctl, DistributedLogCtl);
	expect_any(SimpleLruReadPage, pageno);
	expect_value(SimpleLruReadPage, xid, nextXid);
	will_return(SimpleLruReadPage, 0);

	expect_value(LWLockRelease, lockid, DistributedLogControlLock);
	will_be_called(LWLockRelease);

	/* Run the function. */
	DistributedLog_Startup(nextXid, nextXid);

	/* DistributedLog_Startup should not overwrite the subsequent block. */
	assert_true(pages[BLCKSZ] == 0x7f);

	/* Make sure the part following the xid is zeroed. */
	bytes = TransactionIdToEntry(nextXid) * sizeof(DistributedLogEntry);
	assert_memory_equal(&pages[bytes], zeros, BLCKSZ - bytes);

	free(DistributedLogCtl->shared->page_dirty);
	free(DistributedLogCtl->shared->page_buffer);
	free(DistributedLogCtl->shared);
}
/*
 * SubTransGetData
 *
 * Fill *subData with the parent and top-most parent recorded for "xid" in
 * the subtrans log.
 *
 * For XIDs that are not normal, the parent is InvalidTransactionId and the
 * top-most parent is the XID itself.  The same substitution of "xid" for
 * the top-most parent is made when the stored value is
 * InvalidTransactionId.
 */
static void
SubTransGetData(TransactionId xid, SubTransData* subData)
{
	MIRRORED_LOCK_DECLARE;

	int			page = TransactionIdToPage(xid);
	int			entry = TransactionIdToEntry(xid);
	int			slot;
	SubTransData *entries;

	/* Asking about XIDs older than TransactionXmin is not allowed */
	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));

	/* Bootstrap and frozen XIDs: no parent, and they are their own top */
	if (!TransactionIdIsNormal(xid))
	{
		subData->parent = InvalidTransactionId;
		subData->topMostParent = xid;
		return;
	}

	MIRRORED_LOCK;

	/* SimpleLruReadPage_ReadOnly takes the control lock for us */
	slot = SimpleLruReadPage_ReadOnly(SubTransCtl, page, xid, NULL);
	entries = (SubTransData *) SubTransCtl->shared->page_buffer[slot];

	subData->parent = entries[entry].parent;
	subData->topMostParent = entries[entry].topMostParent;

	/*
	 * No top-most parent stored means the parent is the main XID, so this
	 * XID is reported as its own top-most parent.
	 */
	if (subData->topMostParent == InvalidTransactionId)
		subData->topMostParent = xid;

	LWLockRelease(SubtransControlLock);
	MIRRORED_UNLOCK;
}
/*
 * Make sure that DistributedLog has room for a newly-allocated XID.
 *
 * newestXact: the XID that was just allocated.  Work is only done when it
 * is the first XID of a DistributedLog page.
 *
 * NB: this is called while holding XidGenLock.  We want it to be very fast
 * most of the time; even when it's not so fast, no actual I/O need happen
 * unless we're forced to write out a dirty DistributedLog or xlog page
 * to make room in shared memory.
 */
void
DistributedLog_Extend(TransactionId newestXact)
{
	MIRRORED_LOCK_DECLARE;

	int			page;

	/*
	 * No work except at first XID of a page.  But beware: just after
	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
	 */
	if (TransactionIdToEntry(newestXact) != 0 &&
		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
		return;

	page = TransactionIdToPage(newestXact);

	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Extend page %d",
		 page);

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	/* Zero the page and make an XLOG entry about it */
	DistributedLog_ZeroPage(page, true);

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;

	/*
	 * TransactionId is unsigned, so it is printed with %u; %d would show
	 * XIDs above 2^31 as negative numbers.
	 */
	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Extend with newest local xid = %u to page = %d",
		 newestXact, page);
}
/*
 * Record that a distributed transaction committed in the distributed log.
 *
 * localXid:         local XID whose entry is written.
 * distribTimeStamp: distributed transaction timestamp to store.
 * distribXid:       distributed transaction id to store.
 * isRedo:           when true, the target page is zeroed first (without
 *                   writing XLOG) if it does not exist yet.
 *
 * If the entry already holds non-zero data, it must match the input
 * values exactly; a mismatch raises ERROR.  A matching entry is left
 * untouched.
 */
void
DistributedLog_SetCommitted(
	TransactionId						localXid,
	DistributedTransactionTimeStamp		distribTimeStamp,
	DistributedTransactionId			distribXid,
	bool								isRedo)
{
	MIRRORED_LOCK_DECLARE;

	int			page = TransactionIdToPage(localXid);
	int			entryno = TransactionIdToEntry(localXid);
	int			slotno;
	DistributedLogEntry *ptr;
	bool		alreadyThere = false;

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	if (isRedo)
	{
		elog((Debug_print_full_dtm ? LOG : DEBUG5),
			 "DistributedLog_SetCommitted check if page %d is present",
			 page);
		if (!SimpleLruPageExists(DistributedLogCtl, page))
		{
			DistributedLog_ZeroPage(page, /* writeXLog */ false);
			elog((Debug_print_full_dtm ? LOG : DEBUG5),
				 "DistributedLog_SetCommitted zeroed page %d",
				 page);
		}
	}

	slotno = SimpleLruReadPage(DistributedLogCtl, page, localXid);
	ptr = (DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slotno];
	ptr += entryno;

	if (ptr->distribTimeStamp != 0 || ptr->distribXid != 0)
	{
		/* Entry is already populated: it has to agree with the input */
		if (ptr->distribTimeStamp != distribTimeStamp)
			elog(ERROR,
				 "Current distributed timestamp = %u does not match input timestamp = %u for local xid = %u in distributed log (page = %d, entryno = %d)",
				 ptr->distribTimeStamp, distribTimeStamp, localXid, page, entryno);

		if (ptr->distribXid != distribXid)
			elog(ERROR,
				 "Current distributed xid = %u does not match input distributed xid = %u for local xid = %u in distributed log (page = %d, entryno = %d)",
				 ptr->distribXid, distribXid, localXid, page, entryno);

		alreadyThere = true;
	}
	else
	{
		ptr->distribTimeStamp = distribTimeStamp;
		ptr->distribXid = distribXid;

		DistributedLogCtl->shared->page_dirty[slotno] = true;
	}

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;

	/*
	 * localXid is an unsigned TransactionId, so it is printed with %u like
	 * in the error messages above; %d would show large XIDs as negative.
	 */
	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_SetCommitted with local xid = %u (page = %d, entryno = %d) and distributed transaction xid = %u (timestamp = %u) status = %s",
		 localXid, page, entryno, distribXid, distribTimeStamp,
		 (alreadyThere ? "already there" : "set"));
}
/*
 * DistributedLog_Startup
 *
 * To be called exactly ONCE during postmaster or standalone-backend
 * startup, after StartupXLOG has initialized ShmemVariableCache->nextXid.
 *
 * Records the page of nextXid as the latest page and, when nextXid falls
 * in the middle of a page, zeroes that page from nextXid's entry to the
 * end of the page.
 */
void
DistributedLog_Startup(
	TransactionId oldestActiveXid,
	TransactionId nextXid)
{
	MIRRORED_LOCK_DECLARE;

	int			firstPage;
	int			lastPage;

	/*
	 * UNDONE: what we really need is the oldest frozen xid.  If it cannot
	 * be obtained, DistributedLog_SetCommitted and
	 * DistributedLog_IsCommitted will have to tolerate a missing page.
	 */
	firstPage = TransactionIdToPage(oldestActiveXid);
	lastPage = TransactionIdToPage(nextXid);

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	elog((Debug_print_full_dtm ? LOG : DEBUG5),
		 "DistributedLog_Startup startPage %d, endPage %d",
		 firstPage, lastPage);

	/* Set our notion of the latest page number */
	DistributedLogCtl->shared->latest_page_number = lastPage;

	/*
	 * Clear the tail of the current DistributedLog page.  Normally it is
	 * all zeroes already, but it is at least theoretically possible that
	 * XLOG replay settled on a nextXID lower than the last XID actually
	 * used and marked in the previous database lifecycle (subtransaction
	 * commit writes clog but makes no WAL entry), so be safe.  Later pages
	 * need no attention because they are zeroed on first use; for the same
	 * reason nothing is done when nextXid sits exactly on a page boundary,
	 * where the "current" page likely does not exist yet anyway.
	 */
	if (TransactionIdToEntry(nextXid) != 0)
	{
		int			firstEntry = TransactionIdToEntry(nextXid);
		int			entriesLeft = ENTRIES_PER_PAGE - firstEntry;
		int			slot;
		DistributedLogEntry *entries;

		slot = SimpleLruReadPage(DistributedLogCtl, lastPage, nextXid);
		entries = (DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slot];

		/* Wipe from nextXid's entry through the end of the page */
		MemSet(&entries[firstEntry], 0, entriesLeft * sizeof(DistributedLogEntry));

		DistributedLogCtl->shared->page_dirty[slot] = true;
	}

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;
}
/* * Find the next lowest transaction with a logged or recorded status. * Currently on distributed commits are recorded. */ bool DistributedLog_ScanForPrevCommitted( TransactionId *indexXid, DistributedTransactionTimeStamp *distribTimeStamp, DistributedTransactionId *distribXid) { MIRRORED_LOCK_DECLARE; TransactionId highXid; int pageno; TransactionId lowXid; int slotno; TransactionId xid; *distribTimeStamp = 0; // Set it to something. *distribXid = 0; if ((*indexXid) == InvalidTransactionId) return false; highXid = (*indexXid) - 1; if (highXid < FirstNormalTransactionId) return false; MIRRORED_LOCK; while (true) { pageno = TransactionIdToPage(highXid); /* * Compute the xid floor for the page. */ lowXid = pageno * (TransactionId) ENTRIES_PER_PAGE; if (lowXid == InvalidTransactionId) lowXid = FirstNormalTransactionId; LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE); /* * Peek to see if page exists. */ if (!SimpleLruPageExists(DistributedLogCtl, pageno)) { LWLockRelease(DistributedLogControlLock); MIRRORED_UNLOCK; *indexXid = InvalidTransactionId; *distribTimeStamp = 0; // Set it to something. *distribXid = 0; return false; } slotno = SimpleLruReadPage(DistributedLogCtl, pageno, highXid); for (xid = highXid; xid >= lowXid; xid--) { int entryno = TransactionIdToEntry(xid); DistributedLogEntry *ptr; ptr = (DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slotno]; ptr += entryno; if (ptr->distribTimeStamp != 0 && ptr->distribXid != 0) { *indexXid = xid; *distribTimeStamp = ptr->distribTimeStamp; *distribXid = ptr->distribXid; LWLockRelease(DistributedLogControlLock); MIRRORED_UNLOCK; return true; } } LWLockRelease(DistributedLogControlLock); if (lowXid == FirstNormalTransactionId) { MIRRORED_UNLOCK; *indexXid = InvalidTransactionId; *distribTimeStamp = 0; // Set it to something. *distribXid = 0; return false; } highXid = lowXid - 1; // Go to last xid of previous page. } MIRRORED_UNLOCK; return false; // We'll never reach this. }
/*
 * DistributedLog_CommittedCheck
 *
 * Look up localXid in the distributed log.
 *
 * Returns true, with *distribTimeStamp and *distribXid set to the stored
 * values, when both are non-zero.  Returns false with both outputs zero
 * when the page is absent or the entry is empty.  An entry with exactly
 * one of the two fields zero raises ERROR.
 */
bool
DistributedLog_CommittedCheck(
	TransactionId						localXid,
	DistributedTransactionTimeStamp		*distribTimeStamp,
	DistributedTransactionId			*distribXid)
{
	MIRRORED_LOCK_DECLARE;

	int			page = TransactionIdToPage(localXid);
	int			entryno = TransactionIdToEntry(localXid);
	int			slot;
	DistributedLogEntry *entries;

	MIRRORED_LOCK;
	LWLockAcquire(DistributedLogControlLock, LW_EXCLUSIVE);

	/* Shortcut: an earlier call already found this page range missing */
	if (DistributedLogShared->knowHighestUnusedPage &&
		page <= DistributedLogShared->highestUnusedPage)
	{
		LWLockRelease(DistributedLogControlLock);
		MIRRORED_UNLOCK;

		*distribTimeStamp = 0;
		*distribXid = 0;
		return false;
	}

	/* Check for the page before trying to read it */
	if (!SimpleLruPageExists(DistributedLogCtl, page))
	{
		if (!DistributedLogShared->knowHighestUnusedPage)
		{
			DistributedLogShared->knowHighestUnusedPage = true;
			DistributedLogShared->highestUnusedPage = page;
		}
		else
		{
			/*
			 * NOTE(review): because of the shortcut above, we only get
			 * here with page > highestUnusedPage, so this condition looks
			 * like it can never be true -- confirm the intended direction
			 * of the comparison.
			 */
			if (DistributedLogShared->highestUnusedPage > page)
				DistributedLogShared->highestUnusedPage = page;
		}

		LWLockRelease(DistributedLogControlLock);
		MIRRORED_UNLOCK;

		*distribTimeStamp = 0;
		*distribXid = 0;
		return false;
	}

	slot = SimpleLruReadPage(DistributedLogCtl, page, localXid);
	entries = (DistributedLogEntry *) DistributedLogCtl->shared->page_buffer[slot];

	*distribTimeStamp = entries[entryno].distribTimeStamp;
	*distribXid = entries[entryno].distribXid;

	LWLockRelease(DistributedLogControlLock);
	MIRRORED_UNLOCK;

	/* Both fields set: committed */
	if (*distribTimeStamp != 0 && *distribXid != 0)
		return true;

	/* Both fields clear: not found */
	if (*distribTimeStamp == 0 && *distribXid == 0)
		return false;

	/* Exactly one field is zero: the entry is inconsistent */
	if (*distribTimeStamp == 0)
		elog(ERROR, "Found zero timestamp for local xid = %u in distributed log (distributed xid = %u, page = %d, entryno = %d)",
			 localXid, *distribXid, page, entryno);

	elog(ERROR, "Found zero distributed xid for local xid = %u in distributed log (dtx start time = %u, page = %d, entryno = %d)",
		 localXid, *distribTimeStamp, page, entryno);

	return false;				/* not reached */
}