/*
 * xact_redo
 *
 * Replay a single transaction-manager XLOG record.
 *
 * The op code is obtained by masking XLR_INFO_MASK out of record->xl_info.
 * For a commit record, record->xl_xid is handed to TransactionIdCommit; for
 * an abort record, it is handed to TransactionIdAbort.  Any other op code
 * is reported via elog at PANIC level.
 *
 * The lsn argument is not referenced by this function.
 */
void
xact_redo(XLogRecPtr lsn, XLogRecord *record)
{
	uint8		opcode = record->xl_info & ~XLR_INFO_MASK;

	if (opcode == XLOG_XACT_COMMIT)
	{
		TransactionIdCommit(record->xl_xid);
		/* TODO: files of all dropped relations should be removed here */
		return;
	}

	if (opcode == XLOG_XACT_ABORT)
	{
		TransactionIdAbort(record->xl_xid);
		/* TODO: files of all failed-to-be-created relations should be removed here */
		return;
	}

	/* Neither commit nor abort: we do not know how to replay this record */
	elog(PANIC, "xact_redo: unknown op code %u", opcode);
}
/*
 * RecordTransactionCommitPrepared
 *
 * Write and flush the XLOG commit record for a prepared transaction, then
 * mark the transaction and its committed children as committed.
 *
 *	xid			- XID of the prepared transaction being committed
 *	nchildren	- number of entries in children[]
 *	children	- XIDs of the committed child transactions
 *	nrels		- number of entries in rels[]
 *	rels		- relations to delete, dumped into the commit record
 *
 * This closely follows RecordTransactionCommit; in particular the
 * CheckpointStartLock must be held (shared) across the XLOG insert and the
 * commit-status update to avoid a race condition.
 *
 * The transaction is known to have made at least one XLOG entry (its
 * PREPARE), so the commit record can never be optimized away.
 */
static void
RecordTransactionCommitPrepared(TransactionId xid,
								int nchildren,
								TransactionId *children,
								int nrels,
								RelFileNode *rels)
{
	xl_xact_commit_prepared xlrec;
	XLogRecData rdata[3];
	XLogRecData *tail = &rdata[0];	/* last element linked into the chain */
	XLogRecPtr	recptr;

	START_CRIT_SECTION();

	/* Same reasoning as in RecordTransactionCommit */
	LWLockAcquire(CheckpointStartLock, LW_SHARED);

	/* Fill in the fixed-size part of the commit record */
	xlrec.xid = xid;
	xlrec.crec.xtime = time(NULL);
	xlrec.crec.nrels = nrels;
	xlrec.crec.nsubxacts = nchildren;

	rdata[0].data = (char *) (&xlrec);
	rdata[0].len = MinSizeOfXactCommitPrepared;
	rdata[0].buffer = InvalidBuffer;

	/* Append the array of relations to delete, if there are any */
	if (nrels > 0)
	{
		XLogRecData *seg = &rdata[1];

		seg->data = (char *) rels;
		seg->len = nrels * sizeof(RelFileNode);
		seg->buffer = InvalidBuffer;

		tail->next = seg;
		tail = seg;
	}

	/* Append the array of committed child XIDs, if there are any */
	if (nchildren > 0)
	{
		XLogRecData *seg = &rdata[2];

		seg->data = (char *) children;
		seg->len = nchildren * sizeof(TransactionId);
		seg->buffer = InvalidBuffer;

		tail->next = seg;
		tail = seg;
	}

	/* Terminate the chain at whichever element ended up last */
	tail->next = NULL;

	recptr = XLogInsert(RM_XACT_ID,
						XLOG_XACT_COMMIT_PREPARED | XLOG_NO_TRAN,
						rdata);

	/* Unlike RecordTransactionCommit, no sleep is attempted before the flush */

	/* Force the commit record out to disk */
	XLogFlush(recptr);

	/*
	 * Record the commit in pg_clog.  The parent goes first, then the
	 * children, to avoid race conditions.
	 */
	TransactionIdCommit(xid);
	TransactionIdCommitTree(nchildren, children);

	/* Checkpoints may start again */
	LWLockRelease(CheckpointStartLock);

	END_CRIT_SECTION();
}
/* * RecordTransactionCommit */ void RecordTransactionCommit(void) { /* * If we made neither any XLOG entries nor any temp-rel updates, we * can omit recording the transaction commit at all. */ if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate) { TransactionId xid = GetCurrentTransactionId(); bool madeTCentries; XLogRecPtr recptr; /* Tell bufmgr and smgr to prepare for commit */ BufmgrCommit(); START_CRIT_SECTION(); /* * If our transaction made any transaction-controlled XLOG entries, * we need to lock out checkpoint start between writing our XLOG * record and updating pg_clog. Otherwise it is possible for the * checkpoint to set REDO after the XLOG record but fail to flush the * pg_clog update to disk, leading to loss of the transaction commit * if we crash a little later. Slightly klugy fix for problem * discovered 2004-08-10. * * (If it made no transaction-controlled XLOG entries, its XID * appears nowhere in permanent storage, so no one else will ever care * if it committed; so it doesn't matter if we lose the commit flag.) * * Note we only need a shared lock. */ madeTCentries = (MyLastRecPtr.xrecoff != 0); if (madeTCentries) LWLockAcquire(CheckpointStartLock, LW_SHARED); /* * We only need to log the commit in XLOG if the transaction made * any transaction-controlled XLOG entries. */ if (madeTCentries) { /* Need to emit a commit record */ XLogRecData rdata; xl_xact_commit xlrec; xlrec.xtime = time(NULL); rdata.buffer = InvalidBuffer; rdata.data = (char *) (&xlrec); rdata.len = SizeOfXactCommit; rdata.next = NULL; /* * XXX SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP */ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, &rdata); } else { /* Just flush through last record written by me */ recptr = ProcLastRecEnd; } /* * We must flush our XLOG entries to disk if we made any XLOG * entries, whether in or out of transaction control. 
For * example, if we reported a nextval() result to the client, this * ensures that any XLOG record generated by nextval will hit the * disk before we report the transaction committed. */ if (MyXactMadeXLogEntry) { /* * Sleep before flush! So we can flush more than one commit * records per single fsync. (The idea is some other backend * may do the XLogFlush while we're sleeping. This needs work * still, because on most Unixen, the minimum select() delay * is 10msec or more, which is way too long.) * * We do not sleep if enableFsync is not turned on, nor if there * are fewer than CommitSiblings other backends with active * transactions. */ if (CommitDelay > 0 && enableFsync && CountActiveBackends() >= CommitSiblings) { struct timeval delay; delay.tv_sec = 0; delay.tv_usec = CommitDelay; (void) select(0, NULL, NULL, NULL, &delay); } XLogFlush(recptr); } /* * We must mark the transaction committed in clog if its XID * appears either in permanent rels or in local temporary rels. We * test this by seeing if we made transaction-controlled entries * *OR* local-rel tuple updates. Note that if we made only the * latter, we have not emitted an XLOG record for our commit, and * so in the event of a crash the clog update might be lost. This * is okay because no one else will ever care whether we * committed. */ if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) TransactionIdCommit(xid); /* Unlock checkpoint lock if we acquired it */ if (madeTCentries) LWLockRelease(CheckpointStartLock); END_CRIT_SECTION(); } /* Break the chain of back-links in the XLOG records I output */ MyLastRecPtr.xrecoff = 0; MyXactMadeXLogEntry = false; MyXactMadeTempRelUpdate = false; /* Show myself as out of the transaction in PGPROC array */ MyProc->logRec.xrecoff = 0; }