/* * Reserve WAL for the currently active slot. * * Compute and set restart_lsn in a manner that's appropriate for the type of * the slot and concurrency safe. */ void ReplicationSlotReserveWal(void) { ReplicationSlot *slot = MyReplicationSlot; Assert(slot != NULL); Assert(slot->data.restart_lsn == InvalidXLogRecPtr); /* * The replication slot mechanism is used to prevent removal of required * WAL. As there is no interlock between this routine and checkpoints, WAL * segments could concurrently be removed when a now stale return value of * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that * this happens we'll just retry. */ while (true) { XLogSegNo segno; /* * For logical slots log a standby snapshot and start logical decoding * at exactly that position. That allows the slot to start up more * quickly. * * That's not needed (or indeed helpful) for physical slots as they'll * start replay at the last logged checkpoint anyway. Instead return * the location of the last redo LSN. While that slightly increases * the chance that we have to retry, it's where a base backup has to * start replay at. */ if (!RecoveryInProgress() && SlotIsLogical(slot)) { XLogRecPtr flushptr; /* start at current insert position */ slot->data.restart_lsn = GetXLogInsertRecPtr(); /* make sure we have enough information to start */ flushptr = LogStandbySnapshot(); /* and make sure it's fsynced to disk */ XLogFlush(flushptr); } else { slot->data.restart_lsn = GetRedoRecPtr(); } /* prevent WAL removal as fast as possible */ ReplicationSlotsComputeRequiredLSN(); /* * If all required WAL is still there, great, otherwise retry. The * slot should prevent further removal of WAL, unless there's a * concurrent ReplicationSlotsComputeRequiredLSN() after we've written * the new restart_lsn above, so normally we should never need to loop * more than twice. */ XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size); if (XLogGetLastRemovedSegno() < segno) break; } }
/* * Write a backup block if needed when we are setting a hint. Note that * this may be called for a variety of page types, not just heaps. * * Callable while holding just share lock on the buffer content. * * We can't use the plain backup block mechanism since that relies on the * Buffer being exclusively locked. Since some modifications (setting LSN, hint * bits) are allowed in a sharelocked buffer that can lead to wal checksum * failures. So instead we copy the page and insert the copied data as normal * record data. * * We only need to do something if page has not yet been full page written in * this checkpoint round. The LSN of the inserted wal record is returned if we * had to write, InvalidXLogRecPtr otherwise. * * It is possible that multiple concurrent backends could attempt to write WAL * records. In that case, multiple copies of the same block would be recorded * in separate WAL records by different backends, though that is still OK from * a correctness perspective. */ XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std) { XLogRecPtr recptr = InvalidXLogRecPtr; XLogRecPtr lsn; XLogRecPtr RedoRecPtr; /* * Ensure no checkpoint can change our view of RedoRecPtr. */ Assert(MyPgXact->delayChkpt); /* * Update RedoRecPtr so that we can make the right decision */ RedoRecPtr = GetRedoRecPtr(); /* * We assume page LSN is first data on *every* page that can be passed to * XLogInsert, whether it has the standard page layout or not. Since we're * only holding a share-lock on the page, we must take the buffer header * lock when we look at the LSN. */ lsn = BufferGetLSNAtomic(buffer); if (lsn <= RedoRecPtr) { int flags; char copied_buffer[BLCKSZ]; char *origdata = (char *) BufferGetBlock(buffer); RelFileNode rnode; ForkNumber forkno; BlockNumber blkno; /* * Copy buffer so we don't have to worry about concurrent hint bit or * lsn updates. We assume pd_lower/upper cannot be changed without an * exclusive lock, so the contents bkp are not racy. */ if (buffer_std) { /* Assume we can omit data between pd_lower and pd_upper */ Page page = BufferGetPage(buffer); uint16 lower = ((PageHeader) page)->pd_lower; uint16 upper = ((PageHeader) page)->pd_upper; memcpy(copied_buffer, origdata, lower); memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper); } else memcpy(copied_buffer, origdata, BLCKSZ); XLogBeginInsert(); flags = REGBUF_FORCE_IMAGE; if (buffer_std) flags |= REGBUF_STANDARD; BufferGetTag(buffer, &rnode, &forkno, &blkno); XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags); recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT); } return recptr; }
static int64 nextval_internal(Oid relid) { SeqTable elm; Relation seqrel; Buffer buf; Page page; HeapTupleData seqtuple; Form_pg_sequence seq; int64 incby, maxv, minv, cache, log, fetch, last; int64 result, next, rescnt = 0; bool logit = false; /* open and AccessShareLock sequence */ init_sequence(relid, &elm, &seqrel); if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_USAGE | ACL_UPDATE) != ACLCHECK_OK) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for sequence %s", RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ if (!seqrel->rd_islocaltemp) PreventCommandIfReadOnly("nextval()"); /* * Forbid this during parallel operation because, to make it work, * the cooperating backends would need to share the backend-local cached * sequence information. Currently, we don't support that. */ PreventCommandIfParallelMode("nextval()"); if (elm->last != elm->cached) /* some numbers were cached */ { Assert(elm->last_valid); Assert(elm->increment != 0); elm->last += elm->increment; relation_close(seqrel, NoLock); last_used_seq = elm; return elm->last; } /* lock page' buffer and read tuple */ seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple); page = BufferGetPage(buf); last = next = result = seq->last_value; incby = seq->increment_by; maxv = seq->max_value; minv = seq->min_value; fetch = cache = seq->cache_value; log = seq->log_cnt; if (!seq->is_called) { rescnt++; /* return last_value if not is_called */ fetch--; } /* * Decide whether we should emit a WAL log record. If so, force up the * fetch count to grab SEQ_LOG_VALS more values than we actually need to * cache. (These will then be usable without logging.) * * If this is the first nextval after a checkpoint, we must force a new___ * WAL record to be written anyway, else replay starting from the * checkpoint would fail to advance the sequence past the logged values. * In this case we may as well fetch extra values. */ if (log < fetch || !seq->is_called) { /* forced log to satisfy local demand for values */ fetch = log = fetch + SEQ_LOG_VALS; logit = true; } else { XLogRecPtr redoptr = GetRedoRecPtr(); if (PageGetLSN(page) <= redoptr) { /* last update of seq was before checkpoint */ fetch = log = fetch + SEQ_LOG_VALS; logit = true; } } while (fetch) /* try to fetch cache [+ log ] numbers */ { /* * Check MAXVALUE for ascending sequences and MINVALUE for descending * sequences */ if (incby > 0) { /* ascending sequence */ if ((maxv >= 0 && next > maxv - incby) || (maxv < 0 && next + incby > maxv)) { if (rescnt > 0) break; /* stop fetching */ if (!seq->is_cycled) { char buf[100]; snprintf(buf, sizeof(buf), INT64_FORMAT, maxv); ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("nextval: reached maximum value of sequence \"%s\" (%s)", RelationGetRelationName(seqrel), buf))); } next = minv; } else next += incby; } else { /* descending sequence */ if ((minv < 0 && next < minv - incby) || (minv >= 0 && next + incby < minv)) { if (rescnt > 0) break; /* stop fetching */ if (!seq->is_cycled) { char buf[100]; snprintf(buf, sizeof(buf), INT64_FORMAT, minv); ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("nextval: reached minimum value of sequence \"%s\" (%s)", RelationGetRelationName(seqrel), buf))); } next = maxv; } else next += incby; } fetch--; if (rescnt < cache) { log--; rescnt++; last = next; if (rescnt == 1) /* if it's first result - */ result = next; /* it's what to return */ } } log -= fetch; /* adjust for any unfetched numbers */ Assert(log >= 0); /* save info in local cache */ elm->last = result; /* last returned number */ elm->cached = last; /* last fetched number */ elm->last_valid = true; last_used_seq = elm; /* * If something needs to be WAL logged, acquire an xid, so this * transaction's commit will trigger a WAL flush and wait for * syncrep. It's sufficient to ensure the toplevel transaction has an xid, * no need to assign xids subxacts, that'll already trigger an appropriate * wait. (Have to do that here, so we're outside the critical section) */ if (logit && RelationNeedsWAL(seqrel)) GetTopTransactionId(); /* ready to change the on-disk (or really, in-buffer) tuple */ START_CRIT_SECTION(); /* * We must mark the buffer dirty before doing XLogInsert(); see notes in * SyncOneBuffer(). However, we don't apply the desired changes just yet. * This looks like a violation of the buffer update protocol, but it is in * fact safe because we hold exclusive lock on the buffer. Any other * process, including a checkpoint, that tries to examine the buffer * contents will block until we release the lock, and then will see the * final state that we install below. */ MarkBufferDirty(buf); /* XLOG stuff */ if (logit && RelationNeedsWAL(seqrel)) { xl_seq_rec xlrec; XLogRecPtr recptr; /* * We don't log the current state of the tuple, but rather the state * as it would appear after "log" more fetches. This lets us skip * that many future WAL records, at the cost that we lose those * sequence values if we crash. */ XLogBeginInsert(); XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); /* set values that will be saved in xlog */ seq->last_value = next; seq->is_called = true; seq->log_cnt = 0; xlrec.node = seqrel->rd_node; XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec)); XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); PageSetLSN(page, recptr); } /* Now update sequence tuple to the intended final state */ seq->last_value = last; /* last fetched number */ seq->is_called = true; seq->log_cnt = log; /* how much is logged */ END_CRIT_SECTION(); UnlockReleaseBuffer(buf); relation_close(seqrel, NoLock); return result; }