/*
 * GetLatestSnapshot
 *      Get a snapshot that is up-to-date as of the current instant,
 *      even if we are executing in transaction-snapshot mode.
 */
Snapshot
GetLatestSnapshot(void)
{
    /*
     * We might be able to relax this, but nothing that could otherwise work
     * needs it.
     */
    if (IsInParallelMode())
        elog(ERROR,
             "cannot update SecondarySnapshot during a parallel operation");

    /*
     * So far there are no cases requiring support for GetLatestSnapshot()
     * during logical decoding, but it wouldn't be hard to add if required.
     */
    Assert(!HistoricSnapshotActive());

    /* If first call in transaction, go ahead and set the xact snapshot */
    if (!FirstSnapshotSet)
        return GetTransactionSnapshot();

    SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);

    return SecondarySnapshot;
}
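Because GetLatestSnapshot() hands back the static SecondarySnapshotData storage, callers that need the snapshot to survive later snapshot activity typically register it first. A minimal sketch of that pattern, under the assumption that the caller is outside parallel mode (RegisterSnapshot and UnregisterSnapshot are the real snapmgr.c APIs; the surrounding usage is illustrative only):

    /* Illustrative caller: pin an up-to-date snapshot for the duration of a scan. */
    Snapshot    snap = RegisterSnapshot(GetLatestSnapshot());

    /* ... scan or validate data using snap ... */

    UnregisterSnapshot(snap);   /* drop the registration when done */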
static void
PreventAdvisoryLocksInParallelMode(void)
{
    if (IsInParallelMode())
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
                 errmsg("cannot use advisory locks during a parallel operation")));
}
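For context, here is how such a guard is typically invoked: each SQL-callable advisory-lock function checks for parallel mode before touching the lock manager. The function below is modeled on pg_advisory_lock(bigint) in lockfuncs.c, but treat it as a sketch rather than a verbatim excerpt:

    Datum
    pg_advisory_lock_int8(PG_FUNCTION_ARGS)
    {
        int64       key = PG_GETARG_INT64(0);
        LOCKTAG     tag;

        /* Refuse to take session-level locks while parallel workers may exist. */
        PreventAdvisoryLocksInParallelMode();

        SET_LOCKTAG_INT64(tag, key);

        /* sessionLock = true, dontWait = false: block until the lock is granted */
        (void) LockAcquire(&tag, ExclusiveLock, true, false);

        PG_RETURN_VOID();
    }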
/*
 * RelationMapUpdateMap
 *
 * Install a new relfilenode mapping for the specified relation.
 *
 * If immediate is true (or we're bootstrapping), the mapping is activated
 * immediately.  Otherwise it is made pending until CommandCounterIncrement.
 */
void
RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, bool immediate)
{
    RelMapFile *map;

    if (IsBootstrapProcessingMode())
    {
        /*
         * In bootstrap mode, the mapping gets installed in permanent map.
         */
        if (shared)
            map = &shared_map;
        else
            map = &local_map;
    }
    else
    {
        /*
         * We don't currently support map changes within subtransactions, or
         * when in parallel mode.  This could be done with more bookkeeping
         * infrastructure, but it doesn't presently seem worth it.
         */
        if (GetCurrentTransactionNestLevel() > 1)
            elog(ERROR, "cannot change relation mapping within subtransaction");

        if (IsInParallelMode())
            elog(ERROR, "cannot change relation mapping in parallel mode");

        if (immediate)
        {
            /* Make it active, but only locally */
            if (shared)
                map = &active_shared_updates;
            else
                map = &active_local_updates;
        }
        else
        {
            /* Make it pending */
            if (shared)
                map = &pending_shared_updates;
            else
                map = &pending_local_updates;
        }
    }
    apply_map_update(map, relationId, fileNode, true);
}
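As a hypothetical illustration of the immediate-versus-pending distinction described in the header comment: a rewrite path that has just written a new relfilenode for a mapped relation would normally queue a pending update, so the change only takes effect at the next CommandCounterIncrement. The names rel and newNode below are placeholders, not identifiers from the source above:

    /* Hypothetical caller: record the new relfilenode for a mapped relation. */
    RelationMapUpdateMap(RelationGetRelid(rel),     /* OID of the mapped relation */
                         newNode,                   /* freshly created relfilenode */
                         rel->rd_rel->relisshared,  /* choose shared vs. local map */
                         false);                    /* pending until CCI */

    CommandCounterIncrement();      /* the pending mapping becomes active here */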
/*
 * UpdateActiveSnapshotCommandId
 *
 * Update the current CID of the active snapshot.  This can only be applied
 * to a snapshot that is not referenced elsewhere.
 */
void
UpdateActiveSnapshotCommandId(void)
{
    CommandId   save_curcid,
                curcid;

    Assert(ActiveSnapshot != NULL);
    Assert(ActiveSnapshot->as_snap->active_count == 1);
    Assert(ActiveSnapshot->as_snap->regd_count == 0);

    /*
     * Don't allow modification of the active snapshot during parallel
     * operation.  We share the snapshot with worker backends at the
     * beginning of a parallel operation, so any change to the snapshot can
     * lead to inconsistencies.  We have other defenses against
     * CommandCounterIncrement, but there are a few places that call this
     * directly, so we put an additional guard here.
     */
    save_curcid = ActiveSnapshot->as_snap->curcid;
    curcid = GetCurrentCommandId(false);
    if (IsInParallelMode() && save_curcid != curcid)
        elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
    ActiveSnapshot->as_snap->curcid = curcid;
}
/*
 * GetTransactionSnapshot
 *      Get the appropriate snapshot for a new query in a transaction.
 *
 * Note that the return value may point at static storage that will be
 * modified by future calls and by CommandCounterIncrement().  Callers should
 * call RegisterSnapshot or PushActiveSnapshot on the returned snap if it is
 * to be used very long.
 */
Snapshot
GetTransactionSnapshot(void)
{
    /*
     * Return historic snapshot if doing logical decoding.  We'll never need
     * a non-historic transaction snapshot in this (sub-)transaction, so
     * there's no need to be careful to set one up for later calls to
     * GetTransactionSnapshot().
     */
    if (HistoricSnapshotActive())
    {
        Assert(!FirstSnapshotSet);
        return HistoricSnapshot;
    }

    /* First call in transaction? */
    if (!FirstSnapshotSet)
    {
        Assert(pairingheap_is_empty(&RegisteredSnapshots));
        Assert(FirstXactSnapshot == NULL);

        if (IsInParallelMode())
            elog(ERROR,
                 "cannot take query snapshot during a parallel operation");

        /*
         * In transaction-snapshot mode, the first snapshot must live until
         * end of xact regardless of what the caller does with it, so we must
         * make a copy of it rather than returning CurrentSnapshotData
         * directly.  Furthermore, if we're running in serializable mode,
         * predicate.c needs to wrap the snapshot fetch in its own
         * processing.
         */
        if (IsolationUsesXactSnapshot())
        {
            /* First, create the snapshot in CurrentSnapshotData */
            if (IsolationIsSerializable())
                CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
            else
                CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
            /* Make a saved copy */
            CurrentSnapshot = CopySnapshot(CurrentSnapshot);
            FirstXactSnapshot = CurrentSnapshot;
            /* Mark it as "registered" in FirstXactSnapshot */
            FirstXactSnapshot->regd_count++;
            pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
        }
        else
            CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);

        /* Don't allow catalog snapshot to be older than xact snapshot. */
        CatalogSnapshotStale = true;

        FirstSnapshotSet = true;
        return CurrentSnapshot;
    }

    if (IsolationUsesXactSnapshot())
        return CurrentSnapshot;

    /* Don't allow catalog snapshot to be older than xact snapshot. */
    CatalogSnapshotStale = true;

    CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);

    return CurrentSnapshot;
}
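The header comment's advice to register or push the returned snapshot matters because the static CurrentSnapshotData storage is reused. A minimal sketch of the common caller pattern follows; the wrapper function is hypothetical, while PushActiveSnapshot, PopActiveSnapshot, and GetTransactionSnapshot are the real snapmgr.c entry points:

    /* Hypothetical caller: run a query-like operation under the xact snapshot. */
    static void
    run_with_transaction_snapshot(void)
    {
        /*
         * Push the snapshot onto the active-snapshot stack; the static
         * snapshot is copied there, so later calls to GetTransactionSnapshot()
         * or CommandCounterIncrement() cannot clobber what we are using.
         */
        PushActiveSnapshot(GetTransactionSnapshot());

        /* ... execute something that relies on GetActiveSnapshot() ... */

        PopActiveSnapshot();
    }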
/* ----------------------------------------------------------------
 *      ExecGather(node)
 *
 *      Scans the relation via multiple workers and returns
 *      the next qualifying tuple.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecGather(GatherState *node)
{
    TupleTableSlot *fslot = node->funnel_slot;
    int         i;
    TupleTableSlot *slot;
    TupleTableSlot *resultSlot;
    ExprDoneCond isDone;
    ExprContext *econtext;

    /*
     * Initialize the parallel context and workers on first execution.  We do
     * this on first execution rather than during node initialization, as it
     * needs to allocate a large dynamic segment, so it is better to do it
     * only if it is really needed.
     */
    if (!node->initialized)
    {
        EState     *estate = node->ps.state;
        Gather     *gather = (Gather *) node->ps.plan;

        /*
         * Sometimes we might have to run without parallelism; but if
         * parallel mode is active then we can try to fire up some workers.
         */
        if (gather->num_workers > 0 && IsInParallelMode())
        {
            ParallelContext *pcxt;
            bool        got_any_worker = false;

            /* Initialize the workers required to execute Gather node. */
            if (!node->pei)
                node->pei = ExecInitParallelPlan(node->ps.lefttree,
                                                 estate,
                                                 gather->num_workers);

            /*
             * Register backend workers.  We might not get as many as we
             * requested, or indeed any at all.
             */
            pcxt = node->pei->pcxt;
            LaunchParallelWorkers(pcxt);

            /* Set up tuple queue readers to read the results. */
            if (pcxt->nworkers > 0)
            {
                node->nreaders = 0;
                node->reader =
                    palloc(pcxt->nworkers * sizeof(TupleQueueReader *));

                for (i = 0; i < pcxt->nworkers; ++i)
                {
                    if (pcxt->worker[i].bgwhandle == NULL)
                        continue;

                    shm_mq_set_handle(node->pei->tqueue[i],
                                      pcxt->worker[i].bgwhandle);
                    node->reader[node->nreaders++] =
                        CreateTupleQueueReader(node->pei->tqueue[i],
                                               fslot->tts_tupleDescriptor);
                    got_any_worker = true;
                }
            }

            /* No workers?  Then never mind. */
            if (!got_any_worker)
                ExecShutdownGatherWorkers(node);
        }

        /* Run plan locally if no workers or not single-copy. */
        node->need_to_scan_locally = (node->reader == NULL)
            || !gather->single_copy;
        node->initialized = true;
    }

    /*
     * Check to see if we're still projecting out tuples from a previous scan
     * tuple (because there is a function-returning-set in the projection
     * expressions).  If so, try to project another one.
     */
    if (node->ps.ps_TupFromTlist)
    {
        resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone);
        if (isDone == ExprMultipleResult)
            return resultSlot;
        /* Done with that source tuple... */
        node->ps.ps_TupFromTlist = false;
    }

    /*
     * Reset per-tuple memory context to free any expression evaluation
     * storage allocated in the previous tuple cycle.  Note we can't do this
     * until we're done projecting.
     */
    econtext = node->ps.ps_ExprContext;
    ResetExprContext(econtext);

    /* Get and return the next tuple, projecting if necessary. */
    for (;;)
    {
        /*
         * Get next tuple, either from one of our workers, or by running the
         * plan ourselves.
         */
        slot = gather_getnext(node);
        if (TupIsNull(slot))
            return NULL;

        /*
         * Form the result tuple using ExecProject(), and return it --- unless
         * the projection produces an empty set, in which case we must loop
         * back around for another tuple.
         */
        econtext->ecxt_outertuple = slot;
        resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone);

        if (isDone != ExprEndResult)
        {
            node->ps.ps_TupFromTlist = (isDone == ExprMultipleResult);
            return resultSlot;
        }
    }

    return slot;
}
/*
 * We have to cut & paste the code of GetNewTransactionId from varsup.c
 * because we change the way of advancing ShmemVariableCache->nextXid.
 */
TransactionId
DtmGetNewTransactionId(bool isSubXact)
{
    TransactionId xid;

    XTM_INFO("%d: GetNewTransactionId\n", getpid());

    /*
     * Workers synchronize transaction state at the beginning of each
     * parallel operation, so we can't account for new XIDs after that point.
     */
    if (IsInParallelMode())
        elog(ERROR, "cannot assign TransactionIds during a parallel operation");

    /*
     * During bootstrap initialization, we return the special bootstrap
     * transaction id.
     */
    if (IsBootstrapProcessingMode())
    {
        Assert(!isSubXact);
        MyPgXact->xid = BootstrapTransactionId;
        return BootstrapTransactionId;
    }

    /* safety check, we should never get this far in a HS slave */
    if (RecoveryInProgress())
        elog(ERROR, "cannot assign TransactionIds during recovery");

    LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
    xid = DtmGetNextXid();

    /*----------
     * Check to see if it's safe to assign another XID.  This protects against
     * catastrophic data loss due to XID wraparound.  The basic rules are:
     *
     * If we're past xidVacLimit, start trying to force autovacuum cycles.
     * If we're past xidWarnLimit, start issuing warnings.
     * If we're past xidStopLimit, refuse to execute transactions, unless
     * we are running in single-user mode (which gives an escape hatch
     * to the DBA who somehow got past the earlier defenses).
     *
     * Note that this coding also appears in GetNewMultiXactId.
     *----------
     */
    if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidVacLimit))
    {
        /*
         * For safety's sake, we release XidGenLock while sending signals,
         * warnings, etc.  This is not so much because we care about
         * preserving concurrency in this situation, as to avoid any
         * possibility of deadlock while doing get_database_name().  First,
         * copy all the shared values we'll need in this path.
         */
        TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit;
        TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit;
        TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit;
        Oid         oldest_datoid = ShmemVariableCache->oldestXidDB;

        LWLockRelease(XidGenLock);

        /*
         * To avoid swamping the postmaster with signals, we issue the
         * autovac request only once per 64K transaction starts.  This still
         * gives plenty of chances before we get into real trouble.
         */
        if (IsUnderPostmaster && (xid % 65536) == 0)
            SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);

        if (IsUnderPostmaster &&
            TransactionIdFollowsOrEquals(xid, xidStopLimit))
        {
            char       *oldest_datname = get_database_name(oldest_datoid);

            /* complain even if that DB has disappeared */
            if (oldest_datname)
                ereport(ERROR,
                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                         errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"",
                                oldest_datname),
                         errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
                                 "You might also need to commit or roll back old prepared transactions.")));
            else
                ereport(ERROR,
                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                         errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u",
                                oldest_datoid),
                         errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
                                 "You might also need to commit or roll back old prepared transactions.")));
        }
        else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
        {
            char       *oldest_datname = get_database_name(oldest_datoid);

            /* complain even if that DB has disappeared */
            if (oldest_datname)
                ereport(WARNING,
                        (errmsg("database \"%s\" must be vacuumed within %u transactions",
                                oldest_datname,
                                xidWrapLimit - xid),
                         errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
                                 "You might also need to commit or roll back old prepared transactions.")));
            else
                ereport(WARNING,
                        (errmsg("database with OID %u must be vacuumed within %u transactions",
                                oldest_datoid,
                                xidWrapLimit - xid),
                         errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
                                 "You might also need to commit or roll back old prepared transactions.")));
        }

        /* Re-acquire lock and start over */
        LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
        xid = DtmGetNextXid();
    }

    /*
     * If we are allocating the first XID of a new page of the commit log,
     * zero out that commit-log page before returning.  We must do this while
     * holding XidGenLock, else another xact could acquire and commit a later
     * XID before we zero the page.  Fortunately, a page of the commit log
     * holds 32K or more transactions, so we don't have to do this very
     * often.
     *
     * Extend pg_subtrans and pg_commit_ts too.
     */
    if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->nextXid))
    {
        ExtendCLOG(xid);
        ExtendCommitTs(xid);
        ExtendSUBTRANS(xid);
    }

    /*
     * Now advance the nextXid counter.  This must not happen until after we
     * have successfully completed ExtendCLOG() --- if that routine fails, we
     * want the next incoming transaction to try it again.  We cannot assign
     * more XIDs until there is CLOG space for them.
     */
    if (xid == ShmemVariableCache->nextXid)
        TransactionIdAdvance(ShmemVariableCache->nextXid);
    else
        Assert(TransactionIdPrecedes(xid, ShmemVariableCache->nextXid));

    /*
     * We must store the new XID into the shared ProcArray before releasing
     * XidGenLock.  This ensures that every active XID older than
     * latestCompletedXid is present in the ProcArray, which is essential for
     * correct OldestXmin tracking; see src/backend/access/transam/README.
     *
     * XXX by storing xid into MyPgXact without acquiring ProcArrayLock, we
     * are relying on fetch/store of an xid to be atomic, else other backends
     * might see a partially-set xid here.  But holding both locks at once
     * would be a nasty concurrency hit.  So for now, assume atomicity.
     *
     * Note that readers of PGXACT xid fields should be careful to fetch the
     * value only once, rather than assume they can read a value multiple
     * times and get the same answer each time.
     *
     * The same comments apply to the subxact xid count and overflow fields.
     *
     * A solution to the atomic-store problem would be to give each PGXACT
     * its own spinlock used only for fetching/storing that PGXACT's xid and
     * related fields.
     *
     * If there's no room to fit a subtransaction XID into PGPROC, set the
     * cache-overflowed flag instead.  This forces readers to look in
     * pg_subtrans to map subtransaction XIDs up to top-level XIDs.  There is
     * a race-condition window, in that the new XID will not appear as
     * running until its parent link has been placed into pg_subtrans.
     * However, that will happen before anyone could possibly have a reason
     * to inquire about the status of the XID, so it seems OK.  (Snapshots
     * taken during this window *will* include the parent XID, so they will
     * deliver the correct answer later on when someone does have a reason to
     * inquire.)
     */
    {
        /*
         * Use volatile pointer to prevent code rearrangement; other backends
         * could be examining my subxids info concurrently, and we don't want
         * them to see an invalid intermediate state, such as incrementing
         * nxids before filling the array entry.  Note we are assuming that
         * TransactionId and int fetch/store are atomic.
         */
        volatile PGPROC *myproc = MyProc;
        volatile PGXACT *mypgxact = MyPgXact;

        if (!isSubXact)
            mypgxact->xid = xid;
        else
        {
            int         nxids = mypgxact->nxids;

            if (nxids < PGPROC_MAX_CACHED_SUBXIDS)
            {
                myproc->subxids.xids[nxids] = xid;
                mypgxact->nxids = nxids + 1;
            }
            else
                mypgxact->overflowed = true;
        }
    }

    LWLockRelease(XidGenLock);

    return xid;
}
/* ----------------------------------------------------------------
 *      ExecGatherMerge(node)
 *
 *      Scans the relation via multiple workers and returns
 *      the next qualifying tuple.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecGatherMerge(PlanState *pstate)
{
    GatherMergeState *node = castNode(GatherMergeState, pstate);
    TupleTableSlot *slot;
    ExprContext *econtext;
    int         i;

    CHECK_FOR_INTERRUPTS();

    /*
     * As with Gather, we don't launch workers until this node is actually
     * executed.
     */
    if (!node->initialized)
    {
        EState     *estate = node->ps.state;
        GatherMerge *gm = (GatherMerge *) node->ps.plan;

        /*
         * Sometimes we might have to run without parallelism; but if
         * parallel mode is active then we can try to fire up some workers.
         */
        if (gm->num_workers > 0 && IsInParallelMode())
        {
            ParallelContext *pcxt;

            /* Initialize data structures for workers. */
            if (!node->pei)
                node->pei = ExecInitParallelPlan(node->ps.lefttree,
                                                 estate,
                                                 gm->num_workers);

            /* Try to launch workers. */
            pcxt = node->pei->pcxt;
            LaunchParallelWorkers(pcxt);
            node->nworkers_launched = pcxt->nworkers_launched;

            /* Set up tuple queue readers to read the results. */
            if (pcxt->nworkers_launched > 0)
            {
                node->nreaders = 0;
                node->reader = palloc(pcxt->nworkers_launched *
                                      sizeof(TupleQueueReader *));

                Assert(gm->numCols);

                for (i = 0; i < pcxt->nworkers_launched; ++i)
                {
                    shm_mq_set_handle(node->pei->tqueue[i],
                                      pcxt->worker[i].bgwhandle);
                    node->reader[node->nreaders++] =
                        CreateTupleQueueReader(node->pei->tqueue[i],
                                               node->tupDesc);
                }
            }
            else
            {
                /* No workers?  Then never mind. */
                ExecShutdownGatherMergeWorkers(node);
            }
        }

        /* always allow leader to participate */
        node->need_to_scan_locally = true;
        node->initialized = true;
    }

    /*
     * Reset per-tuple memory context to free any expression evaluation
     * storage allocated in the previous tuple cycle.
     */
    econtext = node->ps.ps_ExprContext;
    ResetExprContext(econtext);

    /*
     * Get next tuple, either from one of our workers, or by running the plan
     * ourselves.
     */
    slot = gather_merge_getnext(node);
    if (TupIsNull(slot))
        return NULL;

    /*
     * Form the result tuple using ExecProject(), and return it --- unless
     * the projection produces an empty set, in which case we must loop back
     * around for another tuple.
     */
    econtext->ecxt_outertuple = slot;
    return ExecProject(node->ps.ps_ProjInfo);
}