/* * We keep some resources across transactions, so we attach everything to a * long-lived ResourceOwner, which prevents the below commit from thinking that * there are reference leaks */ static void start_executor(QueryDesc *queryDesc, MemoryContext context, ResourceOwner owner) { MemoryContext old; ResourceOwner save; StartTransactionCommand(); old = MemoryContextSwitchTo(context); save = CurrentResourceOwner; CurrentResourceOwner = owner; queryDesc->snapshot = GetTransactionSnapshot(); queryDesc->snapshot->copied = true; RegisterSnapshotOnOwner(queryDesc->snapshot, owner); ExecutorStart(queryDesc, 0); queryDesc->snapshot->active_count++; UnregisterSnapshotFromOwner(queryDesc->snapshot, owner); UnregisterSnapshotFromOwner(queryDesc->estate->es_snapshot, owner); CurrentResourceOwner = TopTransactionResourceOwner; MemoryContextSwitchTo(old); CommitTransactionCommand(); CurrentResourceOwner = save; }
/* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. * * Note that the return value may point at static storage that will be modified * by future calls and by CommandCounterIncrement(). Callers should call * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be * used very long. */ Snapshot GetTransactionSnapshot(void) { /* First call in transaction? */ if (!FirstSnapshotSet) { Assert(RegisteredSnapshots == 0); CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); FirstSnapshotSet = true; /* * In serializable mode, the first snapshot must live until end of * xact regardless of what the caller does with it, so we must * register it internally here and unregister it at end of xact. */ if (IsXactIsoLevelSerializable) { CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot, TopTransactionResourceOwner); registered_serializable = true; } return CurrentSnapshot; } if (IsXactIsoLevelSerializable) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); return CurrentSnapshot; }
/* * RegisterSnapshot * Register a snapshot as being in use by the current resource owner * * If InvalidSnapshot is passed, it is not registered. */ Snapshot RegisterSnapshot(Snapshot snapshot) { if (snapshot == InvalidSnapshot) return InvalidSnapshot; return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner); }
static void set_snapshot(EState *estate, ResourceOwner owner) { estate->es_snapshot = GetTransactionSnapshot(); estate->es_snapshot->active_count++; estate->es_snapshot->copied = true; RegisterSnapshotOnOwner(estate->es_snapshot, owner); PushActiveSnapshot(estate->es_snapshot); }
/* * inv_open -- access an existing large object. * * Returns: * Large object descriptor, appropriately filled in. The descriptor * and subsidiary data are allocated in the specified memory context, * which must be suitably long-lived for the caller's purposes. */ LargeObjectDesc * inv_open(Oid lobjId, int flags, MemoryContext mcxt) { LargeObjectDesc *retval; Snapshot snapshot = NULL; int descflags = 0; if (flags & INV_WRITE) { snapshot = NULL; /* instantaneous MVCC snapshot */ descflags = IFS_WRLOCK | IFS_RDLOCK; } else if (flags & INV_READ) { snapshot = GetActiveSnapshot(); descflags = IFS_RDLOCK; } else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid flags for opening a large object: %d", flags))); /* Can't use LargeObjectExists here because we need to specify snapshot */ if (!myLargeObjectExists(lobjId, snapshot)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("large object %u does not exist", lobjId))); /* * We must register the snapshot in TopTransaction's resowner, because it * must stay alive until the LO is closed rather than until the current * portal shuts down. Do this after checking that the LO exists, to avoid * leaking the snapshot if an error is thrown. */ if (snapshot) snapshot = RegisterSnapshotOnOwner(snapshot, TopTransactionResourceOwner); /* All set, create a descriptor */ retval = (LargeObjectDesc *) MemoryContextAlloc(mcxt, sizeof(LargeObjectDesc)); retval->id = lobjId; retval->subid = GetCurrentSubTransactionId(); retval->offset = 0; retval->snapshot = snapshot; retval->flags = descflags; return retval; }
/* * inv_open -- access an existing large object. * * Returns: * Large object descriptor, appropriately filled in. The descriptor * and subsidiary data are allocated in the specified memory context, * which must be suitably long-lived for the caller's purposes. */ LargeObjectDesc * inv_open(Oid lobjId, int flags, MemoryContext mcxt) { LargeObjectDesc *retval; retval = (LargeObjectDesc *) MemoryContextAlloc(mcxt, sizeof(LargeObjectDesc)); retval->id = lobjId; retval->subid = GetCurrentSubTransactionId(); retval->offset = 0; if (flags & INV_WRITE) { retval->snapshot = SnapshotNow; retval->flags = IFS_WRLOCK | IFS_RDLOCK; } else if (flags & INV_READ) { /* * We must register the snapshot in TopTransaction's resowner, because * it must stay alive until the LO is closed rather than until the * current portal shuts down. */ retval->snapshot = RegisterSnapshotOnOwner(GetActiveSnapshot(), TopTransactionResourceOwner); retval->flags = IFS_RDLOCK; } else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid flags for opening a large object: %d", flags))); /* Can't use LargeObjectExists here because it always uses SnapshotNow */ if (!myLargeObjectExists(lobjId, retval->snapshot)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("large object %u does not exist", lobjId))); return retval; }
/* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. * * Note that the return value may point at static storage that will be modified * by future calls and by CommandCounterIncrement(). Callers should call * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be * used very long. */ Snapshot GetTransactionSnapshot(void) { /* First call in transaction? */ if (!FirstSnapshotSet) { Assert(RegisteredSnapshots == 0); /* * In transaction-snapshot mode, the first snapshot must live until * end of xact regardless of what the caller does with it, so we must * register it internally here and unregister it at end of xact. */ if (IsolationUsesXactSnapshot()) { if (IsolationIsSerializable()) CurrentSnapshot = RegisterSerializableTransaction(&CurrentSnapshotData); else { CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot, TopTransactionResourceOwner); } registered_xact_snapshot = true; } else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); FirstSnapshotSet = true; return CurrentSnapshot; } if (IsolationUsesXactSnapshot()) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); return CurrentSnapshot; }
/* * ContinuousQueryWorkerStartup * * Launches a CQ worker, which continuously generates partial query results to send * back to the combiner process. */ void ContinuousQueryWorkerRun(Portal portal, ContinuousViewState *state, QueryDesc *queryDesc, ResourceOwner owner) { EState *estate = NULL; DestReceiver *dest; CmdType operation; MemoryContext oldcontext; int timeoutms = state->maxwaitms; MemoryContext runcontext; CQProcEntry *entry = GetCQProcEntry(MyCQId); ResourceOwner cqowner = ResourceOwnerCreate(NULL, "CQResourceOwner"); bool savereadonly = XactReadOnly; cq_stat_initialize(state->viewid, MyProcPid); dest = CreateDestReceiver(DestCombiner); SetCombinerDestReceiverParams(dest, MyCQId); /* workers only need read-only transactions */ XactReadOnly = true; runcontext = AllocSetContextCreate(TopMemoryContext, "CQRunContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); elog(LOG, "\"%s\" worker %d running", queryDesc->plannedstmt->cq_target->relname, MyProcPid); MarkWorkerAsRunning(MyCQId, MyWorkerId); pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText); TupleBufferInitLatch(WorkerTupleBuffer, MyCQId, MyWorkerId, &MyProc->procLatch); oldcontext = MemoryContextSwitchTo(runcontext); retry: PG_TRY(); { bool xact_commit = true; TimestampTz last_process = GetCurrentTimestamp(); TimestampTz last_commit = GetCurrentTimestamp(); start_executor(queryDesc, runcontext, cqowner); CurrentResourceOwner = cqowner; estate = queryDesc->estate; operation = queryDesc->operation; /* * Initialize context that lives for the duration of a single iteration * of the main worker loop */ CQExecutionContext = AllocSetContextCreate(estate->es_query_cxt, "CQExecutionContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); estate->es_lastoid = InvalidOid; /* * Startup combiner receiver */ (*dest->rStartup) (dest, operation, queryDesc->tupDesc); for (;;) { if (!TupleBufferHasUnreadSlots()) { if (TimestampDifferenceExceeds(last_process, GetCurrentTimestamp(), state->emptysleepms)) { /* force stats flush */ cq_stat_report(true); pgstat_report_activity(STATE_IDLE, queryDesc->sourceText); TupleBufferWait(WorkerTupleBuffer, MyCQId, MyWorkerId); pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText); } else pg_usleep(Min(WAIT_SLEEP_MS, state->emptysleepms) * 1000); } TupleBufferResetNotify(WorkerTupleBuffer, MyCQId, MyWorkerId); if (xact_commit) StartTransactionCommand(); set_snapshot(estate, cqowner); CurrentResourceOwner = cqowner; MemoryContextSwitchTo(estate->es_query_cxt); estate->es_processed = 0; estate->es_filtered = 0; /* * Run plan on a microbatch */ ExecutePlan(estate, queryDesc->planstate, operation, true, 0, timeoutms, ForwardScanDirection, dest); IncrementCQExecutions(1); TupleBufferClearPinnedSlots(); if (state->long_xact) { if (TimestampDifferenceExceeds(last_commit, GetCurrentTimestamp(), LONG_RUNNING_XACT_DURATION)) xact_commit = true; else xact_commit = false; } unset_snapshot(estate, cqowner); if (xact_commit) { CommitTransactionCommand(); last_commit = GetCurrentTimestamp(); } MemoryContextResetAndDeleteChildren(CQExecutionContext); MemoryContextSwitchTo(runcontext); CurrentResourceOwner = cqowner; if (estate->es_processed || estate->es_filtered) { /* * If the CV query is such that the select does not return any tuples * ex: select id where id=99; and id=99 does not exist, then this reset * will fail. What will happen is that the worker will block at the latch for every * allocated slot, TILL a cv returns a non-zero tuple, at which point * the worker will resume a simple sleep for the threshold time. */ last_process = GetCurrentTimestamp(); /* * Send stats to the collector */ cq_stat_report(false); } /* Has the CQ been deactivated? */ if (!entry->active) { if (ActiveSnapshotSet()) unset_snapshot(estate, cqowner); if (IsTransactionState()) CommitTransactionCommand(); break; } } CurrentResourceOwner = cqowner; /* * The cleanup functions below expect these things to be registered */ RegisterSnapshotOnOwner(estate->es_snapshot, cqowner); RegisterSnapshotOnOwner(queryDesc->snapshot, cqowner); RegisterSnapshotOnOwner(queryDesc->crosscheck_snapshot, cqowner); /* cleanup */ ExecutorFinish(queryDesc); ExecutorEnd(queryDesc); FreeQueryDesc(queryDesc); } PG_CATCH(); { EmitErrorReport(); FlushErrorState(); /* Since the worker is read-only, we can simply commit the transaction. */ if (ActiveSnapshotSet()) unset_snapshot(estate, cqowner); if (IsTransactionState()) CommitTransactionCommand(); TupleBufferUnpinAllPinnedSlots(); TupleBufferClearReaders(); /* This resets the es_query_ctx and in turn the CQExecutionContext */ MemoryContextResetAndDeleteChildren(runcontext); IncrementCQErrors(1); if (continuous_query_crash_recovery) goto retry; } PG_END_TRY(); (*dest->rShutdown) (dest); MemoryContextSwitchTo(oldcontext); MemoryContextDelete(runcontext); XactReadOnly = savereadonly; /* * Remove proc-level stats */ cq_stat_report(true); cq_stat_send_purge(state->viewid, MyProcPid, CQ_STAT_WORKER); CurrentResourceOwner = owner; }