Example No. 1
/*
 * Send reply message to primary, indicating our current XLOG positions and
 * the current time.
 */
static void
XLogWalRcvSendReply(void)
{
	char		buf[sizeof(StandbyReplyMessage) + 1];
	TimestampTz	now;

	/*
	 * If the user doesn't want status to be reported to the master, be sure
	 * to exit before doing anything at all.
	 */
	if (wal_receiver_status_interval <= 0)
		return;

	/* Get current timestamp. */
	now = GetCurrentTimestamp();

	/*
	 * We can compare the write and flush positions to the last message we
	 * sent without taking any lock, but the apply position requires a spin
	 * lock, so we don't check that unless something else has changed or 10
	 * seconds have passed.  This means that the apply log position will
	 * appear, from the master's point of view, to lag slightly, but since
	 * this is only for reporting purposes and only on idle systems, that's
	 * probably OK.
	 */
	if (XLByteEQ(reply_message.write, LogstreamResult.Write)
		&& XLByteEQ(reply_message.flush, LogstreamResult.Flush)
		&& !TimestampDifferenceExceeds(reply_message.sendTime, now,
			wal_receiver_status_interval * 1000))
		return;

	/* Construct a new message. */
	reply_message.write = LogstreamResult.Write;
	reply_message.flush = LogstreamResult.Flush;
	reply_message.apply = GetXLogReplayRecPtr();
	reply_message.sendTime = now;

	elog(DEBUG2, "sending write %X/%X flush %X/%X apply %X/%X",
				 reply_message.write.xlogid, reply_message.write.xrecoff,
				 reply_message.flush.xlogid, reply_message.flush.xrecoff,
				 reply_message.apply.xlogid, reply_message.apply.xrecoff);

	/* Prepend with the message type and send it. */
	buf[0] = 'r';
	memcpy(&buf[1], &reply_message, sizeof(StandbyReplyMessage));
	walrcv_send(buf, sizeof(StandbyReplyMessage) + 1);
}
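The early-return logic above boils down to a simple throttle: skip the message unless a position changed or the reporting interval has elapsed. Below is a minimal standalone sketch of that pattern in plain C; it uses gettimeofday() instead of TimestampTz/TimestampDifferenceExceeds(), and the names throttled_send_reply and REPORT_INTERVAL_MS are invented for the example.

#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>

#define REPORT_INTERVAL_MS 10000		/* stand-in for wal_receiver_status_interval */

static int64_t last_send_us;			/* when we last sent a reply */
static uint64_t last_write;				/* write position included in that reply */
static uint64_t last_flush;				/* flush position included in that reply */

static int64_t
now_us(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	return (int64_t) tv.tv_sec * 1000000 + tv.tv_usec;
}

/*
 * Send a reply only if a position moved or the reporting interval expired,
 * mirroring the early-return logic of XLogWalRcvSendReply() above.
 */
static void
throttled_send_reply(uint64_t write_pos, uint64_t flush_pos)
{
	int64_t		now = now_us();

	if (write_pos == last_write && flush_pos == last_flush &&
		now - last_send_us < (int64_t) REPORT_INTERVAL_MS * 1000)
		return;					/* nothing new and too soon: stay quiet */

	last_write = write_pos;
	last_flush = flush_pos;
	last_send_us = now;
	printf("reply: write=%llu flush=%llu\n",
		   (unsigned long long) write_pos, (unsigned long long) flush_pos);
}

int
main(void)
{
	throttled_send_reply(100, 90);	/* positions changed from their initial zeros: sends */
	throttled_send_reply(100, 90);	/* unchanged and too soon: suppressed */
	throttled_send_reply(150, 120); /* positions moved: sends immediately */
	return 0;
}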
Example No. 2
/*
 * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
 * We wait here for a while then return.  If we decide we can't wait any
 * more, we return true; if we can wait some more, we return false.
 */
static bool
WaitExceedsMaxStandbyDelay(void)
{
	/* Are we past max_standby_delay? */
	if (MaxStandbyDelay >= 0 &&
		TimestampDifferenceExceeds(GetLatestXLogTime(), GetCurrentTimestamp(),
								   MaxStandbyDelay))
		return true;

	/*
	 * Sleep a bit (this is essential to avoid busy-waiting).
	 */
	pg_usleep(standbyWait_us);

	/*
	 * Progressively increase the sleep times, but not to more than 1s,
 * since pg_usleep isn't interruptible on some platforms.
	 */
	standbyWait_us *= 2;
	if (standbyWait_us > 1000000)
		standbyWait_us = 1000000;

	return false;
}
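The backoff above doubles the wait on every call and caps it at one second. The following standalone sketch just prints that schedule, assuming a hypothetical 1 ms initial wait (the real starting value is STANDBY_INITIAL_WAIT_US, not shown here).

#include <stdio.h>

int
main(void)
{
	long		wait_us = 1000;	/* assumed initial wait: 1 ms */
	int			i;

	for (i = 0; i < 15; i++)
	{
		printf("attempt %2d: sleep %ld us\n", i + 1, wait_us);

		wait_us *= 2;			/* progressively back off ... */
		if (wait_us > 1000000)
			wait_us = 1000000;	/* ... but never sleep longer than 1 s */
	}
	return 0;
}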
Example No. 3
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	bool		scan_all;
	TransactionId freezeTableLimit;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->options & VACOPT_VERBOSE)
		elevel = INFO;
	else
		elevel = DEBUG2;

	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
						  onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit, &freezeTableLimit);
	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
											 freezeTableLimit);

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
		lazy_truncate_heap(onerel, vacrelstats);

	/* Vacuum the Free Space Map */
	FreeSpaceMapVacuum(onerel);

	/*
	 * Update statistics in pg_class.  But don't change relfrozenxid if we
	 * skipped any pages.
	 */
	vac_update_relstats(onerel,
						vacrelstats->rel_pages, vacrelstats->new_rel_tuples,
						vacrelstats->hasindex,
					  (vacrelstats->scanned_pages < vacrelstats->rel_pages) ?
						InvalidTransactionId :
						FreezeLimit);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel),
						 onerel->rd_rel->relisshared,
						 vacrelstats->new_rel_tuples);

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
							vacrelstats->pages_removed,
							vacrelstats->rel_pages,
							vacrelstats->tuples_deleted,
							vacrelstats->new_rel_tuples,
							pg_rusage_show(&ru0))));
	}
}
Example No. 4
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	long		secs;
	int			usecs;
	double		read_rate,
				write_rate;
	bool		scan_all;
	TransactionId freezeTableLimit;
	BlockNumber new_rel_pages;
	double		new_rel_tuples;
	BlockNumber new_rel_allvisible;
	TransactionId new_frozen_xid;

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		pg_rusage_init(&ru0);
		starttime = GetCurrentTimestamp();
	}

	if (vacstmt->options & VACOPT_VERBOSE)
		elevel = INFO;
	else
		elevel = DEBUG2;

	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
						  onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit, &freezeTableLimit);
	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
											 freezeTableLimit);

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
	vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
		lazy_truncate_heap(onerel, vacrelstats);

	/* Vacuum the Free Space Map */
	FreeSpaceMapVacuum(onerel);

	/*
	 * Update statistics in pg_class.
	 *
	 * A corner case here is that if we scanned no pages at all because every
	 * page is all-visible, we should not update relpages/reltuples, because
	 * we have no new information to contribute.  In particular this keeps
	 * us from replacing relpages=reltuples=0 (which means "unknown tuple
	 * density") with nonzero relpages and reltuples=0 (which means "zero
	 * tuple density") unless there's some actual evidence for the latter.
	 *
	 * We do update relallvisible even in the corner case, since if the
	 * table is all-visible we'd definitely like to know that.  But clamp
	 * the value to be not more than what we're setting relpages to.
	 *
	 * Also, don't change relfrozenxid if we skipped any pages, since then
	 * we don't know for certain that all tuples have a newer xmin.
	 */
	new_rel_pages = vacrelstats->rel_pages;
	new_rel_tuples = vacrelstats->new_rel_tuples;
	if (vacrelstats->scanned_pages == 0 && new_rel_pages > 0)
	{
		new_rel_pages = vacrelstats->old_rel_pages;
		new_rel_tuples = vacrelstats->old_rel_tuples;
	}

	new_rel_allvisible = visibilitymap_count(onerel);
	if (new_rel_allvisible > new_rel_pages)
		new_rel_allvisible = new_rel_pages;

	new_frozen_xid = FreezeLimit;
	if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
		new_frozen_xid = InvalidTransactionId;

	vac_update_relstats(onerel,
						new_rel_pages,
						new_rel_tuples,
						new_rel_allvisible,
						vacrelstats->hasindex,
						new_frozen_xid);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel),
						 onerel->rd_rel->relisshared,
						 new_rel_tuples);

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		TimestampTz	endtime = GetCurrentTimestamp();

		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, endtime,
									   Log_autovacuum_min_duration))
		{
			TimestampDifference(starttime, endtime, &secs, &usecs);

			read_rate = 0;
			write_rate = 0;
			if ((secs > 0) || (usecs > 0))
			{
				read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
					(secs + usecs / 1000000.0);
				write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
					(secs + usecs / 1000000.0);
			}
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"buffer usage: %d hits, %d misses, %d dirtied\n"
							"avg read rate: %.3f MiB/s, avg write rate: %.3f MiB/s\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
							vacrelstats->pages_removed,
							vacrelstats->rel_pages,
							vacrelstats->tuples_deleted,
							vacrelstats->new_rel_tuples,
							VacuumPageHit,
							VacuumPageMiss,
							VacuumPageDirty,
							read_rate, write_rate,
							pg_rusage_show(&ru0))));
		}
	}
}
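The logging branch converts buffer-access counts into MiB and divides by the elapsed time. The standalone sketch below checks just that arithmetic with made-up sample numbers; the default 8 kB block size is assumed, and the variable names are invented for the example.

#include <stdio.h>

#define BLCKSZ 8192				/* assume the default 8 kB block size */

int
main(void)
{
	/* hypothetical sample values, not taken from a real vacuum run */
	long		page_misses = 12000;	/* pages read from disk (VacuumPageMiss) */
	long		page_dirtied = 3000;	/* pages dirtied (VacuumPageDirty) */
	long		secs = 4;
	int			usecs = 500000;			/* elapsed time: 4.5 s */
	double		elapsed = secs + usecs / 1000000.0;

	/* same arithmetic as lazy_vacuum_rel() above: bytes -> MiB -> per second */
	double		read_rate = (double) BLCKSZ * page_misses / (1024 * 1024) / elapsed;
	double		write_rate = (double) BLCKSZ * page_dirtied / (1024 * 1024) / elapsed;

	printf("avg read rate: %.3f MiB/s, avg write rate: %.3f MiB/s\n",
		   read_rate, write_rate);
	return 0;
}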
Example No. 5
/*
 * This is the main executioner for any query backend that conflicts with
 * recovery processing. Judgement has already been passed on it within
 * a specific rmgr. Here we just issue the orders to the procs. The procs
 * then throw the required error as instructed.
 */
static void
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
									   ProcSignalReason reason)
{
	TimestampTz waitStart;
	char	   *new_status;

	/* Fast exit, to avoid a kernel call if there's no work to be done. */
	if (!VirtualTransactionIdIsValid(*waitlist))
		return;

	waitStart = GetCurrentTimestamp();
	new_status = NULL;			/* we haven't changed the ps display */

	while (VirtualTransactionIdIsValid(*waitlist))
	{
		/* reset standbyWait_us for each xact we wait for */
		standbyWait_us = STANDBY_INITIAL_WAIT_US;

		/* wait until the virtual xid is gone */
		while (!ConditionalVirtualXactLockTableWait(*waitlist))
		{
			/*
			 * Report via ps if we have been waiting for more than 500 msec
			 * (should that be configurable?)
			 */
			if (update_process_title && new_status == NULL &&
				TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
										   500))
			{
				const char *old_status;
				int			len;

				old_status = get_ps_display(&len);
				new_status = (char *) palloc(len + 8 + 1);
				memcpy(new_status, old_status, len);
				strcpy(new_status + len, " waiting");
				set_ps_display(new_status, false);
				new_status[len] = '\0'; /* truncate off " waiting" */
			}

			/* Is it time to kill it? */
			if (WaitExceedsMaxStandbyDelay())
			{
				pid_t		pid;

				/*
				 * Now find out who to throw out of the balloon.
				 */
				Assert(VirtualTransactionIdIsValid(*waitlist));
				pid = CancelVirtualTransaction(*waitlist, reason);

				/*
				 * Wait a little bit for it to die so that we avoid flooding
				 * an unresponsive backend when system is heavily loaded.
				 */
				if (pid != 0)
					pg_usleep(5000L);
			}
		}

		/* The virtual transaction is gone now, wait for the next one */
		waitlist++;
	}

	/* Reset ps display if we changed it */
	if (new_status)
	{
		set_ps_display(new_status, false);
		pfree(new_status);
	}
}
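While waiting, the function above appends " waiting" to the ps display and later restores the original title by writing a NUL at the old length. The standalone sketch below shows that append-then-truncate trick in isolation; the sample title string is invented, and plain malloc/free stand in for palloc/pfree.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	const char *old_status = "postgres: startup recovering 000000010000000000000003";
	size_t		len = strlen(old_status);
	char	   *new_status;

	new_status = malloc(len + 8 + 1);	/* room for " waiting" plus NUL */
	if (new_status == NULL)
		return 1;
	memcpy(new_status, old_status, len);
	strcpy(new_status + len, " waiting");

	printf("while waiting: %s\n", new_status);	/* what ps would show during the wait */

	new_status[len] = '\0';				/* truncate off " waiting" again */
	printf("after waiting:  %s\n", new_status);

	free(new_status);
	return 0;
}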
Example No. 6
/*
 * Start new apply background worker, if possible.
 */
void
logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
						 Oid relid)
{
	BackgroundWorker bgw;
	BackgroundWorkerHandle *bgw_handle;
	uint16		generation;
	int			i;
	int			slot = 0;
	LogicalRepWorker *worker = NULL;
	int			nsyncworkers;
	TimestampTz now;

	ereport(DEBUG1,
			(errmsg("starting logical replication worker for subscription \"%s\"",
					subname)));

	/* Report this after the initial starting message for consistency. */
	if (max_replication_slots == 0)
		ereport(ERROR,
				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
				 errmsg("cannot start logical replication workers when max_replication_slots = 0")));

	/*
	 * We need to do the modification of the shared memory under lock so that
	 * we have a consistent view.
	 */
	LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);

retry:
	/* Find unused worker slot. */
	for (i = 0; i < max_logical_replication_workers; i++)
	{
		LogicalRepWorker *w = &LogicalRepCtx->workers[i];

		if (!w->in_use)
		{
			worker = w;
			slot = i;
			break;
		}
	}

	nsyncworkers = logicalrep_sync_worker_count(subid);

	now = GetCurrentTimestamp();

	/*
	 * If we didn't find a free slot, try to do garbage collection.  The
	 * reason we do this is because if some worker failed to start up and its
	 * parent has crashed while waiting, the in_use state was never cleared.
	 */
	if (worker == NULL || nsyncworkers >= max_sync_workers_per_subscription)
	{
		bool		did_cleanup = false;

		for (i = 0; i < max_logical_replication_workers; i++)
		{
			LogicalRepWorker *w = &LogicalRepCtx->workers[i];

			/*
			 * If the worker was marked in use but didn't manage to attach in
			 * time, clean it up.
			 */
			if (w->in_use && !w->proc &&
				TimestampDifferenceExceeds(w->launch_time, now,
										   wal_receiver_timeout))
			{
				elog(WARNING,
					 "logical replication worker for subscription %u took too long to start; canceled",
					 w->subid);

				logicalrep_worker_cleanup(w);
				did_cleanup = true;
			}
		}

		if (did_cleanup)
			goto retry;
	}

	/*
	 * If we reached the sync worker limit per subscription, just exit
	 * silently as we might get here because of an otherwise harmless race
	 * condition.
	 */
	if (nsyncworkers >= max_sync_workers_per_subscription)
	{
		LWLockRelease(LogicalRepWorkerLock);
		return;
	}

	/*
	 * However if there are no more free worker slots, inform user about it
	 * before exiting.
	 */
	if (worker == NULL)
	{
		LWLockRelease(LogicalRepWorkerLock);
		ereport(WARNING,
				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
				 errmsg("out of logical replication worker slots"),
				 errhint("You might need to increase max_logical_replication_workers.")));
		return;
	}

	/* Prepare the worker slot. */
	worker->launch_time = now;
	worker->in_use = true;
	worker->generation++;
	worker->proc = NULL;
	worker->dbid = dbid;
	worker->userid = userid;
	worker->subid = subid;
	worker->relid = relid;
	worker->relstate = SUBREL_STATE_UNKNOWN;
	worker->relstate_lsn = InvalidXLogRecPtr;
	worker->last_lsn = InvalidXLogRecPtr;
	TIMESTAMP_NOBEGIN(worker->last_send_time);
	TIMESTAMP_NOBEGIN(worker->last_recv_time);
	worker->reply_lsn = InvalidXLogRecPtr;
	TIMESTAMP_NOBEGIN(worker->reply_time);

	/* Before releasing lock, remember generation for future identification. */
	generation = worker->generation;

	LWLockRelease(LogicalRepWorkerLock);

	/* Register the new dynamic worker. */
	memset(&bgw, 0, sizeof(bgw));
	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
		BGWORKER_BACKEND_DATABASE_CONNECTION;
	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
	if (OidIsValid(relid))
		snprintf(bgw.bgw_name, BGW_MAXLEN,
				 "logical replication worker for subscription %u sync %u", subid, relid);
	else
		snprintf(bgw.bgw_name, BGW_MAXLEN,
				 "logical replication worker for subscription %u", subid);
	snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");

	bgw.bgw_restart_time = BGW_NEVER_RESTART;
	bgw.bgw_notify_pid = MyProcPid;
	bgw.bgw_main_arg = Int32GetDatum(slot);

	if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
	{
		/* Failed to start worker, so clean up the worker slot. */
		LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
		Assert(generation == worker->generation);
		logicalrep_worker_cleanup(worker);
		LWLockRelease(LogicalRepWorkerLock);

		ereport(WARNING,
				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
				 errmsg("out of background worker slots"),
				 errhint("You might need to increase max_worker_processes.")));
		return;
	}

	/* Now wait until it attaches. */
	WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
}
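The retry path above reclaims slots whose worker was launched but never attached within wal_receiver_timeout. Below is a standalone sketch of that garbage-collection pass; the WorkerSlot type, the millisecond clock, and the constants are all invented stand-ins for the shared-memory structures in the real code.

#include <stdbool.h>
#include <stdio.h>

#define N_SLOTS 4
#define ATTACH_TIMEOUT_MS 60000	/* stand-in for wal_receiver_timeout */

typedef struct WorkerSlot
{
	bool		in_use;			/* slot was handed to a launched worker */
	bool		attached;		/* worker actually attached (proc != NULL) */
	long long	launch_time_ms; /* when the launcher filled the slot */
} WorkerSlot;

/*
 * Reclaim slots whose worker never attached in time, mirroring the
 * garbage-collection pass in logicalrep_worker_launch() above.  Returns
 * true if anything was cleaned up, so the caller can retry its search
 * for a free slot.
 */
static bool
cleanup_stale_slots(WorkerSlot *slots, long long now_ms)
{
	bool		did_cleanup = false;
	int			i;

	for (i = 0; i < N_SLOTS; i++)
	{
		WorkerSlot *w = &slots[i];

		if (w->in_use && !w->attached &&
			now_ms - w->launch_time_ms > ATTACH_TIMEOUT_MS)
		{
			printf("slot %d: worker took too long to start; reclaiming\n", i);
			w->in_use = false;
			did_cleanup = true;
		}
	}
	return did_cleanup;
}

int
main(void)
{
	WorkerSlot	slots[N_SLOTS] = {
		{true, true, 0},		/* healthy, attached worker */
		{true, false, 0},		/* launched long ago, never attached */
		{false, false, 0},		/* free slot */
		{true, false, 100000},	/* launched recently, still starting up */
	};

	if (cleanup_stale_slots(slots, 130000))
		printf("retry the free-slot search\n");
	return 0;
}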
Example No. 7
/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 *
 *		The return value indicates whether this function has held off
 *		interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
 */
bool
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy, List *updated_stats)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	bool		heldoff = false;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	if (Gp_role == GP_ROLE_DISPATCH)
		elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */

#ifdef FAULT_INJECTOR
	if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeSegmentFileDropPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
	if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt))
	{
			FaultInjector_InjectFaultIfSet(
				CompactionBeforeCleanupPhase,
				DDLNotSpecified,
				"",	// databaseName
				""); // tableName
	}
#endif

	/*
	 * MPP-23647.  Update xid limits for heap as well as appendonly
	 * relations.  This allows setting relfrozenxid to correct value
	 * for an appendonly (AO/CO) table.
	 */
	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	/*
	 * Execute the various vacuum operations. Appendonly tables are treated
	 * differently.
	 */
	if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel))
	{
		lazy_vacuum_aorel(onerel, vacstmt, updated_stats);
		return false;
	}

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* heap relation */

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 *
	 * Note that after we've truncated the heap, it's too late to abort the
	 * transaction; doing so would lose the sinval messages needed to tell
	 * the other backends about the table being shrunk.  We prevent interrupts
	 * in that case; caller is responsible for re-enabling them after
	 * committing the transaction.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
	{
		HOLD_INTERRUPTS();
		heldoff = true;
		lazy_truncate_heap(onerel, vacrelstats);
	}

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	if (vacrelstats->tot_free_pages > MaxFSMPages)
		ereport(WARNING,
				(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel)),
				 /* Only suggest VACUUM FULL if > 20% free */
				 (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
				 errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
				 errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));

	/* Update statistics in pg_class */
	vac_update_relstats_from_list(onerel,
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit,
						updated_stats);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 true /*vacrelstats->scanned_all*/,
						 vacstmt->analyze, vacrelstats->rel_tuples);

	if (gp_indexcheck_vacuum == INDEX_CHECK_ALL ||
		(gp_indexcheck_vacuum == INDEX_CHECK_SYSTEM &&
		 PG_CATALOG_NAMESPACE == RelationGetNamespace(onerel)))
	{
		int			i;

		for (i = 0; i < nindexes; i++)
		{
			if (Irel[i]->rd_rel->relam == BTREE_AM_OID)
				_bt_validate_vacuum(Irel[i], onerel, OldestXmin);
		}
	}

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
						  vacrelstats->pages_removed, vacrelstats->rel_pages,
						vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}

	return heldoff;
}
Example No. 8
/*
 * ContinuousQueryWorkerRun
 *
 * Runs a CQ worker, which continuously generates partial query results to send
 * back to the combiner process.
 */
void
ContinuousQueryWorkerRun(Portal portal, ContinuousViewState *state, QueryDesc *queryDesc, ResourceOwner owner)
{
	EState	   *estate = NULL;
	DestReceiver *dest;
	CmdType		operation;
	MemoryContext oldcontext;
	int timeoutms = state->maxwaitms;
	MemoryContext runcontext;
	CQProcEntry *entry = GetCQProcEntry(MyCQId);
	ResourceOwner cqowner = ResourceOwnerCreate(NULL, "CQResourceOwner");
	bool savereadonly = XactReadOnly;

	cq_stat_initialize(state->viewid, MyProcPid);

	dest = CreateDestReceiver(DestCombiner);
	SetCombinerDestReceiverParams(dest, MyCQId);

	/* workers only need read-only transactions */
	XactReadOnly = true;

	runcontext = AllocSetContextCreate(TopMemoryContext, "CQRunContext",
			ALLOCSET_DEFAULT_MINSIZE,
			ALLOCSET_DEFAULT_INITSIZE,
			ALLOCSET_DEFAULT_MAXSIZE);

	elog(LOG, "\"%s\" worker %d running", queryDesc->plannedstmt->cq_target->relname, MyProcPid);
	MarkWorkerAsRunning(MyCQId, MyWorkerId);
	pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);

	TupleBufferInitLatch(WorkerTupleBuffer, MyCQId, MyWorkerId, &MyProc->procLatch);

	oldcontext = MemoryContextSwitchTo(runcontext);

retry:
	PG_TRY();
	{
		bool xact_commit = true;
		TimestampTz last_process = GetCurrentTimestamp();
		TimestampTz last_commit = GetCurrentTimestamp();

		start_executor(queryDesc, runcontext, cqowner);

		CurrentResourceOwner = cqowner;

		estate = queryDesc->estate;
		operation = queryDesc->operation;

		/*
		 * Initialize context that lives for the duration of a single iteration
		 * of the main worker loop
		 */
		CQExecutionContext = AllocSetContextCreate(estate->es_query_cxt, "CQExecutionContext",
				ALLOCSET_DEFAULT_MINSIZE,
				ALLOCSET_DEFAULT_INITSIZE,
				ALLOCSET_DEFAULT_MAXSIZE);

		estate->es_lastoid = InvalidOid;

		/*
		 * Startup combiner receiver
		 */
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

		for (;;)
		{
			if (!TupleBufferHasUnreadSlots())
			{
				if (TimestampDifferenceExceeds(last_process, GetCurrentTimestamp(), state->emptysleepms))
				{
					/* force stats flush */
					cq_stat_report(true);

					pgstat_report_activity(STATE_IDLE, queryDesc->sourceText);
					TupleBufferWait(WorkerTupleBuffer, MyCQId, MyWorkerId);
					pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);
				}
				else
					pg_usleep(Min(WAIT_SLEEP_MS, state->emptysleepms) * 1000);
			}

			TupleBufferResetNotify(WorkerTupleBuffer, MyCQId, MyWorkerId);

			if (xact_commit)
				StartTransactionCommand();

			set_snapshot(estate, cqowner);
			CurrentResourceOwner = cqowner;
			MemoryContextSwitchTo(estate->es_query_cxt);

			estate->es_processed = 0;
			estate->es_filtered = 0;

			/*
			 * Run plan on a microbatch
			 */
			ExecutePlan(estate, queryDesc->planstate, operation,
					true, 0, timeoutms, ForwardScanDirection, dest);

			IncrementCQExecutions(1);
			TupleBufferClearPinnedSlots();

			if (state->long_xact)
			{
				if (TimestampDifferenceExceeds(last_commit, GetCurrentTimestamp(), LONG_RUNNING_XACT_DURATION))
					xact_commit = true;
				else
					xact_commit = false;
			}

			unset_snapshot(estate, cqowner);
			if (xact_commit)
			{
				CommitTransactionCommand();
				last_commit = GetCurrentTimestamp();
			}

			MemoryContextResetAndDeleteChildren(CQExecutionContext);
			MemoryContextSwitchTo(runcontext);
			CurrentResourceOwner = cqowner;

			if (estate->es_processed || estate->es_filtered)
			{
				/*
			 * If the CV query is such that the SELECT does not return any
			 * tuples (e.g. SELECT id WHERE id = 99 when id = 99 does not
			 * exist), then this reset will fail.  In that case the worker
			 * will block at the latch for every allocated slot, until a CV
			 * returns a non-zero number of tuples, at which point the
			 * worker will resume a simple sleep for the threshold time.
				 */
				last_process = GetCurrentTimestamp();

				/*
				 * Send stats to the collector
				 */
				cq_stat_report(false);
			}

			/* Has the CQ been deactivated? */
			if (!entry->active)
			{
				if (ActiveSnapshotSet())
					unset_snapshot(estate, cqowner);
				if (IsTransactionState())
					CommitTransactionCommand();
				break;
			}
		}

		CurrentResourceOwner = cqowner;

		/*
		 * The cleanup functions below expect these things to be registered
		 */
		RegisterSnapshotOnOwner(estate->es_snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->crosscheck_snapshot, cqowner);

		/* cleanup */
		ExecutorFinish(queryDesc);
		ExecutorEnd(queryDesc);
		FreeQueryDesc(queryDesc);
	}
	PG_CATCH();
	{
		EmitErrorReport();
		FlushErrorState();

		/* Since the worker is read-only, we can simply commit the transaction. */
		if (ActiveSnapshotSet())
			unset_snapshot(estate, cqowner);
		if (IsTransactionState())
			CommitTransactionCommand();

		TupleBufferUnpinAllPinnedSlots();
		TupleBufferClearReaders();

		/* This resets the es_query_cxt and in turn the CQExecutionContext */
		MemoryContextResetAndDeleteChildren(runcontext);

		IncrementCQErrors(1);

		if (continuous_query_crash_recovery)
			goto retry;
	}
	PG_END_TRY();

	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);
	MemoryContextDelete(runcontext);

	XactReadOnly = savereadonly;

	/*
	 * Remove proc-level stats
	 */
	cq_stat_report(true);
	cq_stat_send_purge(state->viewid, MyProcPid, CQ_STAT_WORKER);

	CurrentResourceOwner = owner;
}
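When the tuple buffer is empty, the loop above keeps polling briefly but parks on its latch once it has been idle longer than state->emptysleepms. The following minimal sketch isolates that decision; the names, the millisecond clock, and the threshold value are invented for the example.

#include <stdio.h>

#define EMPTY_SLEEP_MS 100		/* stand-in for state->emptysleepms */

/*
 * Decide, as the idle branch of the worker loop above does, whether a
 * worker with nothing to read should keep polling briefly or park on its
 * latch until new tuples arrive.
 */
static const char *
idle_action(long long now_ms, long long last_process_ms)
{
	if (now_ms - last_process_ms > EMPTY_SLEEP_MS)
		return "block on latch until new tuples arrive";
	return "short poll sleep, then check the buffer again";
}

int
main(void)
{
	printf("idle for 10 ms  -> %s\n", idle_action(1010, 1000));
	printf("idle for 500 ms -> %s\n", idle_action(1500, 1000));
	return 0;
}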
Example No. 9
/*
 * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
 * to resolve conflicts with other backends holding buffer pins.
 *
 * We either resolve conflicts immediately or set a SIGALRM to wake us at
 * the limit of our patience. The sleep in LockBufferForCleanup() is
 * performed here, for code clarity.
 *
 * Resolve conflict by sending a SIGUSR1 reason to all backends to check if
 * they hold one of the buffer pins that is blocking Startup process. If so,
 * backends will take an appropriate error action, ERROR or FATAL.
 *
 * We also check for deadlocks before we wait, though applications that cause
 * these will be extremely rare.  Deadlocks occur because if queries
 * wait on a lock, that must be behind an AccessExclusiveLock, which can only
 * be cleared if the Startup process replays a transaction completion record.
 * If the Startup process is also waiting then that is a deadlock.  The
 * deadlock can occur if the query is waiting and then the Startup process
 * sleeps, or if the Startup process is sleeping and the query waits on a
 * lock.  We protect against only the former sequence here; the latter
 * sequence is checked prior to the query sleeping, in
 * CheckRecoveryConflictDeadlock().
 */
void
ResolveRecoveryConflictWithBufferPin(void)
{
	bool		sig_alarm_enabled = false;

	Assert(InHotStandby);

	if (MaxStandbyDelay == 0)
	{
		/*
		 * We don't want to wait, so just tell everybody holding the pin to
		 * get out of town.
		 */
		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
	}
	else if (MaxStandbyDelay < 0)
	{
		/*
		 * Send out a request to check for buffer pin deadlocks before we
		 * wait. This is fairly cheap, so no need to wait for deadlock timeout
		 * before trying to send it out.
		 */
		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
	}
	else
	{
		TimestampTz then = GetLatestXLogTime();
		TimestampTz now = GetCurrentTimestamp();

		/* Are we past max_standby_delay? */
		if (TimestampDifferenceExceeds(then, now, MaxStandbyDelay))
		{
			/*
			 * We're already behind, so clear a path as quickly as possible.
			 */
			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
		}
		else
		{
			TimestampTz fin_time;		/* Expected wake-up time by timer */
			long		timer_delay_secs;		/* Amount of time we set timer
												 * for */
			int			timer_delay_usecs;

			/*
			 * Send out a request to check for buffer pin deadlocks before we
			 * wait. This is fairly cheap, so no need to wait for deadlock
			 * timeout before trying to send it out.
			 */
			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);

			/*
			 * How much longer we should wait?
			 */
			fin_time = TimestampTzPlusMilliseconds(then, MaxStandbyDelay);

			TimestampDifference(now, fin_time,
								&timer_delay_secs, &timer_delay_usecs);

			/*
			 * It's possible that the difference is less than a microsecond;
			 * ensure we don't cancel, rather than set, the interrupt.
			 */
			if (timer_delay_secs == 0 && timer_delay_usecs == 0)
				timer_delay_usecs = 1;

			if (enable_standby_sig_alarm(timer_delay_secs, timer_delay_usecs, fin_time))
				sig_alarm_enabled = true;
			else
				elog(FATAL, "could not set timer for process wakeup");
		}
	}

	/* Wait to be signaled by UnpinBuffer() */
	ProcWaitForSignal();

	if (sig_alarm_enabled)
	{
		if (!disable_standby_sig_alarm())
			elog(FATAL, "could not disable timer for process wakeup");
	}
}
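In the normal case the startup process arms a timer for the time remaining until GetLatestXLogTime() plus MaxStandbyDelay, clamping a zero delay to one microsecond so the interrupt is set rather than cancelled. The standalone sketch below shows just that arithmetic, using microsecond counts and invented names.

#include <stdio.h>

/*
 * Compute how long the startup process should arm its wake-up timer for,
 * as the final branch of ResolveRecoveryConflictWithBufferPin() does.
 */
static long long
remaining_wait_us(long long last_xlog_time_us, long long now_us, int max_delay_ms)
{
	long long	fin_time_us = last_xlog_time_us + (long long) max_delay_ms * 1000;
	long long	delay_us = fin_time_us - now_us;

	/*
	 * A zero delay would cancel the timer instead of setting it, so clamp
	 * to one microsecond, matching the guard in the code above.
	 */
	if (delay_us <= 0)
		delay_us = 1;
	return delay_us;
}

int
main(void)
{
	/* hypothetical numbers: a 30 s budget with 12.4 s already elapsed */
	printf("arm timer for %lld us\n", remaining_wait_us(0, 12400000, 30000));
	return 0;
}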
Example No. 10
/*
 * purge_dropped_db_segments
 */
static void
purge_dropped_db_segments(bool force)
{
	static TimestampTz last_purge_time = 0;
	List *db_oids;
	List *dbs_to_remove = NIL;
	HASH_SEQ_STATUS status;
	broker_db_meta *db_meta;

	if (!force && !TimestampDifferenceExceeds(last_purge_time, GetCurrentTimestamp(), 10 * 1000)) /* 10s */
		return;

	db_oids = get_database_oids();

	LWLockAcquire(IPCMessageBrokerIndexLock, LW_SHARED);

	hash_seq_init(&status, broker_meta->db_meta_hash);
	while ((db_meta = (broker_db_meta *) hash_seq_search(&status)) != NULL)
	{
		bool found = false;
		ListCell *lc;

		foreach(lc, db_oids)
		{
			if (lfirst_oid(lc) == db_meta->dbid)
			{
				found = true;
				break;
			}
		}

		if (!found)
			dbs_to_remove = lappend_oid(dbs_to_remove, db_meta->dbid);
	}

	LWLockRelease(IPCMessageBrokerIndexLock);

	if (list_length(dbs_to_remove))
	{
		ListCell *lc;

		LWLockAcquire(IPCMessageBrokerIndexLock, LW_EXCLUSIVE);

		foreach(lc, dbs_to_remove)
		{
			Oid dbid = lfirst_oid(lc);
			bool found;

			db_meta = hash_search(broker_meta->db_meta_hash, &dbid, HASH_FIND, &found);
			Assert(found);

			Assert(db_meta->handle > 0);

			/* detach from main db segment */
			if (db_meta->segment)
				dsm_detach(db_meta->segment);

			if (db_meta->lqueues)
			{
				int i;

				for (i = 0; i < continuous_query_num_workers; i++)
				{
					local_queue *local_buf = &db_meta->lqueues[i];
					if (local_buf->slots)
						list_free_deep(local_buf->slots);
				}

				pfree(db_meta->lqueues);
			}

			hash_search(broker_meta->db_meta_hash, &dbid, HASH_REMOVE, &found);
			Assert(found);
		}

		mark_unused_locks_as_free(db_oids);

		LWLockRelease(IPCMessageBrokerIndexLock);
	}

	/* Remember when this purge ran, for the rate limit at the top. */
	last_purge_time = GetCurrentTimestamp();
}
Example No. 11
/*
 * Send hot standby feedback message to primary, plus the current time,
 * in case they don't have a watch.
 */
static void
XLogWalRcvSendHSFeedback(void)
{
	char		buf[sizeof(StandbyHSFeedbackMessage) + 1];
	TimestampTz now;
	TransactionId nextXid;
	uint32		nextEpoch;
	TransactionId xmin;

	/*
	 * If the user doesn't want status to be reported to the master, be sure
	 * to exit before doing anything at all.
	 */
	if (wal_receiver_status_interval <= 0 || !hot_standby_feedback)
		return;

	/* Get current timestamp. */
	now = GetCurrentTimestamp();

	/*
	 * Send feedback at most once per wal_receiver_status_interval.
	 */
	if (!TimestampDifferenceExceeds(feedback_message.sendTime, now,
									wal_receiver_status_interval * 1000))
		return;

	/*
	 * If Hot Standby is not yet active there is nothing to send. Check this
	 * after the interval has expired to reduce number of calls.
	 */
	if (!HotStandbyActive())
		return;

	/*
	 * Make the expensive call to get the oldest xmin once we are certain
	 * everything else has been checked.
	 */
	xmin = GetOldestXmin(true, false);

	/*
	 * Get epoch and adjust if nextXid and oldestXmin are different sides of
	 * the epoch boundary.
	 */
	GetNextXidAndEpoch(&nextXid, &nextEpoch);
	if (nextXid < xmin)
		nextEpoch--;

	/*
	 * Always send feedback message.
	 */
	feedback_message.sendTime = now;
	feedback_message.xmin = xmin;
	feedback_message.epoch = nextEpoch;

	elog(DEBUG2, "sending hot standby feedback xmin %u epoch %u",
		 feedback_message.xmin,
		 feedback_message.epoch);

	/* Prepend with the message type and send it. */
	buf[0] = 'h';
	memcpy(&buf[1], &feedback_message, sizeof(StandbyHSFeedbackMessage));
	walrcv_send(buf, sizeof(StandbyHSFeedbackMessage) + 1);
}
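The epoch adjustment above exists because xmin is a 32-bit xid: if nextXid compares lower than xmin, the two values straddle a wraparound and xmin belongs to the previous epoch. The standalone sketch below reconstructs the 64-bit value that logic implies; the function name and sample numbers are invented for the example.

#include <stdint.h>
#include <stdio.h>

/*
 * Reconstruct a 64-bit xid for xmin the way the epoch adjustment in
 * XLogWalRcvSendHSFeedback() implies: xmin is never newer than nextXid,
 * so if the 32-bit comparison says nextXid < xmin, xmin must belong to
 * the previous epoch.
 */
static uint64_t
xmin_full(uint32_t next_xid, uint32_t next_epoch, uint32_t xmin)
{
	uint32_t	epoch = next_epoch;

	if (next_xid < xmin)
		epoch--;				/* xmin is from before the wraparound */

	return ((uint64_t) epoch << 32) | xmin;
}

int
main(void)
{
	/* hypothetical values straddling an epoch boundary */
	printf("full xmin = %llu\n",
		   (unsigned long long) xmin_full(100, 7, 4294967000u));
	return 0;
}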