int CommitTranGTM(GlobalTransactionId gxid) { int ret; if (!GlobalTransactionIdIsValid(gxid)) return 0; CheckConnection(); ret = commit_transaction(conn, gxid); /* * If something went wrong (timeout), try and reset GTM connection. * We will close the transaction locally anyway, and closing GTM will force * it to be closed on GTM. */ if (ret < 0) { CloseGTM(); InitGTM(); } /* Close connection in case commit is done by autovacuum worker or launcher */ if (IsAutoVacuumWorkerProcess() || IsAutoVacuumLauncherProcess()) CloseGTM(); return ret; }
void InitGTM(void) { /* 256 bytes should be enough */ char conn_str[256]; /* If this thread is postmaster itself, it contacts gtm identifying itself */ if (!IsUnderPostmaster) { GTM_PGXCNodeType remote_type = GTM_NODE_DEFAULT; if (IS_PGXC_COORDINATOR) remote_type = GTM_NODE_COORDINATOR; else if (IS_PGXC_DATANODE) remote_type = GTM_NODE_DATANODE; sprintf(conn_str, "host=%s port=%d node_name=%s remote_type=%d postmaster=1", GtmHost, GtmPort, PGXCNodeName, remote_type); /* Log activity of GTM connections */ elog(DEBUG1, "Postmaster: connection established to GTM with string %s", conn_str); } else { sprintf(conn_str, "host=%s port=%d node_name=%s", GtmHost, GtmPort, PGXCNodeName); /* Log activity of GTM connections */ if (IsAutoVacuumWorkerProcess()) elog(DEBUG1, "Autovacuum worker: connection established to GTM with string %s", conn_str); else if (IsAutoVacuumLauncherProcess()) elog(DEBUG1, "Autovacuum launcher: connection established to GTM with string %s", conn_str); else elog(DEBUG1, "Postmaster child: connection established to GTM with string %s", conn_str); } conn = PQconnectGTM(conn_str); if (GTMPQstatus(conn) != CONNECTION_OK) { int save_errno = errno; ereport(WARNING, (errcode(ERRCODE_CONNECTION_EXCEPTION), errmsg("can not connect to GTM: %m"))); errno = save_errno; CloseGTM(); } #ifdef XCP else if (IS_PGXC_COORDINATOR) register_session(conn, PGXCNodeName, MyProcPid, MyBackendId); #endif }
/* ---------------------------------------------------------------- * proc_exit * * this function calls all the callbacks registered * for it (to free resources) and then calls exit. * * This should be the only function to call exit(). * -cim 2/6/90 * * Unfortunately, we can't really guarantee that add-on code * obeys the rule of not calling exit() directly. So, while * this is the preferred way out of the system, we also register * an atexit callback that will make sure cleanup happens. * ---------------------------------------------------------------- */ void proc_exit(int code) { pqsignal(SIGALRM, SIG_IGN); /* Clean up everything that must be cleaned up */ proc_exit_prepare(code); #ifdef PROFILE_PID_DIR { /* * If we are profiling ourself then gprof's mcleanup() is about to * write out a profile to ./gmon.out. Since mcleanup() always uses a * fixed file name, each backend will overwrite earlier profiles. To * fix that, we create a separate subdirectory for each backend * (./gprof/pid) and 'cd' to that subdirectory before we exit() - that * forces mcleanup() to write each profile into its own directory. We * end up with something like: $PGDATA/gprof/8829/gmon.out * $PGDATA/gprof/8845/gmon.out ... * * To avoid undesirable disk space bloat, autovacuum workers are * discriminated against: all their gmon.out files go into the same * subdirectory. Without this, an installation that is "just sitting * there" nonetheless eats megabytes of disk space every few seconds. * * Note that we do this here instead of in an on_proc_exit() callback * because we want to ensure that this code executes last - we don't * want to interfere with any other on_proc_exit() callback. For the * same reason, we do not include it in proc_exit_prepare ... so if * you are exiting in the "wrong way" you won't drop your profile in a * nice place. */ char gprofDirName[32]; if (IsAutoVacuumWorkerProcess()) snprintf(gprofDirName, 32, "gprof/avworker"); else snprintf(gprofDirName, 32, "gprof/%d", (int) getpid()); mkdir("gprof", S_IRWXU | S_IRWXG | S_IRWXO); mkdir(gprofDirName, S_IRWXU | S_IRWXG | S_IRWXO); chdir(gprofDirName); } #endif elog(DEBUG3, "exit(%d)", code); exit(code); }
void CloseGTM(void) { GTMPQfinish(conn); conn = NULL; /* Log activity of GTM connections */ if (!IsUnderPostmaster) elog(DEBUG1, "Postmaster: connection to GTM closed"); else if (IsAutoVacuumWorkerProcess()) elog(DEBUG1, "Autovacuum worker: connection to GTM closed"); else if (IsAutoVacuumLauncherProcess()) elog(DEBUG1, "Autovacuum launcher: connection to GTM closed"); else elog(DEBUG1, "Postmaster child: connection to GTM closed"); }
int CommitTranGTM(GlobalTransactionId gxid, int waited_xid_count, GlobalTransactionId *waited_xids) { int ret; if (!GlobalTransactionIdIsValid(gxid)) return 0; CheckConnection(); #ifdef XCP ret = -1; if (conn) #endif ret = commit_transaction(conn, gxid, waited_xid_count, waited_xids); /* * If something went wrong (timeout), try and reset GTM connection. * We will close the transaction locally anyway, and closing GTM will force * it to be closed on GTM. */ if (ret < 0) { CloseGTM(); InitGTM(); #ifdef XCP if (conn) ret = commit_transaction(conn, gxid, waited_xid_count, waited_xids); #endif } /* Close connection in case commit is done by autovacuum worker or launcher */ if (IsAutoVacuumWorkerProcess() || IsAutoVacuumLauncherProcess()) CloseGTM(); currentGxid = InvalidGlobalTransactionId; return ret; }
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * In bootstrap mode no parameters are used. The autovacuum launcher process * doesn't use any parameters either, because it only goes far enough to be * able to read pg_database; it doesn't connect to any particular database. * In walsender mode only username is used. * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not completely filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, char *out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool am_superuser; GucContext gucctx; char *fullpath; char dbname[NAMEDATALEN]; elog(DEBUG3, "InitPostgres"); /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG, if appropriate. In * bootstrap case we skip this since StartupXLOG() was run instead. */ if (!bootstrap) (void) RecoveryInProgress(); /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); InitPlanCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least an entry for pg_database. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This has to * be after we've initialized all the low-level modules like the buffer * manager, because during shutdown this has to run before the low-level * modules start to close down. On the other hand, we want it in place * before we begin our first transaction --- if we fail during the * initialization transaction, as is entirely possible, we need the * AbortTransaction call to clean up. */ on_shmem_exit(ShutdownPostgres, 0); /* The autovacuum launcher is done here */ if (IsAutoVacuumLauncherProcess()) return; /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. (This * is critical for anything that reads heap pages, because HOT may decide * to prune them even if the process doesn't attempt to modify any * tuples.) */ if (!bootstrap) { StartTransactionCommand(); (void) GetTransactionSnapshot(); } /* * Set up the global variables holding database id and default tablespace. * But note we won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap and walsender case, otherwise we * have to look up the db's entry in pg_database. */ if (bootstrap || am_walsender) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); } else { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); } /* Now we can mark our PGPROC entry with the database ID */ /* (We assume this is an atomic store so no lock is needed) */ MyProc->databaseId = MyDatabaseId; /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we are already advertising our use of * the database in the PGPROC array; anyone trying a DROP DATABASE after * this point will see us there. * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap && !am_walsender) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap && !am_walsender) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != HeapTupleGetOid(tuple) || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } /* * Now we should be able to access the database directory safely. Verify * it's there and looks reasonable. */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap && !am_walsender) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase3(); /* * Perform client authentication if necessary, then figure out our * postgres user ID, and see if we are a superuser. * * In standalone mode and in autovacuum worker processes, we use a fixed * ID, otherwise we figure it out from the authenticated user name. */ if (bootstrap || IsAutoVacuumWorkerProcess()) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.", username))); } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username); am_superuser = superuser(); } /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* Process pg_db_role_setting options */ process_settings(MyDatabaseId, GetSessionUserId()); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap && !am_walsender) CheckMyDatabase(dbname, am_superuser); /* * If we're trying to shut down, only superusers can connect. */ if (!am_superuser && MyProcPort != NULL && MyProcPort->canAcceptConnections == CAC_WAITBACKUP) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect during database shutdown"))); /* * Check a normal user hasn't connected to a superuser reserved slot. */ if (!am_superuser && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("connection limit exceeded for non-superusers"))); /* * Now process any command-line switches that were included in the startup * packet, if we are in a regular backend. We couldn't do this before * because we didn't know if client is a superuser. */ gucctx = am_superuser ? PGC_SUSET : PGC_BACKEND; if (MyProcPort != NULL && MyProcPort->cmdline_options != NULL) { /* * The maximum possible number of commandline arguments that could * come from MyProcPort->cmdline_options is (strlen + 1) / 2; see * pg_split_opts(). */ char **av; int maxac; int ac; maxac = 2 + (strlen(MyProcPort->cmdline_options) + 1) / 2; av = (char **) palloc(maxac * sizeof(char *)); ac = 0; av[ac++] = "postgres"; /* Note this mangles MyProcPort->cmdline_options */ pg_split_opts(av, &ac, MyProcPort->cmdline_options); av[ac] = NULL; Assert(ac < maxac); (void) process_postgres_switches(ac, av, gucctx); } /* * Process any additional GUC variable settings passed in startup packet. * These are handled exactly like command-line variables. */ if (MyProcPort != NULL) { ListCell *gucopts = list_head(MyProcPort->guc_options); while (gucopts) { char *name; char *value; name = lfirst(gucopts); gucopts = lnext(gucopts); value = lfirst(gucopts); gucopts = lnext(gucopts); SetConfigOption(name, value, gucctx, PGC_S_CLIENT); } } /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* * Initialize various default states that can't be set up until we've * selected the active user and gotten the right GUC settings. */ /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* reset the database for walsender */ if (am_walsender) MyProc->databaseId = MyDatabaseId = InvalidOid; /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); }
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the computed database * name is passed out to the caller as a palloc'ed string in out_dbname. * * In bootstrap mode no parameters are used. * * The return value indicates whether the userID is a superuser. (That * can only be tested inside a transaction, so we want to do it during * the startup transaction rather than doing a separate one in postgres.c.) * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ bool InitPostgres(const char *in_dbname, Oid dboid, const char *username, char **out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool autovacuum = IsAutoVacuumWorkerProcess(); bool am_superuser; char *fullpath; char dbname[NAMEDATALEN]; /* * Set up the global variables holding database id and path. But note we * won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap case, otherwise we have to look up * the db name in pg_database. */ if (bootstrap) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else { /* * Find tablespace of the database we're about to open. Since we're * not yet up and running we have to use one of the hackish * FindMyDatabase variants, which look in the flat-file copy of * pg_database. * * If the in_dbname param is NULL, lookup database by OID. */ if (in_dbname == NULL) { if (!FindMyDatabaseByOid(dboid, dbname, &MyDatabaseTableSpace)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); MyDatabaseId = dboid; /* pass the database name to the caller */ *out_dbname = pstrdup(dbname); } else { if (!FindMyDatabase(in_dbname, &MyDatabaseId, &MyDatabaseTableSpace)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); /* our database name is gotten from the caller */ strlcpy(dbname, in_dbname, NAMEDATALEN); } } fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); SetDatabasePath(fullpath); /* * Finish filling in the PGPROC struct, and add it to the ProcArray. (We * need to know MyDatabaseId before we can do this, since it's entered * into the PGPROC struct.) * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG. In bootstrap case we may * skip this since StartupXLOG() was run instead. */ if (!bootstrap) InitXLOGAccess(); /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); InitPlanCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Set up process-exit callback to do pre-shutdown cleanup. This has to * be after we've initialized all the low-level modules like the buffer * manager, because during shutdown this has to run before the low-level * modules start to close down. On the other hand, we want it in place * before we begin our first transaction --- if we fail during the * initialization transaction, as is entirely possible, we need the * AbortTransaction call to clean up. */ on_shmem_exit(ShutdownPostgres, 0); /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. */ if (!bootstrap) { StartTransactionCommand(); (void) GetTransactionSnapshot(); } /* * Now that we have a transaction, we can take locks. Take a writer's * lock on the database we are trying to connect to. If there is a * concurrently running DROP DATABASE on that database, this will block us * until it finishes (and has updated the flat file copy of pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we are already advertising our use of * the database in the PGPROC array; anyone trying a DROP DATABASE after * this point will see us there. * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Recheck the flat file copy of pg_database to make sure the target * database hasn't gone away. If there was a concurrent DROP DATABASE, * this ensures we will die cleanly without creating a mess. */ if (!bootstrap) { Oid dbid2; Oid tsid2; if (!FindMyDatabase(dbname, &dbid2, &tsid2) || dbid2 != MyDatabaseId || tsid2 != MyDatabaseTableSpace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } /* * Now we should be able to access the database directory safely. Verify * it's there and looks reasonable. */ if (!bootstrap) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase2(); /* * Figure out our postgres user id, and see if we are a superuser. * * In standalone mode and in the autovacuum process, we use a fixed id, * otherwise we figure it out from the authenticated user name. */ if (bootstrap || autovacuum) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" CREATEUSER;.", username))); } else { /* normal multiuser case */ InitializeSessionUserId(username); am_superuser = superuser(); } /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* * Read the real pg_database row for our database, check permissions and * set up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap) CheckMyDatabase(dbname, am_superuser); /* * If we're trying to shut down, only superusers can connect. */ if (!am_superuser && MyProcPort != NULL && MyProcPort->canAcceptConnections == CAC_WAITBACKUP) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect during database shutdown"))); /* * Check a normal user hasn't connected to a superuser reserved slot. */ if (!am_superuser && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("connection limit exceeded for non-superusers"))); /* * Initialize various default states that can't be set up until we've * selected the active user and gotten the right GUC settings. */ /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); return am_superuser; }
/* * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation * * This routine vacuums a single heap, cleans out its indexes, and * updates its relpages and reltuples statistics. * * At entry, we have already established a transaction and opened * and locked the relation. * * The return value indicates whether this function has held off * interrupts -- caller must RESUME_INTERRUPTS() after commit if true. */ bool lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy, List *updated_stats) { LVRelStats *vacrelstats; Relation *Irel; int nindexes; BlockNumber possibly_freeable; PGRUsage ru0; TimestampTz starttime = 0; bool heldoff = false; pg_rusage_init(&ru0); /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0) starttime = GetCurrentTimestamp(); if (vacstmt->verbose) elevel = INFO; else elevel = DEBUG2; if (Gp_role == GP_ROLE_DISPATCH) elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */ #ifdef FAULT_INJECTOR if (vacuumStatement_IsInAppendOnlyDropPhase(vacstmt)) { FaultInjector_InjectFaultIfSet( CompactionBeforeSegmentFileDropPhase, DDLNotSpecified, "", // databaseName ""); // tableName } if (vacummStatement_IsInAppendOnlyCleanupPhase(vacstmt)) { FaultInjector_InjectFaultIfSet( CompactionBeforeCleanupPhase, DDLNotSpecified, "", // databaseName ""); // tableName } #endif /* * MPP-23647. Update xid limits for heap as well as appendonly * relations. This allows setting relfrozenxid to correct value * for an appendonly (AO/CO) table. */ vac_strategy = bstrategy; vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared, &OldestXmin, &FreezeLimit); /* * Execute the various vacuum operations. Appendonly tables are treated * differently. */ if (RelationIsAoRows(onerel) || RelationIsAoCols(onerel)) { lazy_vacuum_aorel(onerel, vacstmt, updated_stats); return false; } vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); /* heap relation */ /* Set threshold for interesting free space = average request size */ /* XXX should we scale it up or down? Adjust vacuum.c too, if so */ vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node); vacrelstats->num_index_scans = 0; /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); vacrelstats->hasindex = (nindexes > 0); /* Do the vacuuming */ lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats); /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); /* * Optionally truncate the relation. * * Don't even think about it unless we have a shot at releasing a goodly * number of pages. Otherwise, the time taken isn't worth it. * * Note that after we've truncated the heap, it's too late to abort the * transaction; doing so would lose the sinval messages needed to tell * the other backends about the table being shrunk. We prevent interrupts * in that case; caller is responsible for re-enabling them after * committing the transaction. */ possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; if (possibly_freeable > 0 && (possibly_freeable >= REL_TRUNCATE_MINIMUM || possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)) { HOLD_INTERRUPTS(); heldoff = true; lazy_truncate_heap(onerel, vacrelstats); } /* Update shared free space map with final free space info */ lazy_update_fsm(onerel, vacrelstats); if (vacrelstats->tot_free_pages > MaxFSMPages) ereport(WARNING, (errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space", get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel)), /* Only suggest VACUUM FULL if > 20% free */ (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ? errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") : errhint("Consider increasing the configuration parameter \"max_fsm_pages\"."))); /* Update statistics in pg_class */ vac_update_relstats_from_list(onerel, vacrelstats->rel_pages, vacrelstats->rel_tuples, vacrelstats->hasindex, FreezeLimit, updated_stats); /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, true /*vacrelstats->scanned_all*/, vacstmt->analyze, vacrelstats->rel_tuples); if (gp_indexcheck_vacuum == INDEX_CHECK_ALL || (gp_indexcheck_vacuum == INDEX_CHECK_SYSTEM && PG_CATALOG_NAMESPACE == RelationGetNamespace(onerel))) { int i; for (i = 0; i < nindexes; i++) { if (Irel[i]->rd_rel->relam == BTREE_AM_OID) _bt_validate_vacuum(Irel[i], onerel, OldestXmin); } } /* and log the action if appropriate */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { if (Log_autovacuum_min_duration == 0 || TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(), Log_autovacuum_min_duration)) ereport(LOG, (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n" "pages: %d removed, %d remain\n" "tuples: %.0f removed, %.0f remain\n" "system usage: %s", get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel), vacrelstats->num_index_scans, vacrelstats->pages_removed, vacrelstats->rel_pages, vacrelstats->tuples_deleted, vacrelstats->rel_tuples, pg_rusage_show(&ru0)))); } return heldoff; }
/* * Move tuples from pending pages into regular GIN structure. * * On first glance it looks completely not crash-safe. But if we crash * after posting entries to the main index and before removing them from the * pending list, it's okay because when we redo the posting later on, nothing * bad will happen. * * fill_fsm indicates that ginInsertCleanup should add deleted pages * to FSM otherwise caller is responsible to put deleted pages into * FSM. * * If stats isn't null, we count deleted pending pages into the counts. */ void ginInsertCleanup(GinState *ginstate, bool full_clean, bool fill_fsm, IndexBulkDeleteResult *stats) { Relation index = ginstate->index; Buffer metabuffer, buffer; Page metapage, page; GinMetaPageData *metadata; MemoryContext opCtx, oldCtx; BuildAccumulator accum; KeyArray datums; BlockNumber blkno, blknoFinish; bool cleanupFinish = false; bool fsm_vac = false; Size workMemory; bool inVacuum = (stats == NULL); /* * We would like to prevent concurrent cleanup process. For that we will * lock metapage in exclusive mode using LockPage() call. Nobody other * will use that lock for metapage, so we keep possibility of concurrent * insertion into pending list */ if (inVacuum) { /* * We are called from [auto]vacuum/analyze or gin_clean_pending_list() * and we would like to wait concurrent cleanup to finish. */ LockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); workMemory = (IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1) ? autovacuum_work_mem : maintenance_work_mem; } else { /* * We are called from regular insert and if we see concurrent cleanup * just exit in hope that concurrent process will clean up pending * list. */ if (!ConditionalLockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock)) return; workMemory = work_mem; } metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); LockBuffer(metabuffer, GIN_SHARE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); if (metadata->head == InvalidBlockNumber) { /* Nothing to do */ UnlockReleaseBuffer(metabuffer); UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); return; } /* * Remember a tail page to prevent infinite cleanup if other backends add * new tuples faster than we can cleanup. */ blknoFinish = metadata->tail; /* * Read and lock head of pending list */ blkno = metadata->head; buffer = ReadBuffer(index, blkno); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); LockBuffer(metabuffer, GIN_UNLOCK); /* * Initialize. All temporary space will be in opCtx */ opCtx = AllocSetContextCreate(CurrentMemoryContext, "GIN insert cleanup temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldCtx = MemoryContextSwitchTo(opCtx); initKeyArray(&datums, 128); ginInitBA(&accum); accum.ginstate = ginstate; /* * At the top of this loop, we have pin and lock on the current page of * the pending list. However, we'll release that before exiting the loop. * Note we also have pin but not lock on the metapage. */ for (;;) { Assert(!GinPageIsDeleted(page)); /* * Are we walk through the page which as we remember was a tail when * we start our cleanup? But if caller asks us to clean up whole * pending list then ignore old tail, we will work until list becomes * empty. */ if (blkno == blknoFinish && full_clean == false) cleanupFinish = true; /* * read page's datums into accum */ processPendingPage(&accum, &datums, page, FirstOffsetNumber); vacuum_delay_point(); /* * Is it time to flush memory to disk? Flush if we are at the end of * the pending list, or if we have a full row and memory is getting * full. */ if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || (GinPageHasFullRow(page) && (accum.allocatedMemory >= workMemory * 1024L))) { ItemPointerData *list; uint32 nlist; Datum key; GinNullCategory category; OffsetNumber maxoff, attnum; /* * Unlock current page to increase performance. Changes of page * will be checked later by comparing maxoff after completion of * memory flush. */ maxoff = PageGetMaxOffsetNumber(page); LockBuffer(buffer, GIN_UNLOCK); /* * Moving collected data into regular structure can take * significant amount of time - so, run it without locking pending * list. */ ginBeginBAScan(&accum); while ((list = ginGetBAEntry(&accum, &attnum, &key, &category, &nlist)) != NULL) { ginEntryInsert(ginstate, attnum, key, category, list, nlist, NULL); vacuum_delay_point(); } /* * Lock the whole list to remove pages */ LockBuffer(metabuffer, GIN_EXCLUSIVE); LockBuffer(buffer, GIN_SHARE); Assert(!GinPageIsDeleted(page)); /* * While we left the page unlocked, more stuff might have gotten * added to it. If so, process those entries immediately. There * shouldn't be very many, so we don't worry about the fact that * we're doing this with exclusive lock. Insertion algorithm * guarantees that inserted row(s) will not continue on next page. * NOTE: intentionally no vacuum_delay_point in this loop. */ if (PageGetMaxOffsetNumber(page) != maxoff) { ginInitBA(&accum); processPendingPage(&accum, &datums, page, maxoff + 1); ginBeginBAScan(&accum); while ((list = ginGetBAEntry(&accum, &attnum, &key, &category, &nlist)) != NULL) ginEntryInsert(ginstate, attnum, key, category, list, nlist, NULL); } /* * Remember next page - it will become the new list head */ blkno = GinPageGetOpaque(page)->rightlink; UnlockReleaseBuffer(buffer); /* shiftList will do exclusive * locking */ /* * remove read pages from pending list, at this point all content * of read pages is in regular structure */ shiftList(index, metabuffer, blkno, fill_fsm, stats); /* At this point, some pending pages have been freed up */ fsm_vac = true; Assert(blkno == metadata->head); LockBuffer(metabuffer, GIN_UNLOCK); /* * if we removed the whole pending list or we cleanup tail (which * we remembered on start our cleanup process) then just exit */ if (blkno == InvalidBlockNumber || cleanupFinish) break; /* * release memory used so far and reinit state */ MemoryContextReset(opCtx); initKeyArray(&datums, datums.maxvalues); ginInitBA(&accum); } else { blkno = GinPageGetOpaque(page)->rightlink; UnlockReleaseBuffer(buffer); } /* * Read next page in pending list */ vacuum_delay_point(); buffer = ReadBuffer(index, blkno); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); } UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); ReleaseBuffer(metabuffer); /* * As pending list pages can have a high churn rate, it is desirable to * recycle them immediately to the FreeSpace Map when ordinary backends * clean the list. */ if (fsm_vac && fill_fsm) IndexFreeSpaceMapVacuum(index); /* Clean up temporary space */ MemoryContextSwitchTo(oldCtx); MemoryContextDelete(opCtx); }
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * In bootstrap mode no parameters are used. The autovacuum launcher process * doesn't use any parameters either, because it only goes far enough to be * able to read pg_database; it doesn't connect to any particular database. * In walsender mode only username is used. * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not completely filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, char *out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool am_superuser; char *fullpath; char dbname[NAMEDATALEN]; elog(DEBUG3, "InitPostgres"); /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend ID: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * Also set up timeout handlers needed for backend operation. We need * these in every case except bootstrap. */ if (!bootstrap) { RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLock); RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler); RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler); } /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG. */ if (IsUnderPostmaster) { /* * The postmaster already started the XLOG machinery, but we need to * call InitXLOGAccess(), if the system isn't in hot-standby mode. * This is handled by calling RecoveryInProgress and ignoring the * result. */ (void) RecoveryInProgress(); } else { /* * We are either a bootstrap process or a standalone backend. Either * way, start up the XLOG machinery, and register to have it closed * down at exit. */ StartupXLOG(); on_shmem_exit(ShutdownXLOG, 0); } /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); InitPlanCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least entries for pg_database and catalogs used for authentication. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This is the * first before_shmem_exit callback we register; thus, this will be the * last thing we do before low-level modules like the buffer manager begin * to close down. We need to have this in place before we begin our first * transaction --- if we fail during the initialization transaction, as is * entirely possible, we need the AbortTransaction call to clean up. */ before_shmem_exit(ShutdownPostgres, 0); /* The autovacuum launcher is done here */ if (IsAutoVacuumLauncherProcess()) return; /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. (This * is critical for anything that reads heap pages, because HOT may decide * to prune them even if the process doesn't attempt to modify any * tuples.) */ if (!bootstrap) { /* statement_timestamp must be set for timeouts to work correctly */ SetCurrentStatementStartTimestamp(); StartTransactionCommand(); /* * transaction_isolation will have been set to the default by the * above. If the default is "serializable", and we are in hot * standby, we will fail if we don't change it to something lower. * Fortunately, "read committed" is plenty good enough. */ XactIsoLevel = XACT_READ_COMMITTED; (void) GetTransactionSnapshot(); } /* * Perform client authentication if necessary, then figure out our * postgres user ID, and see if we are a superuser. * * In standalone mode and in autovacuum worker processes, we use a fixed * ID, otherwise we figure it out from the authenticated user name. */ if (bootstrap || IsAutoVacuumWorkerProcess()) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.", username))); } else if (IsBackgroundWorker) { if (username == NULL) { InitializeSessionUserIdStandalone(); am_superuser = true; } else { InitializeSessionUserId(username); am_superuser = superuser(); } } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username); am_superuser = superuser(); } /* * If we're trying to shut down, only superusers can connect, and new * replication connections are not allowed. */ if ((!am_superuser || am_walsender) && MyProcPort != NULL && MyProcPort->canAcceptConnections == CAC_WAITBACKUP) { if (am_walsender) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("new replication connections are not allowed during database shutdown"))); else ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect during database shutdown"))); } /* * Binary upgrades only allowed super-user connections */ if (IsBinaryUpgrade && !am_superuser) { ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect in binary upgrade mode"))); } /* * The last few connections slots are reserved for superusers. Although * replication connections currently require superuser privileges, we * don't allow them to consume the reserved slots, which are intended for * interactive use. */ if ((!am_superuser || am_walsender) && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("remaining connection slots are reserved for non-replication superuser connections"))); /* Check replication permissions needed for walsender processes. */ if (am_walsender) { Assert(!bootstrap); if (!superuser() && !has_rolreplication(GetUserId())) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to start walsender"))); } /* * If this is a plain walsender only supporting physical replication, we * don't want to connect to any particular database. Just finish the * backend startup by processing any options from the startup packet, and * we're done. */ if (am_walsender && !am_db_walsender) { /* process any options passed in the startup packet */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ pgstat_bestart(); /* close the transaction we started above */ CommitTransactionCommand(); return; } /* * Set up the global variables holding database id and default tablespace. * But note we won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap case, otherwise we have to look up * the db's entry in pg_database. */ if (bootstrap) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); } else if (OidIsValid(dboid)) { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); } else { /* * If this is a background worker not bound to any particular * database, we're done now. Everything that follows only makes * sense if we are bound to a specific database. We do need to * close the transaction we started before returning. */ if (!bootstrap) CommitTransactionCommand(); return; } /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we will advertise our use of the * database in the ProcArray before dropping the lock (in fact, that's the * next thing to do). Anyone trying a DROP DATABASE after this point will * see us in the array once they have the lock. Ordering is important for * this because we don't want to advertise ourselves as being in this * database until we have the lock; otherwise we create what amounts to a * deadlock with CountOtherDBBackends(). * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Now we can mark our PGPROC entry with the database ID. * * We assume this is an atomic store so no lock is needed; though actually * things would work fine even if it weren't atomic. Anyone searching the * ProcArray for this database's ID should hold the database lock, so they * would not be executing concurrently with this store. A process looking * for another database's ID could in theory see a chance match if it read * a partially-updated databaseId value; but as long as all such searches * wait and retry, as in CountOtherDBBackends(), they will certainly see * the correct value on their next try. */ MyProc->databaseId = MyDatabaseId; /* * We established a catalog snapshot while reading pg_authid and/or * pg_database; but until we have set up MyDatabaseId, we won't react to * incoming sinval messages for unshared catalogs, so we won't realize it * if the snapshot has been invalidated. Assume it's no good anymore. */ InvalidateCatalogSnapshot(); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != HeapTupleGetOid(tuple) || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } /* * Now we should be able to access the database directory safely. Verify * it's there and looks reasonable. */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase3(); /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap) CheckMyDatabase(dbname, am_superuser); /* * Now process any command-line switches and any additional GUC variable * settings passed in the startup packet. We couldn't do this before * because we didn't know if client is a superuser. */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Process pg_db_role_setting options */ process_settings(MyDatabaseId, GetSessionUserId()); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* * Initialize various default states that can't be set up until we've * selected the active user and gotten the right GUC settings. */ /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); }
/* * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation * * This routine vacuums a single heap, cleans out its indexes, and * updates its relpages and reltuples statistics. * * At entry, we have already established a transaction and opened * and locked the relation. */ void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) { LVRelStats *vacrelstats; Relation *Irel; int nindexes; BlockNumber possibly_freeable; PGRUsage ru0; TimestampTz starttime = 0; long secs; int usecs; double read_rate, write_rate; bool scan_all; TransactionId freezeTableLimit; BlockNumber new_rel_pages; double new_rel_tuples; BlockNumber new_rel_allvisible; TransactionId new_frozen_xid; /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { pg_rusage_init(&ru0); starttime = GetCurrentTimestamp(); } if (vacstmt->options & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; vac_strategy = bstrategy; vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age, onerel->rd_rel->relisshared, &OldestXmin, &FreezeLimit, &freezeTableLimit); scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid, freezeTableLimit); vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); vacrelstats->old_rel_pages = onerel->rd_rel->relpages; vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples; vacrelstats->num_index_scans = 0; /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); vacrelstats->hasindex = (nindexes > 0); /* Do the vacuuming */ lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all); /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); /* * Optionally truncate the relation. * * Don't even think about it unless we have a shot at releasing a goodly * number of pages. Otherwise, the time taken isn't worth it. */ possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; if (possibly_freeable > 0 && (possibly_freeable >= REL_TRUNCATE_MINIMUM || possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)) lazy_truncate_heap(onerel, vacrelstats); /* Vacuum the Free Space Map */ FreeSpaceMapVacuum(onerel); /* * Update statistics in pg_class. * * A corner case here is that if we scanned no pages at all because every * page is all-visible, we should not update relpages/reltuples, because * we have no new information to contribute. In particular this keeps * us from replacing relpages=reltuples=0 (which means "unknown tuple * density") with nonzero relpages and reltuples=0 (which means "zero * tuple density") unless there's some actual evidence for the latter. * * We do update relallvisible even in the corner case, since if the * table is all-visible we'd definitely like to know that. But clamp * the value to be not more than what we're setting relpages to. * * Also, don't change relfrozenxid if we skipped any pages, since then * we don't know for certain that all tuples have a newer xmin. */ new_rel_pages = vacrelstats->rel_pages; new_rel_tuples = vacrelstats->new_rel_tuples; if (vacrelstats->scanned_pages == 0 && new_rel_pages > 0) { new_rel_pages = vacrelstats->old_rel_pages; new_rel_tuples = vacrelstats->old_rel_tuples; } new_rel_allvisible = visibilitymap_count(onerel); if (new_rel_allvisible > new_rel_pages) new_rel_allvisible = new_rel_pages; new_frozen_xid = FreezeLimit; if (vacrelstats->scanned_pages < vacrelstats->rel_pages) new_frozen_xid = InvalidTransactionId; vac_update_relstats(onerel, new_rel_pages, new_rel_tuples, new_rel_allvisible, vacrelstats->hasindex, new_frozen_xid); /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, new_rel_tuples); /* and log the action if appropriate */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { TimestampTz endtime = GetCurrentTimestamp(); if (Log_autovacuum_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, Log_autovacuum_min_duration)) { TimestampDifference(starttime, endtime, &secs, &usecs); read_rate = 0; write_rate = 0; if ((secs > 0) || (usecs > 0)) { read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) / (secs + usecs / 1000000.0); write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) / (secs + usecs / 1000000.0); } ereport(LOG, (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n" "pages: %d removed, %d remain\n" "tuples: %.0f removed, %.0f remain\n" "buffer usage: %d hits, %d misses, %d dirtied\n" "avg read rate: %.3f MiB/s, avg write rate: %.3f MiB/s\n" "system usage: %s", get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel), vacrelstats->num_index_scans, vacrelstats->pages_removed, vacrelstats->rel_pages, vacrelstats->tuples_deleted, vacrelstats->new_rel_tuples, VacuumPageHit, VacuumPageMiss, VacuumPageDirty, read_rate,write_rate, pg_rusage_show(&ru0)))); } } }
/* * Primary entry point for VACUUM and ANALYZE commands. * * relid is normally InvalidOid; if it is not, then it provides the relation * OID to be processed, and vacstmt->relation is ignored. (The non-invalid * case is currently only used by autovacuum.) * * do_toast is passed as FALSE by autovacuum, because it processes TOAST * tables separately. * * for_wraparound is used by autovacuum to let us know when it's forcing * a vacuum for wraparound, which should not be auto-cancelled. * * bstrategy is normally given as NULL, but in autovacuum it can be passed * in to use the same buffer strategy object across multiple vacuum() calls. * * isTopLevel should be passed down from ProcessUtility. * * It is the caller's responsibility that vacstmt and bstrategy * (if given) be allocated in a memory context that won't disappear * at transaction commit. */ void vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel) { const char *stmttype; volatile bool all_rels, in_outer_xact, use_own_xacts; List *relations; /* sanity checks on options */ Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE)); Assert((vacstmt->options & VACOPT_VACUUM) || !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE))); Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL); stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside * a transaction, then our commit- and start-transaction-command calls * would not have the intended effect! There are numerous other subtle * dependencies on this, too. * * ANALYZE (without VACUUM) can run either way. */ if (vacstmt->options & VACOPT_VACUUM) { PreventTransactionChain(isTopLevel, stmttype); in_outer_xact = false; } else in_outer_xact = IsInTransactionChain(isTopLevel); /* * Send info about dead objects to the statistics collector, unless we are * in autovacuum --- autovacuum.c does this for itself. */ if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) pgstat_vacuum_stat(); /* * Create special memory context for cross-transaction storage. * * Since it is a child of PortalContext, it will go away eventually even * if we suffer an error; there's no need for special abort cleanup logic. */ vac_context = AllocSetContextCreate(PortalContext, "Vacuum", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* * If caller didn't give us a buffer strategy object, make one in the * cross-transaction memory context. */ if (bstrategy == NULL) { MemoryContext old_context = MemoryContextSwitchTo(vac_context); bstrategy = GetAccessStrategy(BAS_VACUUM); MemoryContextSwitchTo(old_context); } vac_strategy = bstrategy; /* Remember whether we are processing everything in the DB */ all_rels = (!OidIsValid(relid) && vacstmt->relation == NULL); /* * Build list of relations to process, unless caller gave us one. (If we * build one, we put it in vac_context for safekeeping.) */ relations = get_rel_oids(relid, vacstmt->relation); /* * Decide whether we need to start/commit our own transactions. * * For VACUUM (with or without ANALYZE): always do so, so that we can * release locks as soon as possible. (We could possibly use the outer * transaction for a one-table VACUUM, but handling TOAST tables would be * problematic.) * * For ANALYZE (no VACUUM): if inside a transaction block, we cannot * start/commit our own transactions. Also, there's no need to do so if * only processing one relation. For multiple relations when not within a * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. */ if (vacstmt->options & VACOPT_VACUUM) use_own_xacts = true; else { Assert(vacstmt->options & VACOPT_ANALYZE); if (IsAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) use_own_xacts = false; else if (list_length(relations) > 1) use_own_xacts = true; else use_own_xacts = false; } /* * vacuum_rel expects to be entered with no transaction active; it will * start and commit its own transaction. But we are called by an SQL * command, and so we are executing inside a transaction already. We * commit the transaction started in PostgresMain() here, and start * another one before exiting to match the commit waiting for us back in * PostgresMain(). */ if (use_own_xacts) { /* ActiveSnapshot is not set by autovacuum */ if (ActiveSnapshotSet()) PopActiveSnapshot(); /* matches the StartTransaction in PostgresMain() */ CommitTransactionCommand(); } /* Turn vacuum cost accounting on or off */ PG_TRY(); { ListCell *cur; VacuumCostActive = (VacuumCostDelay > 0); VacuumCostBalance = 0; /* * Loop to process each selected relation. */ foreach(cur, relations) { Oid relid = lfirst_oid(cur); bool scanned_all = false; if (vacstmt->options & VACOPT_VACUUM) vacuum_rel(relid, vacstmt, do_toast, for_wraparound, &scanned_all); if (vacstmt->options & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, * we can use the outer transaction. */ if (use_own_xacts) { StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); } analyze_rel(relid, vacstmt, vac_strategy, !scanned_all); if (use_own_xacts) { PopActiveSnapshot(); CommitTransactionCommand(); } } } }
static void DtmXactCallback(XactEvent event, void *arg) { //XTM_INFO("%d: DtmXactCallbackevent=%d nextxid=%d\n", getpid(), event, DtmNextXid); switch (event) { case XACT_EVENT_START: //XTM_INFO("%d: normal=%d, initialized=%d, replication=%d, bgw=%d, vacuum=%d\n", // getpid(), IsNormalProcessingMode(), dtm->initialized, MMDoReplication, IsBackgroundWorker, IsAutoVacuumWorkerProcess()); if (IsNormalProcessingMode() && dtm->initialized && MMDoReplication && !am_walsender && !IsBackgroundWorker && !IsAutoVacuumWorkerProcess()) { MMBeginTransaction(); } break; #if 0 case XACT_EVENT_PRE_COMMIT: case XACT_EVENT_PARALLEL_PRE_COMMIT: { TransactionId xid = GetCurrentTransactionIdIfAny(); if (!MMIsDistributedTrans && TransactionIdIsValid(xid)) { XTM_INFO("%d: Will ignore transaction %u\n", getpid(), xid); MMMarkTransAsLocal(xid); } break; } #endif case XACT_EVENT_COMMIT: case XACT_EVENT_ABORT: if (TransactionIdIsValid(DtmNextXid)) { if (!DtmVoted) { ArbiterSetTransStatus(DtmNextXid, TRANSACTION_STATUS_ABORTED, false); } if (event == XACT_EVENT_COMMIT) { /* * Now transaction status is already written in CLOG, * so we can remove information about it from hash table */ LWLockAcquire(dtm->hashLock, LW_EXCLUSIVE); hash_search(xid_in_doubt, &DtmNextXid, HASH_REMOVE, NULL); LWLockRelease(dtm->hashLock); } #if 0 /* should be handled now using DtmVoted flag */ else { /* * Transaction at the node can be aborted because of transaction failure at some other node * before it starts doing anything and assigned Xid, in this case Postgres is not calling SetTransactionStatus, * so we have to send report to DTMD here */ if (!TransactionIdIsValid(GetCurrentTransactionIdIfAny())) { XTM_INFO("%d: abort transation on DTMD\n", getpid()); ArbiterSetTransStatus(DtmNextXid, TRANSACTION_STATUS_ABORTED, false); } } #endif DtmNextXid = InvalidTransactionId; DtmLastSnapshot = NULL; } MMIsDistributedTrans = false; break; default: break; } }
/* * Primary entry point for VACUUM and ANALYZE commands. * * options is a bitmask of VacuumOption flags, indicating what to do. * * relid, if not InvalidOid, indicate the relation to process; otherwise, * the RangeVar is used. (The latter must always be passed, because it's * used for error messages.) * * params contains a set of parameters that can be used to customize the * behavior. * * va_cols is a list of columns to analyze, or NIL to process them all. * * bstrategy is normally given as NULL, but in autovacuum it can be passed * in to use the same buffer strategy object across multiple vacuum() calls. * * isTopLevel should be passed down from ProcessUtility. * * It is the caller's responsibility that all parameters are allocated in a * memory context that will not disappear at transaction commit. */ void vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params, List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel) { const char *stmttype; volatile bool in_outer_xact, use_own_xacts; List *relations; static bool in_vacuum = false; Assert(params != NULL); stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside * a transaction, then our commit- and start-transaction-command calls * would not have the intended effect! There are numerous other subtle * dependencies on this, too. * * ANALYZE (without VACUUM) can run either way. */ if (options & VACOPT_VACUUM) { PreventTransactionChain(isTopLevel, stmttype); in_outer_xact = false; } else in_outer_xact = IsInTransactionChain(isTopLevel); /* * Due to static variables vac_context, anl_context and vac_strategy, * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE * calls a hostile index expression that itself calls ANALYZE. */ if (in_vacuum) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("%s cannot be executed from VACUUM or ANALYZE", stmttype))); /* * Sanity check DISABLE_PAGE_SKIPPING option. */ if ((options & VACOPT_FULL) != 0 && (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL"))); /* * Send info about dead objects to the statistics collector, unless we are * in autovacuum --- autovacuum.c does this for itself. */ if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) pgstat_vacuum_stat(); /* * Create special memory context for cross-transaction storage. * * Since it is a child of PortalContext, it will go away eventually even * if we suffer an error; there's no need for special abort cleanup logic. */ vac_context = AllocSetContextCreate(PortalContext, "Vacuum", ALLOCSET_DEFAULT_SIZES); /* * If caller didn't give us a buffer strategy object, make one in the * cross-transaction memory context. */ if (bstrategy == NULL) { MemoryContext old_context = MemoryContextSwitchTo(vac_context); bstrategy = GetAccessStrategy(BAS_VACUUM); MemoryContextSwitchTo(old_context); } vac_strategy = bstrategy; /* * Build list of relations to process, unless caller gave us one. (If we * build one, we put it in vac_context for safekeeping.) */ relations = get_rel_oids(relid, relation); /* * Decide whether we need to start/commit our own transactions. * * For VACUUM (with or without ANALYZE): always do so, so that we can * release locks as soon as possible. (We could possibly use the outer * transaction for a one-table VACUUM, but handling TOAST tables would be * problematic.) * * For ANALYZE (no VACUUM): if inside a transaction block, we cannot * start/commit our own transactions. Also, there's no need to do so if * only processing one relation. For multiple relations when not within a * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. */ if (options & VACOPT_VACUUM) use_own_xacts = true; else { Assert(options & VACOPT_ANALYZE); if (IsAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) use_own_xacts = false; else if (list_length(relations) > 1) use_own_xacts = true; else use_own_xacts = false; } /* * vacuum_rel expects to be entered with no transaction active; it will * start and commit its own transaction. But we are called by an SQL * command, and so we are executing inside a transaction already. We * commit the transaction started in PostgresMain() here, and start * another one before exiting to match the commit waiting for us back in * PostgresMain(). */ if (use_own_xacts) { Assert(!in_outer_xact); /* ActiveSnapshot is not set by autovacuum */ if (ActiveSnapshotSet()) PopActiveSnapshot(); /* matches the StartTransaction in PostgresMain() */ CommitTransactionCommand(); } /* Turn vacuum cost accounting on or off */ PG_TRY(); { ListCell *cur; in_vacuum = true; VacuumCostActive = (VacuumCostDelay > 0); VacuumCostBalance = 0; VacuumPageHit = 0; VacuumPageMiss = 0; VacuumPageDirty = 0; /* * Loop to process each selected relation. */ foreach(cur, relations) { Oid relid = lfirst_oid(cur); if (options & VACOPT_VACUUM) { if (!vacuum_rel(relid, relation, options, params)) continue; } if (options & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, * we can use the outer transaction. */ if (use_own_xacts) { StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); } analyze_rel(relid, relation, options, params, va_cols, in_outer_xact, vac_strategy); if (use_own_xacts) { PopActiveSnapshot(); CommitTransactionCommand(); } } } }
/* * InitProcess -- initialize a per-process data structure for this backend */ void InitProcess(void) { /* use volatile pointer to prevent code rearrangement */ volatile PROC_HDR *procglobal = ProcGlobal; /* * ProcGlobal should be set up already (if we are a backend, we inherit * this by fork() or EXEC_BACKEND mechanism from the postmaster). */ if (procglobal == NULL) elog(PANIC, "proc header uninitialized"); if (MyProc != NULL) elog(ERROR, "you already exist"); /* * Try to get a proc struct from the free list. If this fails, we must be * out of PGPROC structures (not to mention semaphores). * * While we are holding the ProcStructLock, also copy the current shared * estimate of spins_per_delay to local storage. */ SpinLockAcquire(ProcStructLock); set_spins_per_delay(procglobal->spins_per_delay); if (IsAnyAutoVacuumProcess()) MyProc = procglobal->autovacFreeProcs; else if (IsBackgroundWorker) MyProc = procglobal->bgworkerFreeProcs; else MyProc = procglobal->freeProcs; if (MyProc != NULL) { if (IsAnyAutoVacuumProcess()) procglobal->autovacFreeProcs = (PGPROC *) MyProc->links.next; else if (IsBackgroundWorker) procglobal->bgworkerFreeProcs = (PGPROC *) MyProc->links.next; else procglobal->freeProcs = (PGPROC *) MyProc->links.next; SpinLockRelease(ProcStructLock); } else { /* * If we reach here, all the PGPROCs are in use. This is one of the * possible places to detect "too many backends", so give the standard * error message. XXX do we need to give a different failure message * in the autovacuum case? */ SpinLockRelease(ProcStructLock); ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); } MyPgXact = &ProcGlobal->allPgXact[MyProc->pgprocno]; /* * Now that we have a PGPROC, mark ourselves as an active postmaster * child; this is so that the postmaster can detect it if we exit without * cleaning up. (XXX autovac launcher currently doesn't participate in * this; it probably should.) */ if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess()) MarkPostmasterChildActive(); /* * Initialize all fields of MyProc, except for those previously * initialized by InitProcGlobal. */ SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->lxid = InvalidLocalTransactionId; MyProc->fpVXIDLock = false; MyProc->fpLocalTransactionId = InvalidLocalTransactionId; MyPgXact->xid = InvalidTransactionId; MyPgXact->xmin = InvalidTransactionId; MyProc->pid = MyProcPid; /* backendId, databaseId and roleId will be filled in later */ MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; MyPgXact->delayChkpt = false; MyPgXact->vacuumFlags = 0; /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ if (IsAutoVacuumWorkerProcess()) MyPgXact->vacuumFlags |= PROC_IS_AUTOVACUUM; MyProc->lwWaiting = false; MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; #ifdef USE_ASSERT_CHECKING { int i; /* Last process should have released all locks. */ for (i = 0; i < NUM_LOCK_PARTITIONS; i++) Assert(SHMQueueEmpty(&(MyProc->myProcLocks[i]))); } #endif MyProc->recoveryConflictPending = false; /* Initialize fields for sync rep */ MyProc->waitLSN = 0; MyProc->syncRepState = SYNC_REP_NOT_WAITING; SHMQueueElemInit(&(MyProc->syncRepLinks)); /* * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch * on it. That allows us to repoint the process latch, which so far * points to process local one, to the shared one. */ OwnLatch(&MyProc->procLatch); SwitchToSharedLatch(); /* * We might be reusing a semaphore that belonged to a failed process. So * be careful and reinitialize its value here. (This is not strictly * necessary anymore, but seems like a good idea for cleanliness.) */ PGSemaphoreReset(&MyProc->sem); /* * Arrange to clean up at backend exit. */ on_shmem_exit(ProcKill, 0); /* * Now that we have a PGPROC, we could try to acquire locks, so initialize * local state needed for LWLocks, and the deadlock checker. */ InitLWLockAccess(); InitDeadLockChecking(); }
Datum ginvacuumcleanup(PG_FUNCTION_ARGS) { IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); Relation index = info->index; bool needLock; BlockNumber npages, blkno; BlockNumber totFreePages; GinState ginstate; GinStatsData idxStat; /* * In an autovacuum analyze, we want to clean up pending insertions. * Otherwise, an ANALYZE-only call is a no-op. */ if (info->analyze_only) { if (IsAutoVacuumWorkerProcess()) { initGinState(&ginstate, index); ginInsertCleanup(&ginstate, true, stats); } PG_RETURN_POINTER(stats); } /* * Set up all-zero stats and cleanup pending inserts if ginbulkdelete * wasn't called */ if (stats == NULL) { stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); initGinState(&ginstate, index); ginInsertCleanup(&ginstate, true, stats); } memset(&idxStat, 0, sizeof(idxStat)); /* * XXX we always report the heap tuple count as the number of index * entries. This is bogus if the index is partial, but it's real hard to * tell how many distinct heap entries are referenced by a GIN index. */ stats->num_index_tuples = info->num_heap_tuples; stats->estimated_count = info->estimated_count; /* * Need lock unless it's local to this backend. */ needLock = !RELATION_IS_LOCAL(index); if (needLock) LockRelationForExtension(index, ExclusiveLock); npages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); totFreePages = 0; for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++) { Buffer buffer; Page page; vacuum_delay_point(); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_SHARE); page = (Page) BufferGetPage(buffer); if (GinPageIsDeleted(page)) { Assert(blkno != GIN_ROOT_BLKNO); RecordFreeIndexPage(index, blkno); totFreePages++; } else if (GinPageIsData(page)) { idxStat.nDataPages++; } else if (!GinPageIsList(page)) { idxStat.nEntryPages++; if (GinPageIsLeaf(page)) idxStat.nEntries += PageGetMaxOffsetNumber(page); } UnlockReleaseBuffer(buffer); } /* Update the metapage with accurate page and entry counts */ idxStat.nTotalPages = npages; ginUpdateStats(info->index, &idxStat); /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); stats->pages_free = totFreePages; if (needLock) LockRelationForExtension(index, ExclusiveLock); stats->num_pages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); PG_RETURN_POINTER(stats); }
IndexBulkDeleteResult * ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state) { Relation index = info->index; BlockNumber blkno = GIN_ROOT_BLKNO; GinVacuumState gvs; Buffer buffer; BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext, "Gin vacuum temporary context", ALLOCSET_DEFAULT_SIZES); gvs.index = index; gvs.callback = callback; gvs.callback_state = callback_state; gvs.strategy = info->strategy; initGinState(&gvs.ginstate, index); /* first time through? */ if (stats == NULL) { /* Yes, so initialize stats to zeroes */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* * and cleanup any pending inserts */ ginInsertCleanup(&gvs.ginstate, !IsAutoVacuumWorkerProcess(), false, stats); } /* we'll re-count the tuples each time */ stats->num_index_tuples = 0; gvs.result = stats; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); /* find leaf page */ for (;;) { Page page = BufferGetPage(buffer); IndexTuple itup; LockBuffer(buffer, GIN_SHARE); Assert(!GinPageIsData(page)); if (GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); LockBuffer(buffer, GIN_EXCLUSIVE); if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); continue; /* check it one more */ } break; } Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); blkno = GinGetDownlink(itup); Assert(blkno != InvalidBlockNumber); UnlockReleaseBuffer(buffer); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); } /* right now we found leftmost page in entry's BTree */ for (;;) { Page page = BufferGetPage(buffer); Page resPage; uint32 i; Assert(!GinPageIsData(page)); resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot); blkno = GinPageGetOpaque(page)->rightlink; if (resPage) { START_CRIT_SECTION(); PageRestoreTempPage(resPage, page); MarkBufferDirty(buffer); xlogVacuumPage(gvs.index, buffer); UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); } else { UnlockReleaseBuffer(buffer); } vacuum_delay_point(); for (i = 0; i < nRoot; i++) { ginVacuumPostingTree(&gvs, rootOfPostingTree[i]); vacuum_delay_point(); } if (blkno == InvalidBlockNumber) /* rightmost page */ break; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_EXCLUSIVE); } MemoryContextDelete(gvs.tmpCxt); return gvs.result; }
/* * CheckMyDatabase -- fetch information from the pg_database entry for our DB */ static void CheckMyDatabase(const char *name, bool am_superuser) { HeapTuple tup; Form_pg_database dbform; char *collate; char *ctype; /* Fetch our pg_database row normally, via syscache */ tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); if (!HeapTupleIsValid(tup)) elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); dbform = (Form_pg_database) GETSTRUCT(tup); /* This recheck is strictly paranoia */ if (strcmp(name, NameStr(dbform->datname)) != 0) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" has disappeared from pg_database", name), errdetail("Database OID %u now seems to belong to \"%s\".", MyDatabaseId, NameStr(dbform->datname)))); /* * Check permissions to connect to the database. * * These checks are not enforced when in standalone mode, so that there is * a way to recover from disabling all access to all databases, for * example "UPDATE pg_database SET datallowconn = false;". * * We do not enforce them for autovacuum worker processes either. */ if (IsUnderPostmaster && !IsAutoVacuumWorkerProcess()) { /* * Check that the database is currently allowing connections. */ if (!dbform->datallowconn) ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("database \"%s\" is not currently accepting connections", name))); /* * Check privilege to connect to the database. (The am_superuser test * is redundant, but since we have the flag, might as well check it * and save a few cycles.) */ if (!am_superuser && pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CONNECT) != ACLCHECK_OK) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for database \"%s\"", name), errdetail("User does not have CONNECT privilege."))); /* * Check connection limit for this database. * * There is a race condition here --- we create our PGPROC before * checking for other PGPROCs. If two backends did this at about the * same time, they might both think they were over the limit, while * ideally one should succeed and one fail. Getting that to work * exactly seems more trouble than it is worth, however; instead we * just document that the connection limit is approximate. */ if (dbform->datconnlimit >= 0 && !am_superuser && CountDBBackends(MyDatabaseId) > dbform->datconnlimit) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("too many connections for database \"%s\"", name))); } /* * OK, we're golden. Next to-do item is to save the encoding info out of * the pg_database tuple. */ SetDatabaseEncoding(dbform->encoding); /* Record it as a GUC internal option, too */ SetConfigOption("server_encoding", GetDatabaseEncodingName(), PGC_INTERNAL, PGC_S_OVERRIDE); /* If we have no other source of client_encoding, use server encoding */ SetConfigOption("client_encoding", GetDatabaseEncodingName(), PGC_BACKEND, PGC_S_DYNAMIC_DEFAULT); /* assign locale variables */ collate = NameStr(dbform->datcollate); ctype = NameStr(dbform->datctype); if (pg_perm_setlocale(LC_COLLATE, collate) == NULL) ereport(FATAL, (errmsg("database locale is incompatible with operating system"), errdetail("The database was initialized with LC_COLLATE \"%s\", " " which is not recognized by setlocale().", collate), errhint("Recreate the database with another locale or install the missing locale."))); if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL) ereport(FATAL, (errmsg("database locale is incompatible with operating system"), errdetail("The database was initialized with LC_CTYPE \"%s\", " " which is not recognized by setlocale().", ctype), errhint("Recreate the database with another locale or install the missing locale."))); /* Make the locale settings visible as GUC variables, too */ SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE); SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE); ReleaseSysCache(tup); }
/* * CheckMyDatabase -- fetch information from the pg_database entry for our DB */ static void CheckMyDatabase(const char *name, bool am_superuser) { HeapTuple tup; Form_pg_database dbform; /* Fetch our pg_database row normally, via syscache */ tup = SearchSysCache(DATABASEOID, ObjectIdGetDatum(MyDatabaseId), 0, 0, 0); if (!HeapTupleIsValid(tup)) elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); dbform = (Form_pg_database) GETSTRUCT(tup); /* This recheck is strictly paranoia */ if (strcmp(name, NameStr(dbform->datname)) != 0) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" has disappeared from pg_database", name), errdetail("Database OID %u now seems to belong to \"%s\".", MyDatabaseId, NameStr(dbform->datname)))); /* * Check permissions to connect to the database. * * These checks are not enforced when in standalone mode, so that there is * a way to recover from disabling all access to all databases, for * example "UPDATE pg_database SET datallowconn = false;". * * We do not enforce them for the autovacuum worker processes either. */ if (IsUnderPostmaster && !IsAutoVacuumWorkerProcess()) { /* * Check that the database is currently allowing connections. */ if (!dbform->datallowconn) ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("database \"%s\" is not currently accepting connections", name))); /* * Check privilege to connect to the database. (The am_superuser test * is redundant, but since we have the flag, might as well check it * and save a few cycles.) */ if (!am_superuser && pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CONNECT) != ACLCHECK_OK) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for database \"%s\"", name), errdetail("User does not have CONNECT privilege."))); /* * Check connection limit for this database. * * There is a race condition here --- we create our PGPROC before * checking for other PGPROCs. If two backends did this at about the * same time, they might both think they were over the limit, while * ideally one should succeed and one fail. Getting that to work * exactly seems more trouble than it is worth, however; instead we * just document that the connection limit is approximate. */ if (dbform->datconnlimit >= 0 && !am_superuser && CountDBBackends(MyDatabaseId) > dbform->datconnlimit) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("too many connections for database \"%s\"", name))); } /* * OK, we're golden. Next to-do item is to save the encoding info out of * the pg_database tuple. */ SetDatabaseEncoding(dbform->encoding); /* Record it as a GUC internal option, too */ SetConfigOption("server_encoding", GetDatabaseEncodingName(), PGC_INTERNAL, PGC_S_OVERRIDE); /* If we have no other source of client_encoding, use server encoding */ SetConfigOption("client_encoding", GetDatabaseEncodingName(), PGC_BACKEND, PGC_S_DEFAULT); /* Use the right encoding in translated messages */ #ifdef ENABLE_NLS pg_bind_textdomain_codeset(textdomain(NULL)); #endif /* * Lastly, set up any database-specific configuration variables. */ if (IsUnderPostmaster) { Datum datum; bool isnull; datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_datconfig, &isnull); if (!isnull) { ArrayType *a = DatumGetArrayTypeP(datum); /* * We process all the options at SUSET level. We assume that the * right to insert an option into pg_database was checked when it * was inserted. */ ProcessGUCArray(a, PGC_SUSET, PGC_S_DATABASE, GUC_ACTION_SET); } } ReleaseSysCache(tup); }
/* * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation * * This routine vacuums a single heap, cleans out its indexes, and * updates its relpages and reltuples statistics. * * At entry, we have already established a transaction and opened * and locked the relation. */ void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) { LVRelStats *vacrelstats; Relation *Irel; int nindexes; BlockNumber possibly_freeable; PGRUsage ru0; TimestampTz starttime = 0; bool scan_all; TransactionId freezeTableLimit; pg_rusage_init(&ru0); /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0) starttime = GetCurrentTimestamp(); if (vacstmt->options & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; vac_strategy = bstrategy; vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age, onerel->rd_rel->relisshared, &OldestXmin, &FreezeLimit, &freezeTableLimit); scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid, freezeTableLimit); vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples; vacrelstats->num_index_scans = 0; /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); vacrelstats->hasindex = (nindexes > 0); /* Do the vacuuming */ lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all); /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); /* * Optionally truncate the relation. * * Don't even think about it unless we have a shot at releasing a goodly * number of pages. Otherwise, the time taken isn't worth it. */ possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; if (possibly_freeable > 0 && (possibly_freeable >= REL_TRUNCATE_MINIMUM || possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)) lazy_truncate_heap(onerel, vacrelstats); /* Vacuum the Free Space Map */ FreeSpaceMapVacuum(onerel); /* * Update statistics in pg_class. But don't change relfrozenxid if we * skipped any pages. */ vac_update_relstats(onerel, vacrelstats->rel_pages, vacrelstats->new_rel_tuples, vacrelstats->hasindex, (vacrelstats->scanned_pages < vacrelstats->rel_pages) ? InvalidTransactionId : FreezeLimit); /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, vacrelstats->new_rel_tuples); /* and log the action if appropriate */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { if (Log_autovacuum_min_duration == 0 || TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(), Log_autovacuum_min_duration)) ereport(LOG, (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n" "pages: %d removed, %d remain\n" "tuples: %.0f removed, %.0f remain\n" "system usage: %s", get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel), vacrelstats->num_index_scans, vacrelstats->pages_removed, vacrelstats->rel_pages, vacrelstats->tuples_deleted, vacrelstats->new_rel_tuples, pg_rusage_show(&ru0)))); } }
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * In bootstrap mode no parameters are used. * * The return value indicates whether the userID is a superuser. (That * can only be tested inside a transaction, so we want to do it during * the startup transaction rather than doing a separate one in postgres.c.) * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, char *out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool autovacuum = IsAutoVacuumWorkerProcess(); bool am_superuser; char *fullpath; char dbname[NAMEDATALEN]; /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* Initialize SessionState entry */ SessionState_Init(); /* Initialize memory protection */ GPMemoryProtect_Init(); #ifdef USE_ORCA /* Initialize GPOPT */ InitGPOPT(); #endif /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG. In bootstrap case we may * skip this since StartupXLOG() was run instead. */ if (!bootstrap) InitXLOGAccess(); /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); InitPlanCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least entries for pg_database and catalogs used for authentication. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This has to * be after we've initialized all the low-level modules like the buffer * manager, because during shutdown this has to run before the low-level * modules start to close down. On the other hand, we want it in place * before we begin our first transaction --- if we fail during the * initialization transaction, as is entirely possible, we need the * AbortTransaction call to clean up. */ on_shmem_exit(ShutdownPostgres, 0); /* TODO: autovacuum launcher should be done here? */ /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. */ if (!bootstrap) { StartTransactionCommand(); (void) GetTransactionSnapshot(); } /* * Figure out our postgres user id, and see if we are a superuser. * * In standalone mode and in the autovacuum process, we use a fixed id, * otherwise we figure it out from the authenticated user name. */ if (bootstrap || autovacuum) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" CREATEUSER;.", username))); } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username); am_superuser = superuser(); } /* * Check a normal user hasn't connected to a superuser reserved slot. */ if (!am_superuser && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("connection limit exceeded for non-superusers"), errSendAlert(true))); /* * If walsender, we don't want to connect to any particular database. Just * finish the backend startup by processing any options from the startup * packet, and we're done. */ if (am_walsender) { Assert(!bootstrap); /* * We don't have replication role, which existed in postgres. */ if (!superuser()) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser role to start walsender"))); /* process any options passed in the startup packet */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ pgstat_bestart(); /* close the transaction we started above */ CommitTransactionCommand(); return; } /* * Set up the global variables holding database id and path. But note we * won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap case, otherwise we have to look up * the db name in pg_database. */ if (bootstrap) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); pfree(tuple); } else { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); pfree(tuple); } /* Now we can mark our PGPROC entry with the database ID */ /* (We assume this is an atomic store so no lock is needed) */ MyProc->databaseId = MyDatabaseId; /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we are already advertising our use of * the database in the PGPROC array; anyone trying a DROP DATABASE after * this point will see us there. * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != HeapTupleGetOid(tuple) || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase3(); /* * Now we have full access to catalog including toast tables, * we can process pg_authid.rolconfig. This ought to come before * processing startup options so that it can override the settings. */ if (!bootstrap) ProcessRoleGUC(); /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap) CheckMyDatabase(dbname, am_superuser); /* * Now process any command-line switches and any additional GUC variable * settings passed in the startup packet. We couldn't do this before * because we didn't know if client is a superuser. */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* * Maintenance Mode: allow superuser to connect when * gp_maintenance_conn GUC is set. We cannot check it until * process_startup_options parses the GUC. */ if (gp_maintenance_mode && Gp_role == GP_ROLE_DISPATCH && !(superuser() && gp_maintenance_conn)) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("maintenance mode: connected by superuser only"), errSendAlert(false))); /* * MPP: If we were started in utility mode then we only want to allow * incoming sessions that specify gp_session_role=utility as well. This * lets the bash scripts start the QD in utility mode and connect in but * protect ourselves from normal clients who might be trying to connect to * the system while we startup. */ if ((Gp_role == GP_ROLE_UTILITY) && (Gp_session_role != GP_ROLE_UTILITY)) { ereport(FATAL, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("System was started in master-only utility mode - only utility mode connections are allowed"))); } /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* * MPP package setup * * Primary function is to establish connctions to the qExecs. * This is SKIPPED when the database is in bootstrap mode or * Is not UnderPostmaster. */ if (!bootstrap && IsUnderPostmaster) { cdb_setup(); on_proc_exit( cdb_cleanup, 0 ); } /* * MPP SharedSnapshot Setup */ if (Gp_role == GP_ROLE_DISPATCH) { addSharedSnapshot("Query Dispatcher", gp_session_id); } else if (Gp_role == GP_ROLE_DISPATCHAGENT) { SharedLocalSnapshotSlot = NULL; } else if (Gp_segment == -1 && Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer) { /* * Entry db singleton QE is a user of the shared snapshot -- not a creator. * The lookup will occur once the distributed snapshot has been received. */ lookupSharedSnapshot("Entry DB Singleton", "Query Dispatcher", gp_session_id); } else if (Gp_role == GP_ROLE_EXECUTE) { if (Gp_is_writer) { addSharedSnapshot("Writer qExec", gp_session_id); } else { /* * NOTE: This assumes that the Slot has already been * allocated by the writer. Need to make sure we * always allocate the writer qExec first. */ lookupSharedSnapshot("Reader qExec", "Writer qExec", gp_session_id); } } /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); return; }