/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * In bootstrap mode no parameters are used. * * We also determine here whether the user ID is a superuser. (That * can only be tested inside a transaction, so we do it during * the startup transaction rather than starting a separate one in postgres.c.) * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, char *out_dbname) { bool bootstrap = IsBootstrapProcessingMode(); bool autovacuum = IsAutoVacuumWorkerProcess(); bool am_superuser; char *fullpath; char dbname[NAMEDATALEN]; /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* Initialize SessionState entry */ SessionState_Init(); /* Initialize memory protection */ GPMemoryProtect_Init(); #ifdef USE_ORCA /* Initialize GPOPT */ InitGPOPT(); #endif /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG. In bootstrap case we may * skip this since StartupXLOG() was run instead. */ if (!bootstrap) InitXLOGAccess(); /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. */ RelationCacheInitialize(); InitCatalogCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least entries for pg_database and catalogs used for authentication. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This has to * be after we've initialized all the low-level modules like the buffer * manager, because during shutdown this has to run before the low-level * modules start to close down. On the other hand, we want it in place * before we begin our first transaction --- if we fail during the * initialization transaction, as is entirely possible, we need the * AbortTransaction call to clean up. */ on_shmem_exit(ShutdownPostgres, 0); /* TODO: autovacuum launcher should be done here? */ /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin.
*/ if (!bootstrap) { StartTransactionCommand(); (void) GetTransactionSnapshot(); } /* * Figure out our postgres user id, and see if we are a superuser. * * In standalone mode and in the autovacuum process, we use a fixed id, * otherwise we figure it out from the authenticated user name. */ if (bootstrap || autovacuum) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" CREATEUSER;.", username))); } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username); am_superuser = superuser(); } /* * Check a normal user hasn't connected to a superuser reserved slot. */ if (!am_superuser && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("connection limit exceeded for non-superusers"), errSendAlert(true))); /* * If walsender, we don't want to connect to any particular database. Just * finish the backend startup by processing any options from the startup * packet, and we're done. */ if (am_walsender) { Assert(!bootstrap); /* * We don't have the replication role that exists in upstream PostgreSQL. */ if (!superuser()) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser role to start walsender"))); /* process any options passed in the startup packet */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ pgstat_bestart(); /* close the transaction we started above */ CommitTransactionCommand(); return; } /* * Set up the global variables holding database id and path. But note we * won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap case, otherwise we have to look up * the db name in pg_database.
*/ if (bootstrap) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); pfree(tuple); } else { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = HeapTupleGetOid(tuple); MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); pfree(tuple); } /* Now we can mark our PGPROC entry with the database ID */ /* (We assume this is an atomic store so no lock is needed) */ MyProc->databaseId = MyDatabaseId; /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we are already advertising our use of * the database in the PGPROC array; anyone trying a DROP DATABASE after * this point will see us there. * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != HeapTupleGetOid(tuple) || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. 
*/ RelationCacheInitializePhase3(); /* * Now that we have full access to the catalogs, including toast tables, * we can process pg_authid.rolconfig. This ought to come before * processing startup options so that it can override the settings. */ if (!bootstrap) ProcessRoleGUC(); /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap) CheckMyDatabase(dbname, am_superuser); /* * Now process any command-line switches and any additional GUC variable * settings passed in the startup packet. We couldn't do this before * because we didn't know if the client is a superuser. */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* * Maintenance Mode: allow superuser to connect when * gp_maintenance_conn GUC is set. We cannot check it until * process_startup_options parses the GUC. */ if (gp_maintenance_mode && Gp_role == GP_ROLE_DISPATCH && !(superuser() && gp_maintenance_conn)) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("maintenance mode: connected by superuser only"), errSendAlert(false))); /* * MPP: If we were started in utility mode then we only want to allow * incoming sessions that specify gp_session_role=utility as well. This * lets the bash scripts start the QD in utility mode and connect in, while * protecting us from normal clients who might be trying to connect to * the system while we start up. */ if ((Gp_role == GP_ROLE_UTILITY) && (Gp_session_role != GP_ROLE_UTILITY)) { ereport(FATAL, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("System was started in master-only utility mode - only utility mode connections are allowed"))); } /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* * MPP package setup * * Its primary function is to establish connections to the qExecs. * This is skipped when the database is in bootstrap mode or * is not running under the postmaster. */ if (!bootstrap && IsUnderPostmaster) { cdb_setup(); on_proc_exit(cdb_cleanup, 0); } /* * MPP SharedSnapshot Setup */ if (Gp_role == GP_ROLE_DISPATCH) { addSharedSnapshot("Query Dispatcher", gp_session_id); } else if (Gp_role == GP_ROLE_DISPATCHAGENT) { SharedLocalSnapshotSlot = NULL; } else if (Gp_segment == -1 && Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer) { /* * Entry db singleton QE is a user of the shared snapshot -- not a creator. * The lookup will occur once the distributed snapshot has been received. */ lookupSharedSnapshot("Entry DB Singleton", "Query Dispatcher", gp_session_id); } else if (Gp_role == GP_ROLE_EXECUTE) { if (Gp_is_writer) { addSharedSnapshot("Writer qExec", gp_session_id); } else { /* * NOTE: This assumes that the Slot has already been * allocated by the writer. Need to make sure we * always allocate the writer qExec first. */ lookupSharedSnapshot("Reader qExec", "Writer qExec", gp_session_id); } } /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); return; }
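/*
 * Illustrative only: a minimal sketch of how a backend entry point might
 * call this InitPostgres() variant, given the contract documented in its
 * header comment.  The function and parameter names below are hypothetical;
 * the real callers live in postgres.c and bootstrap.c.
 */
#ifdef INIT_POSTGRES_CALL_SKETCH
static void
backend_startup_sketch(const char *client_dbname, const char *client_username,
					   Oid client_dboid)
{
	char		dbname_buf[NAMEDATALEN];

	if (client_dbname != NULL)
	{
		/* Connect by name; dboid is ignored and out_dbname is not needed. */
		InitPostgres(client_dbname, InvalidOid, client_username, NULL);
	}
	else
	{
		/* Connect by OID; the canonical name comes back in dbname_buf. */
		InitPostgres(NULL, client_dboid, client_username, dbname_buf);
	}
}
#endif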
static void QDMirroringUpdate( QDMIRRORUpdateMask updateMask, bool validFlag, QDMIRRORState state, QDMIRRORDisabledReason disabledReason, struct timeval *lastLogTimeVal, char *errorMessage) { #define UPDATE_VALIDFLAG_CMD "update gp_configuration set valid='%c' where dbid = CAST(%d AS SMALLINT)" #define UPDATE_MASTER_MIRRORING_CMD "update gp_master_mirroring set (summary_state, detail_state, log_time, error_message) = ('%s', %s, '%s'::timestamptz, %s);" int count = 0; char cmd[200 + QDMIRRORErrorMessageSize * 2 + 3]; char detailValue[100]; char logTimeStr[128]; char *summaryStateString; char *detailStateString; char errorMessageQuoted[QDMIRRORErrorMessageSize * 2 + 3]; char *user; MemoryContext mcxt = CurrentMemoryContext; Segment *master = NULL; volatile PQExpBuffer entryBuffer = NULL; volatile PGconn *entryConn = NULL; volatile PGresult *rs = NULL; PG_TRY(); { StartTransactionCommand(); user = getDBSuperuserName("QDMirroringUpdate"); Assert(user != NULL); master = GetMasterSegment(); entryBuffer = createPQExpBuffer(); if (PQExpBufferBroken(entryBuffer)) { destroyPQExpBuffer(entryBuffer); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("QDMirroringUpdate: out of memory"))); /* not reached. */ } /* * initialize libpq connection buffer, we only need to initialize it * once. */ initPQConnectionBuffer(master, user, NULL, entryBuffer, true); FreeSegment(master); free(user); /* * Call libpq to connect */ entryConn = PQconnectdb(entryBuffer->data); if (PQstatus((PGconn *)entryConn) == CONNECTION_BAD) { /* * When we get an error, we strdup it here. When the main thread * checks for errors, it makes a palloc copy of this, and frees * this. */ char *error_message = strdup(PQerrorMessage((PGconn *)entryConn)); if (!error_message) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("QDMirroringUpdate: out of memory"))); } destroyPQExpBuffer(entryBuffer); PQfinish((PGconn *)entryConn); entryConn = NULL; elog(FATAL, "QDMirroringUpdate: setting segDB state failed, error connecting to entry db, error: %s", error_message); } /* finally, we're ready to actually get some stuff done. */ do { rs = PQexec((PGconn *)entryConn, "BEGIN"); if (PQresultStatus((PGresult *)rs) != PGRES_COMMAND_OK) break; if ((updateMask & QDMIRROR_UPDATEMASK_VALIDFLAG) != 0) { count = snprintf(cmd, sizeof(cmd), UPDATE_VALIDFLAG_CMD, (validFlag ? 
't' : 'f'), ftsQDMirrorInfo->dbid); if (count >= sizeof(cmd)) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("QDMirroringUpdate: format command string failure"))); } rs = PQexec((PGconn *)entryConn, cmd); if (PQresultStatus((PGresult *)rs) != PGRES_COMMAND_OK) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("QDMirroringUpdate: could not execute command '%s'", cmd))); break; } } if ((updateMask & QDMIRROR_UPDATEMASK_MASTERMIRRORING) != 0) { switch (state) { case QDMIRROR_STATE_NONE: summaryStateString = "None"; break; case QDMIRROR_STATE_NOTCONFIGURED: summaryStateString = "Not Configured"; break; case QDMIRROR_STATE_CONNECTINGWALSENDSERVER: case QDMIRROR_STATE_POSITIONINGTOEND: case QDMIRROR_STATE_CATCHUPPENDING: case QDMIRROR_STATE_CATCHINGUP: summaryStateString = "Synchronizing"; break; case QDMIRROR_STATE_SYNCHRONIZED: summaryStateString = "Synchronized"; break; case QDMIRROR_STATE_DISABLED: summaryStateString = "Not Synchronized"; break; default: summaryStateString = "Unknown"; break; } if (state == QDMIRROR_STATE_DISABLED) { detailStateString = QDMirroringDisabledReasonToString(disabledReason); } else { detailStateString = NULL; } if (detailStateString == NULL) { strcpy(detailValue, "null"); } else { count = snprintf(detailValue, sizeof(detailValue), "'%s'", detailStateString); if (count >= sizeof(detailValue)) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("QDMirroringUpdate: format command string failure"))); } } QDMirroringFormatTime(logTimeStr, sizeof(logTimeStr), lastLogTimeVal); /* * Escape-quote the error string before putting it in the DML statement... */ if (errorMessage != NULL) { int errorMessageLen = strlen(errorMessage); if (errorMessageLen == 0) { strcpy(errorMessageQuoted, "null"); } else { size_t escapedLen; errorMessageQuoted[0] = '\''; escapedLen = PQescapeString(&errorMessageQuoted[1], errorMessage, errorMessageLen); errorMessageQuoted[escapedLen + 1] = '\''; errorMessageQuoted[escapedLen + 2] = '\0'; elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Error message quoted: \"%s\"", errorMessageQuoted); } } else { strcpy(errorMessageQuoted, "null"); } count = snprintf(cmd, sizeof(cmd), UPDATE_MASTER_MIRRORING_CMD, summaryStateString, detailValue, logTimeStr, errorMessageQuoted); if (count >= sizeof(cmd)) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("QDMirroringUpdate: format command string failure"))); } rs = PQexec((PGconn *)entryConn, cmd); if (PQresultStatus((PGresult *)rs) != PGRES_COMMAND_OK) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("QDMirroringUpdate: could not execute command '%s'", cmd))); break; } elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Successfully executed command \"%s\"", cmd); rs = PQexec((PGconn *)entryConn, "COMMIT"); if (PQresultStatus((PGresult *)rs) != PGRES_COMMAND_OK) break; } } while (0); PQclear((PGresult *)rs); PQfinish((PGconn *)entryConn); destroyPQExpBuffer(entryBuffer); CommitTransactionCommand(); } PG_CATCH(); { PQclear((PGresult *)rs); PQfinish((PGconn *)entryConn); destroyPQExpBuffer(entryBuffer); AbortCurrentTransaction(); } PG_END_TRY(); MemoryContextSwitchTo(mcxt); /* Just in case we hit an error */ return; }
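/*
 * A self-contained sketch of the quoting pattern used above.  Per the libpq
 * documentation, PQescapeString() needs a destination buffer of at least
 * 2 * length + 1 bytes; the two extra bytes here hold the surrounding single
 * quotes.  The function name is hypothetical, for illustration only.  Note
 * that modern libpq prefers PQescapeStringConn(), which also knows the
 * connection's encoding and standard_conforming_strings setting.
 */
#ifdef QD_MIRRORING_QUOTE_SKETCH
#include <string.h>
#include "libpq-fe.h"

static void
quote_for_sql(const char *message,
			  char *quoted /* at least 2 * strlen(message) + 3 bytes */ )
{
	size_t		escaped;

	quoted[0] = '\'';
	escaped = PQescapeString(&quoted[1], message, strlen(message));
	quoted[escaped + 1] = '\'';
	quoted[escaped + 2] = '\0';
}
#endif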
/* * Primary entry point for VACUUM and ANALYZE commands. * * options is a bitmask of VacuumOption flags, indicating what to do. * * relid, if not InvalidOid, indicates the relation to process; otherwise, * the RangeVar is used. (The latter must always be passed, because it's * used for error messages.) * * params contains a set of parameters that can be used to customize the * behavior. * * va_cols is a list of columns to analyze, or NIL to process them all. * * bstrategy is normally given as NULL, but in autovacuum it can be passed * in to use the same buffer strategy object across multiple vacuum() calls. * * isTopLevel should be passed down from ProcessUtility. * * It is the caller's responsibility that all parameters are allocated in a * memory context that will not disappear at transaction commit. */ void vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params, List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel) { const char *stmttype; volatile bool in_outer_xact, use_own_xacts; List *relations; static bool in_vacuum = false; Assert(params != NULL); stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside * a transaction, then our commit- and start-transaction-command calls * would not have the intended effect! There are numerous other subtle * dependencies on this, too. * * ANALYZE (without VACUUM) can run either way. */ if (options & VACOPT_VACUUM) { PreventTransactionChain(isTopLevel, stmttype); in_outer_xact = false; } else in_outer_xact = IsInTransactionChain(isTopLevel); /* * Due to static variables vac_context, anl_context and vac_strategy, * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE * calls a hostile index expression that itself calls ANALYZE. */ if (in_vacuum) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("%s cannot be executed from VACUUM or ANALYZE", stmttype))); /* * Send info about dead objects to the statistics collector, unless we are * in autovacuum --- autovacuum.c does this for itself. */ if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) pgstat_vacuum_stat(); /* * Create special memory context for cross-transaction storage. * * Since it is a child of PortalContext, it will go away eventually even * if we suffer an error; there's no need for special abort cleanup logic. */ vac_context = AllocSetContextCreate(PortalContext, "Vacuum", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* * If caller didn't give us a buffer strategy object, make one in the * cross-transaction memory context. */ if (bstrategy == NULL) { MemoryContext old_context = MemoryContextSwitchTo(vac_context); bstrategy = GetAccessStrategy(BAS_VACUUM); MemoryContextSwitchTo(old_context); } vac_strategy = bstrategy; /* * Build list of relations to process, unless caller gave us one. (If we * build one, we put it in vac_context for safekeeping.) */ relations = get_rel_oids(relid, relation); /* * Decide whether we need to start/commit our own transactions. * * For VACUUM (with or without ANALYZE): always do so, so that we can * release locks as soon as possible. (We could possibly use the outer * transaction for a one-table VACUUM, but handling TOAST tables would be * problematic.) * * For ANALYZE (no VACUUM): if inside a transaction block, we cannot * start/commit our own transactions. Also, there's no need to do so if * only processing one relation.
For multiple relations when not within a * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. */ if (options & VACOPT_VACUUM) use_own_xacts = true; else { Assert(options & VACOPT_ANALYZE); if (IsAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) use_own_xacts = false; else if (list_length(relations) > 1) use_own_xacts = true; else use_own_xacts = false; } /* * vacuum_rel expects to be entered with no transaction active; it will * start and commit its own transaction. But we are called by an SQL * command, and so we are executing inside a transaction already. We * commit the transaction started in PostgresMain() here, and start * another one before exiting to match the commit waiting for us back in * PostgresMain(). */ if (use_own_xacts) { Assert(!in_outer_xact); /* ActiveSnapshot is not set by autovacuum */ if (ActiveSnapshotSet()) PopActiveSnapshot(); /* matches the StartTransaction in PostgresMain() */ CommitTransactionCommand(); } /* Turn vacuum cost accounting on or off */ PG_TRY(); { ListCell *cur; in_vacuum = true; VacuumCostActive = (VacuumCostDelay > 0); VacuumCostBalance = 0; VacuumPageHit = 0; VacuumPageMiss = 0; VacuumPageDirty = 0; /* * Loop to process each selected relation. */ foreach(cur, relations) { Oid relid = lfirst_oid(cur); if (options & VACOPT_VACUUM) { if (!vacuum_rel(relid, relation, options, params)) continue; } if (options & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, * we can use the outer transaction. */ if (use_own_xacts) { StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); } analyze_rel(relid, relation, options, params, va_cols, in_outer_xact, vac_strategy); if (use_own_xacts) { PopActiveSnapshot(); CommitTransactionCommand(); } } } }
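/*
 * For reference, the use_own_xacts decision above collapses to this table
 * (derived directly from the code):
 *
 *   VACUUM (with or without ANALYZE)            -> true
 *   ANALYZE in an autovacuum worker             -> true
 *   ANALYZE inside a transaction block          -> false
 *   ANALYZE at top level, multiple relations    -> true
 *   ANALYZE at top level, single relation       -> false
 */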
/* -------------------------------- * InitPostgres * Initialize POSTGRES. * * The database can be specified by name, using the in_dbname parameter, or by * OID, using the dboid parameter. In the latter case, the actual database * name can be returned to the caller in out_dbname. If out_dbname isn't * NULL, it must point to a buffer of size NAMEDATALEN. * * Similarly, the username can be passed by name, using the username parameter, * or by OID using the useroid parameter. * * In bootstrap mode no parameters are used. The autovacuum launcher process * doesn't use any parameters either, because it only goes far enough to be * able to read pg_database; it doesn't connect to any particular database. * In walsender mode only username is used. * * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we * already have a PGPROC struct ... but it's not completely filled in yet. * * Note: * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, char *out_dbname, bool override_allow_connections) { bool bootstrap = IsBootstrapProcessingMode(); bool am_superuser; char *fullpath; char dbname[NAMEDATALEN]; elog(DEBUG3, "InitPostgres"); /* * Add my PGPROC struct to the ProcArray. * * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend ID: %d", MyBackendId); /* Now that we have a BackendId, we can participate in ProcSignal */ ProcSignalInit(MyBackendId); /* * Also set up timeout handlers needed for backend operation. We need * these in every case except bootstrap. */ if (!bootstrap) { RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLockAlert); RegisterTimeout(STATEMENT_TIMEOUT, StatementTimeoutHandler); RegisterTimeout(LOCK_TIMEOUT, LockTimeoutHandler); RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IdleInTransactionSessionTimeoutHandler); } /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* * Initialize local process's access to XLOG. */ if (IsUnderPostmaster) { /* * The postmaster already started the XLOG machinery, but we need to * call InitXLOGAccess(), if the system isn't in hot-standby mode. * This is handled by calling RecoveryInProgress and ignoring the * result. */ (void) RecoveryInProgress(); } else { /* * We are either a bootstrap process or a standalone backend. Either * way, start up the XLOG machinery, and register to have it closed * down at exit. * * We don't yet have an aux-process resource owner, but StartupXLOG * and ShutdownXLOG will need one. Hence, create said resource owner * (and register a callback to clean it up after ShutdownXLOG runs). */ CreateAuxProcessResourceOwner(); StartupXLOG(); /* Release (and warn about) any buffer pins leaked in StartupXLOG */ ReleaseAuxProcessResources(true); /* Reset CurrentResourceOwner to nothing for the moment */ CurrentResourceOwner = NULL; on_shmem_exit(ShutdownXLOG, 0); } /* * Initialize the relation cache and the system catalog caches. Note that * no catalog access happens here; we only set up the hashtable structure. * We must do this before starting a transaction because transaction abort * would try to touch these hashtables. 
*/ RelationCacheInitialize(); InitCatalogCache(); InitPlanCache(); /* Initialize portal manager */ EnablePortalManager(); /* Initialize stats collection --- must happen before first xact */ if (!bootstrap) pgstat_initialize(); /* * Load relcache entries for the shared system catalogs. This must create * at least entries for pg_database and catalogs used for authentication. */ RelationCacheInitializePhase2(); /* * Set up process-exit callback to do pre-shutdown cleanup. This is the * first before_shmem_exit callback we register; thus, this will be the * last thing we do before low-level modules like the buffer manager begin * to close down. We need to have this in place before we begin our first * transaction --- if we fail during the initialization transaction, as is * entirely possible, we need the AbortTransaction call to clean up. */ before_shmem_exit(ShutdownPostgres, 0); /* The autovacuum launcher is done here */ if (IsAutoVacuumLauncherProcess()) { /* report this backend in the PgBackendStatus array */ pgstat_bestart(); return; } /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're * interested in the secondary effect that it sets RecentGlobalXmin. (This * is critical for anything that reads heap pages, because HOT may decide * to prune them even if the process doesn't attempt to modify any * tuples.) */ if (!bootstrap) { /* statement_timestamp must be set for timeouts to work correctly */ SetCurrentStatementStartTimestamp(); StartTransactionCommand(); /* * transaction_isolation will have been set to the default by the * above. If the default is "serializable", and we are in hot * standby, we will fail if we don't change it to something lower. * Fortunately, "read committed" is plenty good enough. */ XactIsoLevel = XACT_READ_COMMITTED; (void) GetTransactionSnapshot(); } /* * Perform client authentication if necessary, then figure out our * postgres user ID, and see if we are a superuser. * * In standalone mode and in autovacuum worker processes, we use a fixed * ID, otherwise we figure it out from the authenticated user name. */ if (bootstrap || IsAutoVacuumWorkerProcess()) { InitializeSessionUserIdStandalone(); am_superuser = true; } else if (!IsUnderPostmaster) { InitializeSessionUserIdStandalone(); am_superuser = true; if (!ThereIsAtLeastOneRole()) ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("no roles are defined in this database system"), errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.", username != NULL ? username : "postgres"))); } else if (IsBackgroundWorker) { if (username == NULL && !OidIsValid(useroid)) { InitializeSessionUserIdStandalone(); am_superuser = true; } else { InitializeSessionUserId(username, useroid); am_superuser = superuser(); } } else { /* normal multiuser case */ Assert(MyProcPort != NULL); PerformAuthentication(MyProcPort); InitializeSessionUserId(username, useroid); am_superuser = superuser(); } /* * If we're trying to shut down, only superusers can connect, and new * replication connections are not allowed.
*/ if ((!am_superuser || am_walsender) && MyProcPort != NULL && MyProcPort->canAcceptConnections == CAC_WAITBACKUP) { if (am_walsender) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("new replication connections are not allowed during database shutdown"))); else ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect during database shutdown"))); } /* * Binary upgrades only allowed super-user connections */ if (IsBinaryUpgrade && !am_superuser) { ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to connect in binary upgrade mode"))); } /* * The last few connection slots are reserved for superusers. Replication * connections are drawn from slots reserved with max_wal_senders and not * limited by max_connections or superuser_reserved_connections. */ if (!am_superuser && !am_walsender && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("remaining connection slots are reserved for non-replication superuser connections"))); /* Check replication permissions needed for walsender processes. */ if (am_walsender) { Assert(!bootstrap); if (!superuser() && !has_rolreplication(GetUserId())) ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to start walsender"))); } /* * If this is a plain walsender only supporting physical replication, we * don't want to connect to any particular database. Just finish the * backend startup by processing any options from the startup packet, and * we're done. */ if (am_walsender && !am_db_walsender) { /* process any options passed in the startup packet */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* initialize client encoding */ InitializeClientEncoding(); /* report this backend in the PgBackendStatus array */ pgstat_bestart(); /* close the transaction we started above */ CommitTransactionCommand(); return; } /* * Set up the global variables holding database id and default tablespace. * But note we won't actually try to touch the database just yet. * * We take a shortcut in the bootstrap case, otherwise we have to look up * the db's entry in pg_database. 
*/ if (bootstrap) { MyDatabaseId = TemplateDbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) { HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTuple(in_dbname); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = dbform->oid; MyDatabaseTableSpace = dbform->dattablespace; /* take database name from the caller, just for paranoia */ strlcpy(dbname, in_dbname, sizeof(dbname)); } else if (OidIsValid(dboid)) { /* caller specified database by OID */ HeapTuple tuple; Form_pg_database dbform; tuple = GetDatabaseTupleByOid(dboid); if (!HeapTupleIsValid(tuple)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database %u does not exist", dboid))); dbform = (Form_pg_database) GETSTRUCT(tuple); MyDatabaseId = dbform->oid; MyDatabaseTableSpace = dbform->dattablespace; Assert(MyDatabaseId == dboid); strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); /* pass the database name back to the caller */ if (out_dbname) strcpy(out_dbname, dbname); } else { /* * If this is a background worker not bound to any particular * database, we're done now. Everything that follows only makes sense * if we are bound to a specific database. We do need to close the * transaction we started before returning. */ if (!bootstrap) { pgstat_bestart(); CommitTransactionCommand(); } return; } /* * Now, take a writer's lock on the database we are trying to connect to. * If there is a concurrently running DROP DATABASE on that database, this * will block us until it finishes (and has committed its update of * pg_database). * * Note that the lock is not held long, only until the end of this startup * transaction. This is OK since we will advertise our use of the * database in the ProcArray before dropping the lock (in fact, that's the * next thing to do). Anyone trying a DROP DATABASE after this point will * see us in the array once they have the lock. Ordering is important for * this because we don't want to advertise ourselves as being in this * database until we have the lock; otherwise we create what amounts to a * deadlock with CountOtherDBBackends(). * * Note: use of RowExclusiveLock here is reasonable because we envision * our session as being a concurrent writer of the database. If we had a * way of declaring a session as being guaranteed-read-only, we could use * AccessShareLock for such sessions and thereby not conflict against * CREATE DATABASE. */ if (!bootstrap) LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, RowExclusiveLock); /* * Now we can mark our PGPROC entry with the database ID. * * We assume this is an atomic store so no lock is needed; though actually * things would work fine even if it weren't atomic. Anyone searching the * ProcArray for this database's ID should hold the database lock, so they * would not be executing concurrently with this store. A process looking * for another database's ID could in theory see a chance match if it read * a partially-updated databaseId value; but as long as all such searches * wait and retry, as in CountOtherDBBackends(), they will certainly see * the correct value on their next try. 
*/ MyProc->databaseId = MyDatabaseId; /* * We established a catalog snapshot while reading pg_authid and/or * pg_database; but until we have set up MyDatabaseId, we won't react to * incoming sinval messages for unshared catalogs, so we won't realize it * if the snapshot has been invalidated. Assume it's no good anymore. */ InvalidateCatalogSnapshot(); /* * Recheck pg_database to make sure the target database hasn't gone away. * If there was a concurrent DROP DATABASE, this ensures we will die * cleanly without creating a mess. */ if (!bootstrap) { HeapTuple tuple; tuple = GetDatabaseTuple(dbname); if (!HeapTupleIsValid(tuple) || MyDatabaseId != ((Form_pg_database) GETSTRUCT(tuple))->oid || MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("It seems to have just been dropped or renamed."))); } /* * Now we should be able to access the database directory safely. Verify * it's there and looks reasonable. */ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); if (!bootstrap) { if (access(fullpath, F_OK) == -1) { if (errno == ENOENT) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname), errdetail("The database subdirectory \"%s\" is missing.", fullpath))); else ereport(FATAL, (errcode_for_file_access(), errmsg("could not access directory \"%s\": %m", fullpath))); } ValidatePgVersion(fullpath); } SetDatabasePath(fullpath); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase3(); /* set up ACL framework (so CheckMyDatabase can check permissions) */ initialize_acl(); /* * Re-read the pg_database row for our database, check permissions and set * up database-specific GUC settings. We can't do this until all the * database-access infrastructure is up. (Also, it wants to know if the * user is a superuser, so the above stuff has to happen first.) */ if (!bootstrap) CheckMyDatabase(dbname, am_superuser, override_allow_connections); /* * Now process any command-line switches and any additional GUC variable * settings passed in the startup packet. We couldn't do this before * because we didn't know if client is a superuser. */ if (MyProcPort != NULL) process_startup_options(MyProcPort, am_superuser); /* Process pg_db_role_setting options */ process_settings(MyDatabaseId, GetSessionUserId()); /* Apply PostAuthDelay as soon as we've read all options */ if (PostAuthDelay > 0) pg_usleep(PostAuthDelay * 1000000L); /* * Initialize various default states that can't be set up until we've * selected the active user and gotten the right GUC settings. */ /* set default namespace search path */ InitializeSearchPath(); /* initialize client encoding */ InitializeClientEncoding(); /* Initialize this backend's session state. */ InitializeSession(); /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); /* close the transaction we started above */ if (!bootstrap) CommitTransactionCommand(); }
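/*
 * Illustrative only: background workers reach the dboid/useroid path of
 * InitPostgres() above through the bgworker connection helpers rather than
 * by calling it directly.  A minimal sketch (my_worker_main is hypothetical;
 * the flags argument of BackgroundWorkerInitializeConnectionByOid() exists
 * as of PostgreSQL 11, and passing BGWORKER_BYPASS_ALLOWCONN there is what
 * drives the override_allow_connections parameter above):
 */
#ifdef BGWORKER_CONNECT_BY_OID_SKETCH
void
my_worker_main(Datum main_arg)
{
	Oid			my_dboid = DatumGetObjectId(main_arg);

	BackgroundWorkerUnblockSignals();

	/*
	 * Connect as the bootstrap superuser (InvalidOid) to the database given
	 * by OID; this lands in InitPostgres() with in_dbname == NULL.
	 */
	BackgroundWorkerInitializeConnectionByOid(my_dboid, InvalidOid, 0);
}
#endif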
static config_log_objects * initialize_objects(void) { config_log_objects *objects; int ret; int ntup; bool isnull; StringInfoData buf; objects = palloc(sizeof(config_log_objects)); objects->table_name = pstrdup("pg_settings_log"); objects->function_name = pstrdup("pg_settings_logger"); SetCurrentStatementStartTimestamp(); StartTransactionCommand(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, "Verifying config log objects"); initStringInfo(&buf); appendStringInfo( &buf, "SELECT COUNT(*)\ FROM information_schema.tables\ WHERE table_schema='%s'\ AND table_name ='%s'\ AND table_type='BASE TABLE'", config_log_schema, objects->table_name ); ret = SPI_execute(buf.data, true, 0); if (ret != SPI_OK_SELECT) { ereport(FATAL, (errmsg("SPI_execute failed: SPI error code %d", ret) )); } /* This should never happen */ if (SPI_processed != 1) { elog(FATAL, "not a singleton result"); } ntup = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); /* This should never happen */ if (isnull) { elog(FATAL, "null result"); } if (ntup == 0) { ereport(FATAL, ( errmsg("Expected config log table '%s.%s' not found", config_log_schema, objects->table_name), errhint("Ensure superuser search_path includes the schema used by config_log; " "check config_log.* GUC settings") )); } /* check function pg_settings_logger() exists */ resetStringInfo(&buf); appendStringInfo( &buf, "SELECT COUNT(*) FROM pg_catalog.pg_proc p \ INNER JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace \ WHERE p.proname='%s' \ AND n.nspname='%s' \ AND p.pronargs = 0", objects->function_name, config_log_schema ); ret = SPI_execute(buf.data, true, 0); if (ret != SPI_OK_SELECT) { ereport(FATAL, (errmsg("SPI_execute failed: SPI error code %d", ret))); } if (SPI_processed != 1) { elog(FATAL, "not a singleton result"); } ntup = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); if (isnull) { elog(FATAL, "null result"); } if (ntup == 0) { ereport(FATAL, ( errmsg("Expected config log function '%s.%s' not found", config_log_schema, objects->function_name), errhint("Ensure superuser search_path includes the schema used by config_log; " "check config_log.* GUC settings") )); } SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_activity(STATE_IDLE, NULL); log_info("initialized, database objects validated"); /* execute pg_settings_logger() here to catch any settings which have changed after server restart */ execute_pg_settings_logger(objects); return objects; }
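/*
 * execute_pg_settings_logger() is called above but not shown in this
 * excerpt.  A minimal sketch of what such a call looks like, following the
 * same SPI and transaction conventions as initialize_objects(); the actual
 * body in config_log may differ.
 */
#ifdef CONFIG_LOG_EXEC_SKETCH
static void
execute_pg_settings_logger_sketch(config_log_objects *objects)
{
	StringInfoData buf;

	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "Executing pg_settings_logger()");

	initStringInfo(&buf);
	appendStringInfo(&buf, "SELECT %s.%s()",
					 config_log_schema, objects->function_name);

	if (SPI_execute(buf.data, false, 0) != SPI_OK_SELECT)
		ereport(FATAL,
				(errmsg("SPI_execute failed executing \"%s\"", buf.data)));

	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);
}
#endif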
static void SaveBuffers(void) { int i; int num_buffers; int log_level = DEBUG3; SavedBuffer *saved_buffers; volatile BufferDesc *bufHdr; /* XXX: do we really need volatile here? */ FILE *file = NULL; int database_counter = 0; Oid prev_database = InvalidOid; Oid prev_filenode = InvalidOid; ForkNumber prev_forknum = InvalidForkNumber; BlockNumber prev_blocknum = InvalidBlockNumber; BlockNumber range_counter = 0; const char *savefile_path; /* * XXX: If the memory request fails, ask for a smaller memory chunk, and use * it to create chunks of save-files, and make the workers read those chunks. * * This is not a concern as of now, so deferred; there's at least one other * place that allocates (NBuffers * (much_bigger_struct)), so this seems to * be an acceptable practice. */ saved_buffers = (SavedBuffer *) palloc(sizeof(SavedBuffer) * NBuffers); /* Lock the buffer partitions for reading. */ for (i = 0; i < NUM_BUFFER_PARTITIONS; ++i) LWLockAcquire(FirstBufMappingLock + i, LW_SHARED); /* Scan and save a list of valid buffers. */ for (num_buffers = 0, i = 0, bufHdr = BufferDescriptors; i < NBuffers; ++i, ++bufHdr) { /* Lock each buffer header before inspecting. */ LockBufHdr(bufHdr); /* Skip invalid buffers */ if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID)) { saved_buffers[num_buffers].database = bufHdr->tag.rnode.dbNode; saved_buffers[num_buffers].filenode = bufHdr->tag.rnode.relNode; saved_buffers[num_buffers].forknum = bufHdr->tag.forkNum; saved_buffers[num_buffers].blocknum = bufHdr->tag.blockNum; ++num_buffers; } UnlockBufHdr(bufHdr); } /* Unlock the buffer partitions in reverse order, to avoid a deadlock. */ for (i = NUM_BUFFER_PARTITIONS - 1; i >= 0; --i) LWLockRelease(FirstBufMappingLock + i); /* * Sort the list, so that we can optimize the storage of these buffers. * * The side-effect of this storage optimization is that when reading the * blocks back from relation forks, it leads to sequential reads, which * improve restore speed quite considerably as compared to random reads * from blocks scattered all over the data directory. */ pg_qsort(saved_buffers, num_buffers, sizeof(SavedBuffer), SavedBufferCmp); /* Connect to the database and start a transaction for database name lookups. */ BackgroundWorkerInitializeConnection(guc_default_database, NULL); SetCurrentStatementStartTimestamp(); StartTransactionCommand(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, "saving buffers"); for (i = 0; i < num_buffers; ++i) { int j; SavedBuffer *buf = &saved_buffers[i]; if (i == 0) { /* * Special case for global objects. The sort brings them to the * front of the list. */ /* Make sure the first buffer we save belongs to a global object. */ Assert(buf->database == InvalidOid); /* * Database number (and save-file name) 1 is reserved for storing the * list of buffers of global objects. */ database_counter = 1; savefile_path = getSavefileName(database_counter); file = fileOpen(savefile_path, PG_BINARY_W); writeDBName("", file, savefile_path); prev_database = buf->database; } if (buf->database != prev_database) { char *dbname; /* * We are beginning to process a different database than the * previous one; close the save-file of the previous database, and open * a new one. */ ++database_counter; dbname = get_database_name(buf->database); Assert(dbname != NULL); if (file != NULL) fileClose(file, savefile_path); savefile_path = getSavefileName(database_counter); file = fileOpen(savefile_path, PG_BINARY_W); writeDBName(dbname, file, savefile_path); pfree(dbname); /* Reset trackers appropriately */ prev_database = buf->database; prev_filenode = InvalidOid; prev_forknum = InvalidForkNumber; prev_blocknum = InvalidBlockNumber; range_counter = 0; } if (buf->filenode != prev_filenode) { /* We're beginning to process a new relation; emit a record for it. */ fileWrite("r", 1, file, savefile_path); fileWrite(&(buf->filenode), sizeof(Oid), file, savefile_path); /* Reset trackers appropriately */ prev_filenode = buf->filenode; prev_forknum = InvalidForkNumber; prev_blocknum = InvalidBlockNumber; range_counter = 0; } if (buf->forknum != prev_forknum) { /* * We're beginning to process a new fork of this relation; add a * record for it. */ fileWrite("f", 1, file, savefile_path); fileWrite(&(buf->forknum), sizeof(ForkNumber), file, savefile_path); /* Reset trackers appropriately */ prev_forknum = buf->forknum; prev_blocknum = InvalidBlockNumber; range_counter = 0; } ereport(log_level, (errmsg("writer: writing block db %d filenode %d forknum %d blocknum %d", database_counter, prev_filenode, prev_forknum, buf->blocknum))); fileWrite("b", 1, file, savefile_path); fileWrite(&(buf->blocknum), sizeof(BlockNumber), file, savefile_path); prev_blocknum = buf->blocknum; /* * If a contiguous range of blocks follows this block, then emit one * entry for the range, instead of one for each block. */ range_counter = 0; for (j = i + 1; j < num_buffers; ++j) { SavedBuffer *tmp = &saved_buffers[j]; if (tmp->database == prev_database && tmp->filenode == prev_filenode && tmp->forknum == prev_forknum && tmp->blocknum == (prev_blocknum + range_counter + 1)) { ++range_counter; } else break; /* the list is sorted, so the run cannot resume */ } if (range_counter != 0) { ereport(log_level, (errmsg("writer: writing range db %d filenode %d forknum %d blocknum %d range %d", database_counter, prev_filenode, prev_forknum, prev_blocknum, range_counter))); fileWrite("N", 1, file, savefile_path); fileWrite(&range_counter, sizeof(range_counter), file, savefile_path); i += range_counter; } } ereport(LOG, (errmsg("Buffer Saver: saved metadata of %d blocks", num_buffers))); Assert(file != NULL); fileClose(file, savefile_path); pfree(saved_buffers); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_activity(STATE_IDLE, NULL); }
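/*
 * For reference, the save-file grammar emitted above, reconstructed from
 * the writer's fileWrite() calls:
 *
 *   <database name>                  written once by writeDBName()
 *   'r' <Oid filenode>               begin a new relation
 *   'f' <ForkNumber forknum>         begin a new fork of that relation
 *   'b' <BlockNumber blocknum>       one block of that fork
 *   'N' <BlockNumber range>          the previous 'b' block is followed by
 *                                    <range> consecutive blocks
 *
 * File number 1 holds buffers of shared (global) objects; files 2 and up
 * hold one database each.  ReadBlocks() consumes the same grammar.
 */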
void worker_spi_main(Datum main_arg) { int index = DatumGetInt32(main_arg); worktable *table; StringInfoData buf; char name[20]; table = palloc(sizeof(worktable)); sprintf(name, "schema%d", index); table->schema = pstrdup(name); table->name = pstrdup("counted"); /* Establish signal handlers before unblocking signals. */ pqsignal(SIGHUP, worker_spi_sighup); pqsignal(SIGTERM, worker_spi_sigterm); /* We're now ready to receive signals */ BackgroundWorkerUnblockSignals(); /* Connect to our database */ BackgroundWorkerInitializeConnection("postgres", NULL); elog(LOG, "%s initialized with %s.%s", MyBgworkerEntry->bgw_name, table->schema, table->name); initialize_worker_spi(table); /* * Quote identifiers passed to us. Note that this must be done after * initialize_worker_spi, because that routine assumes the names are not * quoted. * * Note some memory might be leaked here. */ table->schema = quote_identifier(table->schema); table->name = quote_identifier(table->name); initStringInfo(&buf); appendStringInfo(&buf, "WITH deleted AS (DELETE " "FROM %s.%s " "WHERE type = 'delta' RETURNING value), " "total AS (SELECT coalesce(sum(value), 0) as sum " "FROM deleted) " "UPDATE %s.%s " "SET value = %s.value + total.sum " "FROM total WHERE type = 'total' " "RETURNING %s.value", table->schema, table->name, table->schema, table->name, table->name, table->name); /* * Main loop: do this until the SIGTERM handler tells us to terminate */ while (!got_sigterm) { int ret; int rc; /* * Background workers mustn't call usleep() or any direct equivalent: * instead, they may wait on their process latch, which sleeps as * necessary, but is awakened if postmaster dies. That way the * background process goes away immediately in an emergency. */ rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, worker_spi_naptime * 1000L, PG_WAIT_EXTENSION); ResetLatch(MyLatch); /* emergency bailout if postmaster has died */ if (rc & WL_POSTMASTER_DEATH) proc_exit(1); /* * In case of a SIGHUP, just reload the configuration. */ if (got_sighup) { got_sighup = false; ProcessConfigFile(PGC_SIGHUP); } /* * Start a transaction on which we can run queries. Note that each * StartTransactionCommand() call should be preceded by a * SetCurrentStatementStartTimestamp() call, which sets both the time * for the statement we're about the run, and also the transaction * start time. Also, each other query sent to SPI should probably be * preceded by SetCurrentStatementStartTimestamp(), so that statement * start time is always up to date. * * The SPI_connect() call lets us run queries through the SPI manager, * and the PushActiveSnapshot() call creates an "active" snapshot * which is necessary for queries to have MVCC data to work on. * * The pgstat_report_activity() call makes our activity visible * through the pgstat views. */ SetCurrentStatementStartTimestamp(); StartTransactionCommand(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, buf.data); /* We can now execute queries via SPI */ ret = SPI_execute(buf.data, false, 0); if (ret != SPI_OK_UPDATE_RETURNING) elog(FATAL, "cannot select from table %s.%s: error code %d", table->schema, table->name, ret); if (SPI_processed > 0) { bool isnull; int32 val; val = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); if (!isnull) elog(LOG, "%s: count in %s.%s is now %d", MyBgworkerEntry->bgw_name, table->schema, table->name, val); } /* * And finish our transaction. 
*/ SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_stat(false); pgstat_report_activity(STATE_IDLE, NULL); } proc_exit(1); }
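/*
 * Illustrative only: worker_spi_main() above is launched by registering
 * background workers at module load time.  A minimal registration sketch
 * using the standard bgworker API, assuming the module's existing includes
 * (the real worker_spi module additionally defines GUCs and a dynamic-launch
 * SQL function not shown here):
 */
#ifdef WORKER_SPI_REGISTER_SKETCH
void
_PG_init(void)
{
	BackgroundWorker worker;
	int			i;

	memset(&worker, 0, sizeof(worker));
	worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
		BGWORKER_BACKEND_DATABASE_CONNECTION;
	worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
	worker.bgw_restart_time = BGW_NEVER_RESTART;
	sprintf(worker.bgw_library_name, "worker_spi");
	sprintf(worker.bgw_function_name, "worker_spi_main");

	/* Register two workers; each receives its index as main_arg. */
	for (i = 1; i <= 2; i++)
	{
		snprintf(worker.bgw_name, BGW_MAXLEN, "worker_spi worker %d", i);
		worker.bgw_main_arg = Int32GetDatum(i);
		RegisterBackgroundWorker(&worker);
	}
}
#endif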
/* * ProcessCatchupEvent * * Respond to a catchup event (PROCSIG_CATCHUP_INTERRUPT) from another * backend. * * This is called either directly from the PROCSIG_CATCHUP_INTERRUPT * signal handler, or the next time control reaches the outer idle loop * (assuming there's still anything to do by then). */ static void ProcessCatchupEvent(void) { bool notify_enabled; bool client_wait_timeout_enabled; DtxContext saveDistributedTransactionContext; /* * Funny indentation to keep the code inside identical to upstream * while at the same time supporting CMockery which has problems with * multiple bracing on column 1. */ PG_TRY(); { in_process_catchup_event = 1; /* Must prevent SIGUSR2 and SIGALRM(for IdleSessionGangTimeout) interrupt while I am running */ notify_enabled = DisableNotifyInterrupt(); client_wait_timeout_enabled = DisableClientWaitTimeoutInterrupt(); /* * What we need to do here is cause ReceiveSharedInvalidMessages() to run, * which will do the necessary work and also reset the * catchupInterruptOccurred flag. If we are inside a transaction we can * just call AcceptInvalidationMessages() to do this. If we aren't, we * start and immediately end a transaction; the call to * AcceptInvalidationMessages() happens down inside transaction start. * * It is awfully tempting to just call AcceptInvalidationMessages() * without the rest of the xact start/stop overhead, and I think that * would actually work in the normal case; but I am not sure that things * would clean up nicely if we got an error partway through. */ if (IsTransactionOrTransactionBlock()) { elog(DEBUG1, "ProcessCatchupEvent inside transaction"); AcceptInvalidationMessages(); } else { elog(DEBUG1, "ProcessCatchupEvent outside transaction"); /* * Save distributed transaction context first. */ saveDistributedTransactionContext = DistributedTransactionContext; DistributedTransactionContext = DTX_CONTEXT_LOCAL_ONLY; StartTransactionCommand(); CommitTransactionCommand(); DistributedTransactionContext = saveDistributedTransactionContext; } if (notify_enabled) EnableNotifyInterrupt(); if (client_wait_timeout_enabled) EnableClientWaitTimeoutInterrupt(); in_process_catchup_event = 0; } PG_CATCH(); { in_process_catchup_event = 0; PG_RE_THROW(); } PG_END_TRY(); }
static void ReadBlocks(int filenum) { FILE *file; char record_type; char *dbname; Oid record_filenode; ForkNumber record_forknum; BlockNumber record_blocknum; BlockNumber record_range; int log_level = DEBUG3; Oid relOid = InvalidOid; Relation rel = NULL; bool skip_relation = false; bool skip_fork = false; bool skip_block = false; BlockNumber nblocks = 0; BlockNumber blocks_restored = 0; const char *filepath; /* * If this condition changes, then this code, and the code in the writer * will need to be changed; especially the format specifiers in log and * error messages. */ StaticAssertStmt(MaxBlockNumber == 0xFFFFFFFE, "Code may need review."); filepath = getSavefileName(filenum); file = fileOpen(filepath, PG_BINARY_R); dbname = readDBName(file, filepath); /* * When restoring global objects, the dbname is a zero-length string, and non- * zero length otherwise. And filenum is never expected to be smaller than 1. */ Assert(filenum >= 1); Assert(filenum == 1 ? strlen(dbname) == 0 : strlen(dbname) > 0); /* To restore the global objects, use default database */ BackgroundWorkerInitializeConnection(filenum == 1 ? guc_default_database : dbname, NULL); SetCurrentStatementStartTimestamp(); StartTransactionCommand(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, "restoring buffers"); /* * Note that in case of a read error, we will leak the relcache entry that we * may currently have open. In case of EOF, we close the relation after the loop. */ while (fileRead(&record_type, 1, file, true, filepath)) { /* * If we want to process the signals, this seems to be the best place * to do it. Generally the backends refrain from processing the config * file while in a transaction, but that's mostly for fear of allowing GUC * changes to affect expression evaluation, causing different results * for the same expression in a transaction. Since this worker is not * processing any queries, it is okay to process the config file here. * * Even though it's okay to process SIGHUP here, doing so doesn't add * any value. The only reason we might want to process the config file here * would be to allow the user to interrupt the BlockReader's operation * by changing this extension's GUC parameter. But the user can do that * anyway, using SIGTERM or pg_terminate_backend(). */ /* Stop processing the save-file if the Postmaster wants us to die. */ if (got_sigterm) break; ereport(log_level, (errmsg("record type %x - %c", record_type, record_type))); switch (record_type) { case 'r': { /* Close the previous relation, if any. */ if (rel) { relation_close(rel, AccessShareLock); rel = NULL; } record_forknum = InvalidForkNumber; record_blocknum = InvalidBlockNumber; nblocks = 0; fileRead(&record_filenode, sizeof(Oid), file, false, filepath); relOid = GetRelOid(record_filenode); ereport(log_level, (errmsg("processing filenode %u, relation %u", record_filenode, relOid))); /* * If the relation has been rewritten/dropped since we saved it, * just skip it and process the next relation. */ if (relOid == InvalidOid) skip_relation = true; else { skip_relation = false; /* Open the relation */ rel = relation_open(relOid, AccessShareLock); RelationOpenSmgr(rel); } } break; case 'f': { record_blocknum = InvalidBlockNumber; nblocks = 0; fileRead(&record_forknum, sizeof(ForkNumber), file, false, filepath); if (skip_relation) continue; if (rel == NULL) ereport(ERROR, (errmsg("found a fork record without a preceding relation record"))); ereport(log_level, (errmsg("processing fork %d", record_forknum))); if (!smgrexists(rel->rd_smgr, record_forknum)) skip_fork = true; else { skip_fork = false; nblocks = RelationGetNumberOfBlocksInFork(rel, record_forknum); } } break; case 'b': { if (record_forknum == InvalidForkNumber) ereport(ERROR, (errmsg("found a block record without a preceding fork record"))); fileRead(&record_blocknum, sizeof(BlockNumber), file, false, filepath); if (skip_relation || skip_fork) continue; /* * Don't try to read past the file; the file may have been shrunk * by a vacuum/truncate operation. */ if (record_blocknum >= nblocks) { ereport(log_level, (errmsg("reader %d skipping block filenode %u forknum %d blocknum %u", filenum, record_filenode, record_forknum, record_blocknum))); skip_block = true; continue; } else { Buffer buf; skip_block = false; ereport(log_level, (errmsg("reader %d reading block filenode %u forknum %d blocknum %u", filenum, record_filenode, record_forknum, record_blocknum))); buf = ReadBufferExtended(rel, record_forknum, record_blocknum, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_restored; } } break; case 'N': { BlockNumber block; Assert(record_blocknum != InvalidBlockNumber); if (record_blocknum == InvalidBlockNumber) ereport(ERROR, (errmsg("found a block range record without a preceding block record"))); fileRead(&record_range, sizeof(record_range), file, false, filepath); if (skip_relation || skip_fork || skip_block) continue; ereport(log_level, (errmsg("reader %d reading range filenode %u forknum %d blocknum %u range %u", filenum, record_filenode, record_forknum, record_blocknum, record_range))); for (block = record_blocknum + 1; block <= (record_blocknum + record_range); ++block) { Buffer buf; /* * Don't try to read past the file; the file may have been * shrunk by a vacuum operation. */ if (block >= nblocks) { ereport(log_level, (errmsg("reader %d skipping block range filenode %u forknum %d start %u end %u", filenum, record_filenode, record_forknum, block, record_blocknum + record_range))); break; } buf = ReadBufferExtended(rel, record_forknum, block, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_restored; } } break; default: { ereport(ERROR, (errmsg("found unexpected save-file marker %x - %c", record_type, record_type))); Assert(false); } break; } } if (rel) relation_close(rel, AccessShareLock); ereport(LOG, (errmsg("Block Reader %d: restored %u blocks", filenum, blocks_restored))); SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_activity(STATE_IDLE, NULL); fileClose(file, filepath); /* Remove the save-file */ if (remove(filepath) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("error removing file \"%s\": %m", filepath)));
}
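/*
 * Illustrative only: a sketch of how one Block Reader per save-file could
 * be launched with the dynamic bgworker API.  The entry-point name and
 * launch policy here are assumptions; pg_hibernator's actual launcher may
 * differ in details such as worker naming and failure handling.
 */
#ifdef BLOCK_READER_LAUNCH_SKETCH
static bool
launch_block_reader(int filenum)
{
	BackgroundWorker worker;
	BackgroundWorkerHandle *handle;

	memset(&worker, 0, sizeof(worker));
	worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
		BGWORKER_BACKEND_DATABASE_CONNECTION;
	worker.bgw_start_time = BgWorkerStart_ConsistentState;
	worker.bgw_restart_time = BGW_NEVER_RESTART;
	sprintf(worker.bgw_library_name, "pg_hibernator");
	sprintf(worker.bgw_function_name, "BlockReaderMain");	/* assumed name */
	snprintf(worker.bgw_name, BGW_MAXLEN, "Block Reader %d", filenum);
	worker.bgw_main_arg = Int32GetDatum(filenum);
	worker.bgw_notify_pid = MyProcPid;

	return RegisterDynamicBackgroundWorker(&worker, &handle);
}
#endif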
/* --------------------------------
 * InitPostgres
 *      Initialize POSTGRES.
 *
 * Note:
 *      Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
void
InitPostgres(const char *dbname, const char *username)
{
    bool        bootstrap = IsBootstrapProcessingMode();

    /*
     * Set up the global variables holding database id and path.
     *
     * We take a shortcut in the bootstrap case, otherwise we have to look up
     * the db name in pg_database.
     */
    if (bootstrap)
    {
        MyDatabaseId = TemplateDbOid;
        SetDatabasePath(GetDatabasePath(MyDatabaseId));
    }
    else
    {
        char       *fullpath,
                    datpath[MAXPGPATH];

        /*
         * Formerly we validated DataDir here, but now that's done earlier.
         */

        /*
         * Find oid and path of the database we're about to open. Since we're
         * not yet up and running we have to use the hackish
         * GetRawDatabaseInfo.
         */
        GetRawDatabaseInfo(dbname, &MyDatabaseId, datpath);

        if (!OidIsValid(MyDatabaseId))
            ereport(FATAL,
                    (errcode(ERRCODE_UNDEFINED_DATABASE),
                     errmsg("database \"%s\" does not exist", dbname)));

        fullpath = GetDatabasePath(MyDatabaseId);

        /* Verify the database path */
        if (access(fullpath, F_OK) == -1)
        {
            if (errno == ENOENT)
                ereport(FATAL,
                        (errcode(ERRCODE_UNDEFINED_DATABASE),
                         errmsg("database \"%s\" does not exist", dbname),
                         errdetail("The database subdirectory \"%s\" is missing.",
                                   fullpath)));
            else
                ereport(FATAL,
                        (errcode_for_file_access(),
                         errmsg("could not access directory \"%s\": %m",
                                fullpath)));
        }

        ValidatePgVersion(fullpath);

        if (chdir(fullpath) == -1)
            ereport(FATAL,
                    (errcode_for_file_access(),
                     errmsg("could not change directory to \"%s\": %m",
                            fullpath)));

        SetDatabasePath(fullpath);
    }

    /*
     * Code after this point assumes we are in the proper directory!
     */

    /*
     * Set up my per-backend PGPROC struct in shared memory. (We need to
     * know MyDatabaseId before we can do this, since it's entered into the
     * PGPROC struct.)
     */
    InitProcess();

    /*
     * Initialize my entry in the shared-invalidation manager's array of
     * per-backend data. (Formerly this came before InitProcess, but now it
     * must happen after, because it uses MyProc.) Once I have done this, I
     * am visible to other backends!
     *
     * Sets up MyBackendId, a unique backend identifier.
     */
    MyBackendId = InvalidBackendId;

    InitBackendSharedInvalidationState();

    if (MyBackendId > MaxBackends || MyBackendId <= 0)
        elog(FATAL, "bad backend id: %d", MyBackendId);

    /*
     * Initialize the transaction system override state.
     */
    AmiTransactionOverride(bootstrap);

    /*
     * Initialize the relation descriptor cache. This must create at least
     * the minimum set of "nailed-in" cache entries. No catalog access
     * happens here.
     */
    RelationCacheInitialize();

    /*
     * Initialize all the system catalog caches. Note that no catalog access
     * happens here; we only set up the cache structure.
     */
    InitCatalogCache();

    /* Initialize portal manager */
    EnablePortalManager();

    /*
     * Initialize the deferred trigger manager --- must happen before first
     * transaction start.
     */
    DeferredTriggerInit();

    /* start a new transaction here before access to db */
    if (!bootstrap)
        StartTransactionCommand();

    /*
     * It's now possible to do real access to the system catalogs.
     *
     * Replace faked-up relcache entries with correct info.
     */
    RelationCacheInitializePhase2();

    /*
     * Figure out our postgres user id. In standalone mode we use a fixed
     * id, otherwise we figure it out from the authenticated user name.
     */
    if (bootstrap)
        InitializeSessionUserIdStandalone();
    else if (!IsUnderPostmaster)
    {
        InitializeSessionUserIdStandalone();
        if (!ThereIsAtLeastOneUser())
            ereport(WARNING,
                    (errcode(ERRCODE_UNDEFINED_OBJECT),
                     errmsg("no users are defined in this database system"),
                     errhint("You should immediately run CREATE USER \"%s\" WITH SYSID %d CREATEUSER;.",
                             username, BOOTSTRAP_USESYSID)));
    }
    else
    {
        /* normal multiuser case */
        InitializeSessionUserId(username);
    }

    /*
     * Unless we are bootstrapping, double-check that InitMyDatabaseInfo()
     * got a correct result. We can't do this until all the database-access
     * infrastructure is up.
     */
    if (!bootstrap)
        ReverifyMyDatabase(dbname);

    /*
     * Final phase of relation cache startup: write a new cache file if
     * necessary. This is done after ReverifyMyDatabase to avoid writing a
     * cache file into a dead database.
     */
    RelationCacheInitializePhase3();

    /*
     * Check a normal user hasn't connected to a superuser reserved slot. We
     * can't do this till after we've read the user information, and we must
     * do it inside a transaction since checking superuserness may require
     * database access. The superuser check is probably the most expensive
     * part; don't do it until necessary.
     */
    if (ReservedBackends > 0 &&
        CountEmptyBackendSlots() < ReservedBackends &&
        !superuser())
        ereport(FATAL,
                (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
                 errmsg("connection limit exceeded for non-superusers")));

    /*
     * Initialize various default states that can't be set up until we've
     * selected the active user and done ReverifyMyDatabase.
     */

    /* set default namespace search path */
    InitializeSearchPath();

    /* initialize client encoding */
    InitializeClientEncoding();

    /*
     * Now all default states are fully set up. Report them to client if
     * appropriate.
     */
    BeginReportingGUCOptions();

    /*
     * Set up process-exit callback to do pre-shutdown cleanup. This should
     * be last because we want shmem_exit to call this routine before the
     * exit callbacks that are registered by buffer manager, lock manager,
     * etc. We need to run this code before we close down database access!
     */
    on_shmem_exit(ShutdownPostgres, 0);

    /* close the transaction we started above */
    if (!bootstrap)
        CommitTransactionCommand();
}
static void
worker_spi_main(Datum main_arg)
{
    /* Register functions for SIGTERM/SIGHUP management */
    pqsignal(SIGHUP, worker_spi_sighup);
    pqsignal(SIGTERM, worker_spi_sigterm);

    /* We're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    /* Connect to our database */
    BackgroundWorkerInitializeConnection("postgres", NULL);

    while (!got_sigterm)
    {
        int         ret;
        int         rc;
        StringInfoData buf;

        /*
         * Background workers mustn't call usleep() or any direct equivalent:
         * instead, they may wait on their process latch, which sleeps as
         * necessary, but is awakened if the postmaster dies. That way the
         * background process goes away immediately in an emergency.
         */
        rc = WaitLatch(&MyProc->procLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       1000L);
        ResetLatch(&MyProc->procLatch);

        /* emergency bailout if postmaster has died */
        if (rc & WL_POSTMASTER_DEATH)
            proc_exit(1);

        StartTransactionCommand();
        SPI_connect();
        PushActiveSnapshot(GetTransactionSnapshot());

        /* Build the query string */
        initStringInfo(&buf);
        appendStringInfoString(&buf, "SELECT count(*) FROM pg_class;");

        ret = SPI_execute(buf.data, true, 0);

        /* Some error messages in case of incorrect handling */
        if (ret != SPI_OK_SELECT)
            elog(FATAL, "SPI_execute failed: error code %d", ret);

        if (SPI_processed > 0)
        {
            int64       count;
            bool        isnull;

            /* count(*) returns bigint, so fetch the Datum as int64 */
            count = DatumGetInt64(SPI_getbinval(SPI_tuptable->vals[0],
                                                SPI_tuptable->tupdesc,
                                                1, &isnull));
            elog(LOG, "Currently " INT64_FORMAT " relations in database",
                 count);
        }

        SPI_finish();
        PopActiveSnapshot();
        CommitTransactionCommand();
    }

    proc_exit(0);
}
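/*
 * A worker like the above must be registered when the library is loaded.
 * What follows is a minimal sketch against the 9.3-era static
 * background-worker API implied by the procLatch usage above; the worker
 * name and the 60-second restart interval are arbitrary choices for
 * illustration, not taken from the original module.
 */
#include "postmaster/bgworker.h"

void        _PG_init(void);

void
_PG_init(void)
{
    BackgroundWorker worker;

    /* Request shared-memory access and a database connection. */
    worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
        BGWORKER_BACKEND_DATABASE_CONNECTION;
    /* Start once the server has finished recovery. */
    worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
    /* If the worker crashes, restart it after 60 seconds. */
    worker.bgw_restart_time = 60;
    worker.bgw_main = worker_spi_main;
    worker.bgw_main_arg = (Datum) 0;
    snprintf(worker.bgw_name, BGW_MAXLEN, "worker_spi");

    RegisterBackgroundWorker(&worker);
}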
/*
 * This function receives a command name and a query, and produces a
 * protocol-valid command string which can be sent onward, based on the
 * format of the result set. It handles memory contexts appropriately,
 * returning a pointer to the command string allocated in the memory context
 * that was current when it was called.
 */
char *
exec_to_command(const char *command, char *q)
{
    StringInfoData resultbuf;
    char       *result;
    char       *value;
    int         i,
                j,
                processed,
                retval;
    SPITupleTable *coltuptable;
    MemoryContext pre_context;
    MemoryContext spi_conn_context;

    /* elog(LOG, "%s", command);     prints the current command running */

    pre_context = CurrentMemoryContext;

    SetCurrentStatementStartTimestamp();
    StartTransactionCommand();
    SPI_connect();
    PushActiveSnapshot(GetTransactionSnapshot());

    retval = SPI_execute(q, false, 0);
    if (retval != SPI_OK_SELECT)
    {
        elog(LOG, "Database SELECT execution failed: %d", retval);
        SPI_finish();
        PopActiveSnapshot();
        CommitTransactionCommand();
        result = pstrdup("");
        return result;
    }

    processed = SPI_processed;
    coltuptable = SPI_tuptable;

    initStringInfo(&resultbuf);
    appendStringInfoString(&resultbuf, command);
    appendStringInfoString(&resultbuf, ";");    /* artisanal semicolon */

    if (coltuptable != NULL)
    {
        for (i = 0; i < processed; i++)
        {
            for (j = 1; j <= coltuptable->tupdesc->natts; j++)
            {
                /* Fetch each value once; NULLs contribute an empty field. */
                value = SPI_getvalue(coltuptable->vals[i],
                                     coltuptable->tupdesc, j);
                if (value != NULL)
                    appendStringInfoString(&resultbuf, value);
                appendStringInfoString(&resultbuf, FIELD_DELIMIT);
            }
            appendStringInfoString(&resultbuf, REC_DELIMIT);
        }
    }

    appendStringInfoString(&resultbuf, CDELIMIT);

    /* Copy the result into the caller's memory context before SPI cleanup. */
    spi_conn_context = MemoryContextSwitchTo(pre_context);
    result = pstrdup(resultbuf.data);
    MemoryContextSwitchTo(spi_conn_context);

    SPI_finish();
    PopActiveSnapshot();
    CommitTransactionCommand();

    return result;
}
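/*
 * Hypothetical caller sketch for exec_to_command. The command tag and query
 * are invented for illustration. The resulting string is the command tag, a
 * semicolon, then each field terminated by FIELD_DELIMIT, each record by
 * REC_DELIMIT, and the whole result by CDELIMIT (macros defined elsewhere in
 * this extension).
 */
static void
report_database_list(void)
{
    char       *msg;

    msg = exec_to_command("LIST_DATABASES",
                          "SELECT datname FROM pg_database"
                          " WHERE NOT datistemplate;");

    /* ... transmit msg to the peer here ... */

    /* msg was pstrdup'd in our memory context, so free it when done. */
    pfree(msg);
}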
/*
 * update segment configuration in catalog and shared memory
 */
static bool
probeUpdateConfig(FtsSegmentStatusChange *changes, int changeCount)
{
    Relation    configrel;
    Relation    histrel;
    SysScanDesc sscan;
    ScanKeyData scankey;
    HeapTuple   configtuple;
    HeapTuple   newtuple;
    HeapTuple   histtuple;
    Datum       configvals[Natts_gp_segment_configuration];
    bool        confignulls[Natts_gp_segment_configuration] = { false };
    bool        repls[Natts_gp_segment_configuration] = { false };
    Datum       histvals[Natts_gp_configuration_history];
    bool        histnulls[Natts_gp_configuration_history] = { false };
    bool        valid;
    bool        primary;
    bool        changelogging;
    int         i;
    char        desc[SQL_CMD_BUF_SIZE];

    /*
     * Commit/abort transaction below will destroy CurrentResourceOwner.
     * We need it for catalog reads.
     */
    ResourceOwner save = CurrentResourceOwner;

    StartTransactionCommand();
    elog(LOG, "probeUpdateConfig called for %d changes", changeCount);

    histrel = heap_open(GpConfigHistoryRelationId, RowExclusiveLock);
    configrel = heap_open(GpSegmentConfigRelationId, RowExclusiveLock);

    for (i = 0; i < changeCount; i++)
    {
        FtsSegmentStatusChange *change = &changes[i];

        valid = (changes[i].newStatus & FTS_STATUS_ALIVE);
        primary = (changes[i].newStatus & FTS_STATUS_PRIMARY);
        changelogging = (changes[i].newStatus & FTS_STATUS_CHANGELOGGING);

        if (changelogging)
        {
            Assert(failover_strategy == 'f');
            Assert(primary && valid);
        }

        Assert((valid || !primary) && "Primary cannot be down");

        /*
         * Insert new tuple into gp_configuration_history catalog.
         */
        histvals[Anum_gp_configuration_history_time-1] =
                TimestampTzGetDatum(GetCurrentTimestamp());
        histvals[Anum_gp_configuration_history_dbid-1] =
                Int16GetDatum(changes[i].dbid);
        snprintf(desc, sizeof(desc),
                 "FTS: content %d fault marking status %s%s role %c",
                 change->segindex,
                 valid ? "UP" : "DOWN",
                 (changelogging) ? " mode: change-tracking" : "",
                 primary ? 'p' : 'm');
        histvals[Anum_gp_configuration_history_desc-1] =
                CStringGetTextDatum(desc);

        histtuple = heap_form_tuple(RelationGetDescr(histrel),
                                    histvals, histnulls);
        simple_heap_insert(histrel, histtuple);
        CatalogUpdateIndexes(histrel, histtuple);

        /*
         * Find and update gp_segment_configuration tuple.
         */
        ScanKeyInit(&scankey,
                    Anum_gp_segment_configuration_dbid,
                    BTEqualStrategyNumber, F_INT2EQ,
                    Int16GetDatum(changes[i].dbid));
        sscan = systable_beginscan(configrel, GpSegmentConfigDbidIndexId,
                                   true, SnapshotNow, 1, &scankey);

        configtuple = systable_getnext(sscan);

        if (!HeapTupleIsValid(configtuple))
        {
            elog(ERROR, "FTS cannot find dbid=%d in %s", changes[i].dbid,
                 RelationGetRelationName(configrel));
        }

        configvals[Anum_gp_segment_configuration_role-1] =
                CharGetDatum(primary ? 'p' : 'm');
        repls[Anum_gp_segment_configuration_role-1] = true;
        configvals[Anum_gp_segment_configuration_status-1] =
                CharGetDatum(valid ? 'u' : 'd');
        repls[Anum_gp_segment_configuration_status-1] = true;
        if (changelogging)
        {
            configvals[Anum_gp_segment_configuration_mode-1] =
                    CharGetDatum('c');
        }
        repls[Anum_gp_segment_configuration_mode-1] = changelogging;

        newtuple = heap_modify_tuple(configtuple, RelationGetDescr(configrel),
                                     configvals, confignulls, repls);
        simple_heap_update(configrel, &configtuple->t_self, newtuple);
        CatalogUpdateIndexes(configrel, newtuple);

        systable_endscan(sscan);
        pfree(newtuple);

        /*
         * Update shared memory
         */
        ftsProbeInfo->fts_status[changes[i].dbid] = changes[i].newStatus;
    }

    heap_close(histrel, RowExclusiveLock);
    heap_close(configrel, RowExclusiveLock);

    SIMPLE_FAULT_INJECTOR(FtsWaitForShutdown);

    /*
     * Do not block shutdown. We will always get a chance to update
     * gp_segment_configuration in subsequent probes upon database restart.
     */
    if (shutdown_requested)
    {
        elog(LOG, "Shutdown in progress, ignoring FTS prober updates.");
        return false;
    }

    CommitTransactionCommand();
    CurrentResourceOwner = save;

    return true;
}
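/*
 * Hypothetical caller sketch for probeUpdateConfig: fail over content 0 by
 * marking its primary (dbid 2) down and promoting the mirror (dbid 3). Only
 * the FtsSegmentStatusChange fields referenced above (segindex, dbid,
 * newStatus) are set here; the dbids and flag combinations are invented for
 * illustration.
 */
static void
demo_failover_content0(void)
{
    FtsSegmentStatusChange changes[2];

    memset(changes, 0, sizeof(changes));

    changes[0].segindex = 0;
    changes[0].dbid = 2;
    changes[0].newStatus = 0;   /* neither ALIVE nor PRIMARY: marked down */

    changes[1].segindex = 0;
    changes[1].dbid = 3;
    changes[1].newStatus = FTS_STATUS_ALIVE | FTS_STATUS_PRIMARY;

    if (!probeUpdateConfig(changes, 2))
        elog(LOG, "FTS update skipped: shutdown in progress");
}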
/**
 * Marks the given db as in-sync in the segment configuration.
 */
void
FtsMarkSegmentsInSync(CdbComponentDatabaseInfo *primary,
                      CdbComponentDatabaseInfo *mirror)
{
    uint8       segStatus = 0;
    Relation    configrel;
    Relation    histrel;
    ScanKeyData scankey;
    SysScanDesc sscan;
    HeapTuple   configtuple;
    HeapTuple   newtuple;
    HeapTuple   histtuple;
    Datum       configvals[Natts_gp_segment_configuration];
    bool        confignulls[Natts_gp_segment_configuration] = { false };
    bool        repls[Natts_gp_segment_configuration] = { false };
    Datum       histvals[Natts_gp_configuration_history];
    bool        histnulls[Natts_gp_configuration_history] = { false };
    char       *desc = "FTS: changed segment to insync from resync.";
    ResourceOwner save;

    if (!FTS_STATUS_ISALIVE(primary->dbid, ftsProbeInfo->fts_status) ||
        !FTS_STATUS_ISALIVE(mirror->dbid, ftsProbeInfo->fts_status) ||
        !FTS_STATUS_ISPRIMARY(primary->dbid, ftsProbeInfo->fts_status) ||
        FTS_STATUS_ISPRIMARY(mirror->dbid, ftsProbeInfo->fts_status) ||
        FTS_STATUS_IS_SYNCED(primary->dbid, ftsProbeInfo->fts_status) ||
        FTS_STATUS_IS_SYNCED(mirror->dbid, ftsProbeInfo->fts_status) ||
        FTS_STATUS_IS_CHANGELOGGING(primary->dbid, ftsProbeInfo->fts_status) ||
        FTS_STATUS_IS_CHANGELOGGING(mirror->dbid, ftsProbeInfo->fts_status))
    {
        FtsRequestPostmasterShutdown(primary, mirror);
    }

    if (ftsProbeInfo->fts_pauseProbes)
        return;

    /*
     * Commit/abort transaction below will destroy CurrentResourceOwner.
     * We need it for catalog reads.
     */
    save = CurrentResourceOwner;
    StartTransactionCommand();

    /* update primary */
    segStatus = ftsProbeInfo->fts_status[primary->dbid];
    segStatus |= FTS_STATUS_SYNCHRONIZED;
    ftsProbeInfo->fts_status[primary->dbid] = segStatus;

    /* update mirror */
    segStatus = ftsProbeInfo->fts_status[mirror->dbid];
    segStatus |= FTS_STATUS_SYNCHRONIZED;
    ftsProbeInfo->fts_status[mirror->dbid] = segStatus;

    histrel = heap_open(GpConfigHistoryRelationId, RowExclusiveLock);
    configrel = heap_open(GpSegmentConfigRelationId, RowExclusiveLock);

    /* update gp_segment_configuration to insync */
    ScanKeyInit(&scankey,
                Anum_gp_segment_configuration_dbid,
                BTEqualStrategyNumber, F_INT2EQ,
                Int16GetDatum(primary->dbid));
    sscan = systable_beginscan(configrel, GpSegmentConfigDbidIndexId,
                               true, SnapshotNow, 1, &scankey);
    configtuple = systable_getnext(sscan);
    if (!HeapTupleIsValid(configtuple))
    {
        elog(ERROR, "FTS cannot find dbid (%d, %d) in %s",
             primary->dbid, mirror->dbid,
             RelationGetRelationName(configrel));
    }
    configvals[Anum_gp_segment_configuration_mode-1] = CharGetDatum('s');
    repls[Anum_gp_segment_configuration_mode-1] = true;
    newtuple = heap_modify_tuple(configtuple, RelationGetDescr(configrel),
                                 configvals, confignulls, repls);
    simple_heap_update(configrel, &configtuple->t_self, newtuple);
    CatalogUpdateIndexes(configrel, newtuple);
    systable_endscan(sscan);

    ScanKeyInit(&scankey,
                Anum_gp_segment_configuration_dbid,
                BTEqualStrategyNumber, F_INT2EQ,
                Int16GetDatum(mirror->dbid));
    sscan = systable_beginscan(configrel, GpSegmentConfigDbidIndexId,
                               true, SnapshotNow, 1, &scankey);
    configtuple = systable_getnext(sscan);
    if (!HeapTupleIsValid(configtuple))
    {
        elog(ERROR, "FTS cannot find dbid (%d, %d) in %s",
             primary->dbid, mirror->dbid,
             RelationGetRelationName(configrel));
    }
    newtuple = heap_modify_tuple(configtuple, RelationGetDescr(configrel),
                                 configvals, confignulls, repls);
    simple_heap_update(configrel, &configtuple->t_self, newtuple);
    CatalogUpdateIndexes(configrel, newtuple);
    systable_endscan(sscan);

    /* update configuration history */
    histvals[Anum_gp_configuration_history_time-1] =
            TimestampTzGetDatum(GetCurrentTimestamp());
    histvals[Anum_gp_configuration_history_dbid-1] =
            Int16GetDatum(primary->dbid);
    histvals[Anum_gp_configuration_history_desc-1] =
            CStringGetTextDatum(desc);
    histtuple = heap_form_tuple(RelationGetDescr(histrel),
                                histvals, histnulls);
    simple_heap_insert(histrel, histtuple);
    CatalogUpdateIndexes(histrel, histtuple);

    histvals[Anum_gp_configuration_history_dbid-1] =
            Int16GetDatum(mirror->dbid);
    histtuple = heap_form_tuple(RelationGetDescr(histrel),
                                histvals, histnulls);
    simple_heap_insert(histrel, histtuple);
    CatalogUpdateIndexes(histrel, histtuple);

    ereport(LOG,
            (errmsg("FTS: resynchronization of mirror (dbid=%d, content=%d) on %s:%d has completed.",
                    mirror->dbid, mirror->segindex,
                    mirror->address, mirror->port),
             errSendAlert(true)));

    heap_close(histrel, RowExclusiveLock);
    heap_close(configrel, RowExclusiveLock);

    /*
     * Do not block shutdown. We will always get a chance to update
     * gp_segment_configuration in subsequent probes upon database restart.
     */
    if (shutdown_requested)
    {
        elog(LOG, "Shutdown in progress, ignoring FTS prober updates.");
        return;
    }

    CommitTransactionCommand();
    CurrentResourceOwner = save;
}